From e1bd1dc207dae92cdb5b424226f89d1b4e4bb182 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 14 Nov 2013 01:05:58 +0000 Subject: net_tstamp: Improve kernel-doc for struct hwtstamp_config Fix the name of the rx_filter field. Remove text about 32/64-bit compatibility; this works just the same as for most socket ioctls and as the structure is not allowed to grow there is no need to remind anyone how to maintain it. Add explanation about drivers changing the filter mode. Signed-off-by: Ben Hutchings --- include/uapi/linux/net_tstamp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ae5df122e42f..c9a7de2a6276 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -30,13 +30,13 @@ enum { * * @flags: no flags defined right now, must be zero * @tx_type: one of HWTSTAMP_TX_* - * @rx_type: one of one of HWTSTAMP_FILTER_* + * @rx_filter: one of HWTSTAMP_FILTER_* * * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to - * this structure. dev_ifsioc() in the kernel takes care of the - * translation between 32 bit userspace and 64 bit kernel. The - * structure is intentionally chosen so that it has the same layout on - * 32 and 64 bit systems, don't break this! + * this structure. If the driver or hardware does not support the + * requested @rx_filter value, the driver may use a more general + * filter mode. In this case @rx_filter will indicate the actual mode + * on return. */ struct hwtstamp_config { int flags; -- cgit v1.2.2 From fd468c74bd4d6949736810a80d6ca05eb20fba84 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 14 Nov 2013 01:19:29 +0000 Subject: net_tstamp: Add SIOCGHWTSTAMP ioctl to match SIOCSHWTSTAMP SIOCSHWTSTAMP returns the real configuration to the application using it, but there is currently no way for any other application to find out the configuration non-destructively. Add a new ioctl for this, making it unprivileged. Signed-off-by: Ben Hutchings --- include/uapi/linux/net_tstamp.h | 14 +++++++------- include/uapi/linux/sockios.h | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index c9a7de2a6276..f53879c0f590 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -26,17 +26,17 @@ enum { }; /** - * struct hwtstamp_config - %SIOCSHWTSTAMP parameter + * struct hwtstamp_config - %SIOCGHWTSTAMP and %SIOCSHWTSTAMP parameter * - * @flags: no flags defined right now, must be zero + * @flags: no flags defined right now, must be zero for %SIOCSHWTSTAMP * @tx_type: one of HWTSTAMP_TX_* * @rx_filter: one of HWTSTAMP_FILTER_* * - * %SIOCSHWTSTAMP expects a &struct ifreq with a ifr_data pointer to - * this structure. If the driver or hardware does not support the - * requested @rx_filter value, the driver may use a more general - * filter mode. In this case @rx_filter will indicate the actual mode - * on return. + * %SIOCGHWTSTAMP and %SIOCSHWTSTAMP expect a &struct ifreq with a + * ifr_data pointer to this structure. For %SIOCSHWTSTAMP, if the + * driver or hardware does not support the requested @rx_filter value, + * the driver may use a more general filter mode. In this case + * @rx_filter will indicate the actual mode on return. */ struct hwtstamp_config { int flags; diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 7997a506ad41..e888b1aed69f 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -125,7 +125,8 @@ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ /* hardware time stamping: parameters in linux/net_tstamp.h */ -#define SIOCSHWTSTAMP 0x89b0 +#define SIOCSHWTSTAMP 0x89b0 /* set and get config */ +#define SIOCGHWTSTAMP 0x89b1 /* get config */ /* Device private ioctl calls */ -- cgit v1.2.2 From c17bff87bed4bda1835ee41dc908e926414d8d85 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 31 Oct 2013 14:54:35 +0200 Subject: nl80211: better document NL80211_CMD_TDLS_MGMT This command has different semantics depending on the action code sent. Document this fact and detail the supported action codes. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f752e9821e71..3d8325bb50cd 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -581,7 +581,14 @@ * operation, %NL80211_ATTR_MAC contains the peer MAC address, and * %NL80211_ATTR_REASON_CODE the reason code to be used (only with * %NL80211_TDLS_TEARDOWN). - * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. + * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. The + * %NL80211_ATTR_TDLS_ACTION attribute determines the type of frame to be + * sent. Public Action codes (802.11-2012 8.1.5.1) will be sent as + * 802.11 management frames, while TDLS action codes (802.11-2012 + * 8.5.13.1) will be encapsulated and sent as data frames. The currently + * supported Public Action code is %WLAN_PUB_ACTION_TDLS_DISCOVER_RES + * and the currently supported TDLS actions codes are given in + * &enum ieee80211_tdls_actioncode. * * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP * (or GO) interface (i.e. hostapd) to ask for unexpected frames to -- cgit v1.2.2 From 8fe02e167efa8ed4a4503a5eedc0f49fcb7e3eb9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 21 Oct 2013 19:22:25 +0200 Subject: cfg80211: consolidate passive-scan and no-ibss flags These two flags are used for the same purpose, just combine them into a no-ir flag to annotate no initiating radiation is allowed. Old userspace sending either flag will have it treated as the no-ir flag. To be considerate to older userspace we also send both the no-ir flag and the old no-ibss flags. Newer userspace will have to be aware of older kernels. Update all places in the tree using these flags with the following semantic patch: @@ @@ -NL80211_RRF_PASSIVE_SCAN +NL80211_RRF_NO_IR @@ @@ -NL80211_RRF_NO_IBSS +NL80211_RRF_NO_IR @@ @@ -IEEE80211_CHAN_PASSIVE_SCAN +IEEE80211_CHAN_NO_IR @@ @@ -IEEE80211_CHAN_NO_IBSS +IEEE80211_CHAN_NO_IR @@ @@ -NL80211_RRF_NO_IR | NL80211_RRF_NO_IR +NL80211_RRF_NO_IR @@ @@ -IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_NO_IR +IEEE80211_CHAN_NO_IR @@ @@ -(NL80211_RRF_NO_IR) +NL80211_RRF_NO_IR @@ @@ -(IEEE80211_CHAN_NO_IR) +IEEE80211_CHAN_NO_IR Along with some hand-optimisations in documentation, to remove duplicates and to fix some indentation. Signed-off-by: Luis R. Rodriguez [do all the driver updates in one go] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 ++++----- include/uapi/linux/nl80211.h | 30 ++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3eae46cb1acf..c1b887413234 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -91,9 +91,8 @@ enum ieee80211_band { * Channel flags set by the regulatory control code. * * @IEEE80211_CHAN_DISABLED: This channel is disabled. - * @IEEE80211_CHAN_PASSIVE_SCAN: Only passive scanning is permitted - * on this channel. - * @IEEE80211_CHAN_NO_IBSS: IBSS is not allowed on this channel. + * @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes + * sending probe requests or beaconing. * @IEEE80211_CHAN_RADAR: Radar detection is required on this channel. * @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel * is not permitted. @@ -113,8 +112,8 @@ enum ieee80211_band { */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, - IEEE80211_CHAN_PASSIVE_SCAN = 1<<1, - IEEE80211_CHAN_NO_IBSS = 1<<2, + IEEE80211_CHAN_NO_IR = 1<<1, + /* hole at 1<<2 */ IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3d8325bb50cd..7e25164adfe9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2231,10 +2231,9 @@ enum nl80211_band_attr { * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz * @NL80211_FREQUENCY_ATTR_DISABLED: Channel is disabled in current * regulatory domain. - * @NL80211_FREQUENCY_ATTR_PASSIVE_SCAN: Only passive scanning is - * permitted on this channel in current regulatory domain. - * @NL80211_FREQUENCY_ATTR_NO_IBSS: IBSS networks are not permitted - * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_NO_IR: no mechanisms that initiate radiation + * are permitted on this channel, this includes sending probe + * requests, or modes of operation that require beaconing. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm @@ -2261,8 +2260,8 @@ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, NL80211_FREQUENCY_ATTR_FREQ, NL80211_FREQUENCY_ATTR_DISABLED, - NL80211_FREQUENCY_ATTR_PASSIVE_SCAN, - NL80211_FREQUENCY_ATTR_NO_IBSS, + NL80211_FREQUENCY_ATTR_NO_IR, + __NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, NL80211_FREQUENCY_ATTR_DFS_STATE, @@ -2278,6 +2277,9 @@ enum nl80211_frequency_attr { }; #define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER +#define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR /** * enum nl80211_bitrate_attr - bitrate attributes @@ -2420,8 +2422,9 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_DFS: DFS support is required to be used * @NL80211_RRF_PTP_ONLY: this is only for Point To Point links * @NL80211_RRF_PTMP_ONLY: this is only for Point To Multi Point links - * @NL80211_RRF_PASSIVE_SCAN: passive scan is required - * @NL80211_RRF_NO_IBSS: no IBSS is allowed + * @NL80211_RRF_NO_IR: no mechanisms that initiate radiation are allowed, + * this includes probe requests or modes of operation that require + * beaconing. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -2431,10 +2434,17 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS = 1<<4, NL80211_RRF_PTP_ONLY = 1<<5, NL80211_RRF_PTMP_ONLY = 1<<6, - NL80211_RRF_PASSIVE_SCAN = 1<<7, - NL80211_RRF_NO_IBSS = 1<<8, + NL80211_RRF_NO_IR = 1<<7, + __NL80211_RRF_NO_IBSS = 1<<8, }; +#define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR +#define NL80211_RRF_NO_IR NL80211_RRF_NO_IR + +/* For backport compatibility with older userspace */ +#define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) + /** * enum nl80211_dfs_regions - regulatory DFS regions * -- cgit v1.2.2 From 222ea5819901ed174db4df2e26aa5e982f857845 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 5 Nov 2013 09:18:00 -0800 Subject: cfg80211: force WIPHY_FLAG_CUSTOM_REGULATORY on wiphy_apply_custom_regulatory() wiphy_apply_custom_regulatory() implies WIPHY_FLAG_CUSTOM_REGULATORY but we never enforced it, do that now and warn if the driver didn't set it. All drivers should be following this today already. Having WIPHY_FLAG_CUSTOM_REGULATORY does not however mean you will use wiphy_apply_custom_regulatory() though, you may have your own _orig value set up tools / helpers. The intel drivers are examples of this type of driver. Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c1b887413234..b7a825ecff56 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2441,7 +2441,9 @@ struct cfg80211_ops { * has its own custom regulatory domain and cannot identify the * ISO / IEC 3166 alpha2 it belongs to. When this is enabled * we will disregard the first regulatory hint (when the - * initiator is %REGDOM_SET_BY_CORE). + * initiator is %REGDOM_SET_BY_CORE). Drivers that use + * wiphy_apply_custom_regulatory() should have this flag set + * or the regulatory core will set it for wiphy. * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will * ignore regulatory domain settings until it gets its own regulatory * domain via its regulatory_hint() unless the regulatory hint is @@ -3471,6 +3473,9 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); * custom regulatory domain will be trusted completely and as such previous * default channel settings will be disregarded. If no rule is found for a * channel on the regulatory domain the channel will be disabled. + * Drivers using this for a wiphy should also set the wiphy flag + * WIPHY_FLAG_CUSTOM_REGULATORY or cfg80211 will set it for the wiphy + * that called this helper. */ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd); -- cgit v1.2.2 From d2859df5e7f00469011482d850fba652517a2eab Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Wed, 6 Nov 2013 13:55:51 +0100 Subject: cfg80211/mac80211: DFS setup chandef for cac event To report channel width correctly we have to send correct channel parameters from mac80211 when calling cfg80211_cac_event(). This is required in case of using channel width higher than 20MHz and we have to set correct dfs channel state after CAC (NL80211_DFS_AVAILABLE). Signed-off-by: Janusz Dziedzic Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b7a825ecff56..968f2ad40ccd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4150,6 +4150,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, /** * cfg80211_cac_event - Channel availability check (CAC) event * @netdev: network device + * @chandef: chandef for the current channel * @event: type of event * @gfp: context flags * @@ -4158,6 +4159,7 @@ void cfg80211_radar_event(struct wiphy *wiphy, * also by full-MAC drivers. */ void cfg80211_cac_event(struct net_device *netdev, + const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event, gfp_t gfp); -- cgit v1.2.2 From 2475b1cc0d5283a33144b79f3eba6d401d873962 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Sun, 24 Mar 2013 14:23:27 +0200 Subject: mac80211: add generic cipher scheme support This adds generic cipher scheme support to mac80211, such schemes are fully under control by the driver. On hw registration drivers may specify additional HW ciphers with a scheme how these ciphers have to be handled by mac80211 TX/RR. A cipher scheme specifies a cipher suite value, a size of the security header to be added to or stripped from frames and how the PN is to be verified on RX. Signed-off-by: Max Stepanov Signed-off-by: Johannes Berg --- include/net/mac80211.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7ceed99a05bc..cca02b2f5c7e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1228,6 +1228,36 @@ struct ieee80211_key_conf { u8 key[0]; }; +/** + * struct ieee80211_cipher_scheme - cipher scheme + * + * This structure contains a cipher scheme information defining + * the secure packet crypto handling. + * + * @cipher: a cipher suite selector + * @iftype: a cipher iftype bit mask indicating an allowed cipher usage + * @hdr_len: a length of a security header used the cipher + * @pn_len: a length of a packet number in the security header + * @pn_off: an offset of pn from the beginning of the security header + * @key_idx_off: an offset of key index byte in the security header + * @key_idx_mask: a bit mask of key_idx bits + * @key_idx_shift: a bit shift needed to get key_idx + * key_idx value calculation: + * (sec_header_base[key_idx_off] & key_idx_mask) >> key_idx_shift + * @mic_len: a mic length in bytes + */ +struct ieee80211_cipher_scheme { + u32 cipher; + u16 iftype; + u8 hdr_len; + u8 pn_len; + u8 pn_off; + u8 key_idx_off; + u8 key_idx_mask; + u8 key_idx_shift; + u8 mic_len; +}; + /** * enum set_key_cmd - key command * @@ -1636,6 +1666,10 @@ enum ieee80211_hw_flags { * @uapsd_max_sp_len: maximum number of total buffered frames the WMM AP may * deliver to a WMM STA during any Service Period triggered by the WMM STA. * Use IEEE80211_WMM_IE_STA_QOSINFO_SP_* for correct values. + * + * @n_cipher_schemes: a size of an array of cipher schemes definitions. + * @cipher_schemes: a pointer to an array of cipher scheme definitions + * supported by HW. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1663,6 +1697,8 @@ struct ieee80211_hw { netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; + u8 n_cipher_schemes; + const struct ieee80211_cipher_scheme *cipher_schemes; }; /** -- cgit v1.2.2 From a2f73b6c5db3c272d87eaebb5bed355d75a0f25f Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 11 Nov 2013 22:15:29 +0100 Subject: cfg80211: move regulatory flags to their own variable We'll expand this later, this will make it easier to classify and review what things are related to regulatory or not. Coccinelle only missed 4 hits, which I had to do manually, supplying the SmPL in case of merge conflicts. @@ struct wiphy *wiphy; @@ -wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY +wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG @@ expression e; @@ -e->flags |= WIPHY_FLAG_CUSTOM_REGULATORY +e->regulatory_flags |= REGULATORY_CUSTOM_REG @@ struct wiphy *wiphy; @@ -wiphy->flags &= ~WIPHY_FLAG_CUSTOM_REGULATORY +wiphy->regulatory_flags &= ~REGULATORY_CUSTOM_REG @@ struct wiphy *wiphy; @@ -wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY +wiphy->regulatory_flags & REGULATORY_CUSTOM_REG @@ struct wiphy *wiphy; @@ -wiphy->flags |= WIPHY_FLAG_STRICT_REGULATORY +wiphy->regulatory_flags |= REGULATORY_STRICT_REG @@ expression e; @@ -e->flags |= WIPHY_FLAG_STRICT_REGULATORY +e->regulatory_flags |= REGULATORY_STRICT_REG @@ struct wiphy *wiphy; @@ -wiphy->flags &= ~WIPHY_FLAG_STRICT_REGULATORY +wiphy->regulatory_flags &= ~REGULATORY_STRICT_REG @@ struct wiphy *wiphy; @@ -wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY +wiphy->regulatory_flags & REGULATORY_STRICT_REG @@ struct wiphy *wiphy; @@ -wiphy->flags |= WIPHY_FLAG_DISABLE_BEACON_HINTS +wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS @@ expression e; @@ -e->flags |= WIPHY_FLAG_DISABLE_BEACON_HINTS +e->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS @@ struct wiphy *wiphy; @@ -wiphy->flags &= ~WIPHY_FLAG_DISABLE_BEACON_HINTS +wiphy->regulatory_flags &= ~REGULATORY_DISABLE_BEACON_HINTS @@ struct wiphy *wiphy; @@ -wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS +wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS Generated-by: Coccinelle SmPL Cc: Julia Lawall Cc: Peter Senna Tschudin Cc: Mihir Shete Cc: Henri Bahini Cc: Tushnim Bhattacharyya Signed-off-by: Luis R. Rodriguez [fix up whitespace damage, overly long lines] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 33 +++++---------------------- include/net/regulatory.h | 59 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 968f2ad40ccd..bacc5033f0b6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2437,29 +2437,6 @@ struct cfg80211_ops { /** * enum wiphy_flags - wiphy capability flags * - * @WIPHY_FLAG_CUSTOM_REGULATORY: tells us the driver for this device - * has its own custom regulatory domain and cannot identify the - * ISO / IEC 3166 alpha2 it belongs to. When this is enabled - * we will disregard the first regulatory hint (when the - * initiator is %REGDOM_SET_BY_CORE). Drivers that use - * wiphy_apply_custom_regulatory() should have this flag set - * or the regulatory core will set it for wiphy. - * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will - * ignore regulatory domain settings until it gets its own regulatory - * domain via its regulatory_hint() unless the regulatory hint is - * from a country IE. After its gets its own regulatory domain it will - * only allow further regulatory domain settings to further enhance - * compliance. For example if channel 13 and 14 are disabled by this - * regulatory domain no user regulatory domain can enable these channels - * at a later time. This can be used for devices which do not have - * calibration information guaranteed for frequencies or settings - * outside of its regulatory domain. If used in combination with - * WIPHY_FLAG_CUSTOM_REGULATORY the inspected country IE power settings - * will be followed. - * @WIPHY_FLAG_DISABLE_BEACON_HINTS: enable this if your driver needs to ensure - * that passive scan flags and beaconing flags may not be lifted by - * cfg80211 due to regulatory beacon hints. For more information on beacon - * hints read the documenation for regulatory_hint_found_beacon() * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled @@ -2498,9 +2475,9 @@ struct cfg80211_ops { * beaconing mode (AP, IBSS, Mesh, ...). */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + /* use hole at 0 */ + /* use hole at 1 */ + /* use hole at 2 */ WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), WIPHY_FLAG_4ADDR_AP = BIT(5), @@ -2722,6 +2699,8 @@ struct wiphy_coalesce_support { * @software_iftypes: bitmask of software interface types, these are not * subject to any restrictions since they are purely managed in SW. * @flags: wiphy flags, see &enum wiphy_flags + * @regulatory_flags: wiphy regulatory flags, see + * &enum ieee80211_regulatory_flags * @features: features advertised to nl80211, see &enum nl80211_feature_flags. * @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size @@ -2810,7 +2789,7 @@ struct wiphy { u16 max_acl_mac_addrs; - u32 flags, features; + u32 flags, regulatory_flags, features; u32 ap_sme_capa; diff --git a/include/net/regulatory.h b/include/net/regulatory.h index f17ed590d64a..a6a20e2a54c4 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -38,17 +38,17 @@ enum environment_cap { * * @rcu_head: RCU head struct used to free the request * @wiphy_idx: this is set if this request's initiator is - * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This - * can be used by the wireless core to deal with conflicts - * and potentially inform users of which devices specifically - * cased the conflicts. + * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This + * can be used by the wireless core to deal with conflicts + * and potentially inform users of which devices specifically + * cased the conflicts. * @initiator: indicates who sent this request, could be any of - * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) + * of those set in nl80211_reg_initiator (%NL80211_REGDOM_SET_BY_*) * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested - * regulatory domain. We have a few special codes: - * 00 - World regulatory domain - * 99 - built by driver but a specific alpha2 cannot be determined - * 98 - result of an intersection between two regulatory domains + * regulatory domain. We have a few special codes: + * 00 - World regulatory domain + * 99 - built by driver but a specific alpha2 cannot be determined + * 98 - result of an intersection between two regulatory domains * 97 - regulatory domain has not yet been configured * @dfs_region: If CRDA responded with a regulatory domain that requires * DFS master operation on a known DFS region (NL80211_DFS_*), @@ -59,8 +59,8 @@ enum environment_cap { * of hint passed. This could be any of the %NL80211_USER_REG_HINT_* * types. * @intersect: indicates whether the wireless core should intersect - * the requested regulatory domain with the presently set regulatory - * domain. + * the requested regulatory domain with the presently set regulatory + * domain. * @processed: indicates whether or not this requests has already been * processed. When the last request is processed it means that the * currently regulatory domain set on cfg80211 is updated from @@ -68,9 +68,9 @@ enum environment_cap { * the last request is not yet processed we must yield until it * is processed before processing any new requests. * @country_ie_checksum: checksum of the last processed and accepted - * country IE + * country IE * @country_ie_env: lets us know if the AP is telling us we are outdoor, - * indoor, or if it doesn't matter + * indoor, or if it doesn't matter * @list: used to insert into the reg_requests_list linked list */ struct regulatory_request { @@ -86,6 +86,39 @@ struct regulatory_request { struct list_head list; }; +/** + * enum ieee80211_regulatory_flags - device regulatory flags + * + * @REGULATORY_CUSTOM_REG: tells us the driver for this device + * has its own custom regulatory domain and cannot identify the + * ISO / IEC 3166 alpha2 it belongs to. When this is enabled + * we will disregard the first regulatory hint (when the + * initiator is %REGDOM_SET_BY_CORE). Drivers that use + * wiphy_apply_custom_regulatory() should have this flag set + * or the regulatory core will set it for the wiphy. + * @REGULATORY_STRICT_REG: tells us the driver for this device will + * ignore regulatory domain settings until it gets its own regulatory + * domain via its regulatory_hint() unless the regulatory hint is + * from a country IE. After its gets its own regulatory domain it will + * only allow further regulatory domain settings to further enhance + * compliance. For example if channel 13 and 14 are disabled by this + * regulatory domain no user regulatory domain can enable these channels + * at a later time. This can be used for devices which do not have + * calibration information guaranteed for frequencies or settings + * outside of its regulatory domain. If used in combination with + * REGULATORY_FLAG_CUSTOM_REG the inspected country IE power settings + * will be followed. + * @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to + * ensure that passive scan flags and beaconing flags may not be lifted by + * cfg80211 due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() + */ +enum ieee80211_regulatory_flags { + REGULATORY_CUSTOM_REG = BIT(0), + REGULATORY_STRICT_REG = BIT(1), + REGULATORY_DISABLE_BEACON_HINTS = BIT(2), +}; + struct ieee80211_freq_range { u32 start_freq_khz; u32 end_freq_khz; -- cgit v1.2.2 From a09a85a013523a8b572dc5732b5c30e0785195f3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 11 Nov 2013 22:15:30 +0100 Subject: cfg80211: add flags to define country IE processing rules 802.11 cards may have different country IE parsing behavioural preferences and vendors may want to support these. These preferences were managed by the REGULATORY_CUSTOM_REG and the REGULATORY_STRICT_REG flags and their combination. Instead of using this existing notation, split out the country IE behavioural preferences as a new flag. This will allow us to add more customizations easily and make the code more maintainable. Cc: Mihir Shete Cc: Henri Bahini Cc: Tushnim Bhattacharyya Signed-off-by: Luis R. Rodriguez [fix up conflicts] Signed-off-by: Johannes Berg --- include/net/regulatory.h | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a6a20e2a54c4..b03ddee3341d 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -96,27 +96,39 @@ struct regulatory_request { * initiator is %REGDOM_SET_BY_CORE). Drivers that use * wiphy_apply_custom_regulatory() should have this flag set * or the regulatory core will set it for the wiphy. - * @REGULATORY_STRICT_REG: tells us the driver for this device will - * ignore regulatory domain settings until it gets its own regulatory - * domain via its regulatory_hint() unless the regulatory hint is - * from a country IE. After its gets its own regulatory domain it will - * only allow further regulatory domain settings to further enhance - * compliance. For example if channel 13 and 14 are disabled by this - * regulatory domain no user regulatory domain can enable these channels - * at a later time. This can be used for devices which do not have - * calibration information guaranteed for frequencies or settings - * outside of its regulatory domain. If used in combination with - * REGULATORY_FLAG_CUSTOM_REG the inspected country IE power settings - * will be followed. + * @REGULATORY_STRICT_REG: tells us that the wiphy for this device + * has regulatory domain that it wishes to be considered as the + * superset for regulatory rules. After this device gets its regulatory + * domain programmed further regulatory hints shall only be considered + * for this device to enhance regulatory compliance, forcing the + * device to only possibly use subsets of the original regulatory + * rules. For example if channel 13 and 14 are disabled by this + * device's regulatory domain no user specified regulatory hint which + * has these channels enabled would enable them for this wiphy, + * the device's original regulatory domain will be trusted as the + * base. You can program the superset of regulatory rules for this + * wiphy with regulatory_hint() for cards programmed with an + * ISO3166-alpha2 country code. wiphys that use regulatory_hint() + * will have their wiphy->regd programmed once the regulatory + * domain is set, and all other regulatory hints will be ignored + * until their own regulatory domain gets programmed. * @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to * ensure that passive scan flags and beaconing flags may not be lifted by * cfg80211 due to regulatory beacon hints. For more information on beacon * hints read the documenation for regulatory_hint_found_beacon() + * @REGULATORY_COUNTRY_IE_FOLLOW_POWER: for devices that have a preference + * that even though they may have programmed their own custom power + * setting prior to wiphy registration, they want to ensure their channel + * power settings are updated for this connection with the power settings + * derived from the regulatory domain. The regulatory domain used will be + * based on the ISO3166-alpha2 from country IE provided through + * regulatory_hint_country_ie() */ enum ieee80211_regulatory_flags { REGULATORY_CUSTOM_REG = BIT(0), REGULATORY_STRICT_REG = BIT(1), REGULATORY_DISABLE_BEACON_HINTS = BIT(2), + REGULATORY_COUNTRY_IE_FOLLOW_POWER = BIT(3), }; struct ieee80211_freq_range { -- cgit v1.2.2 From 2a901468c221e778af52603e006a53d286e81f90 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 11 Nov 2013 22:15:31 +0100 Subject: cfg80211: add an option to disable processing country IEs Certain vendors may want to disable the processing of country IEs so that they can continue using the regulatory domain the driver or user has set. Currently there is no way to stop the core from processing country IEs, so add support to the core to ignore country IE hints. Cc: Mihir Shete Cc: Henri Bahini Cc: Tushnim Bhattacharyya Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/net/regulatory.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b03ddee3341d..92ab80f69efe 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -123,12 +123,17 @@ struct regulatory_request { * derived from the regulatory domain. The regulatory domain used will be * based on the ISO3166-alpha2 from country IE provided through * regulatory_hint_country_ie() + * @REGULATORY_COUNTRY_IE_IGNORE: for devices that have a preference to ignore + * all country IE information processed by the regulatory core. This will + * override %REGULATORY_COUNTRY_IE_FOLLOW_POWER as all country IEs will + * be ignored. */ enum ieee80211_regulatory_flags { REGULATORY_CUSTOM_REG = BIT(0), REGULATORY_STRICT_REG = BIT(1), REGULATORY_DISABLE_BEACON_HINTS = BIT(2), REGULATORY_COUNTRY_IE_FOLLOW_POWER = BIT(3), + REGULATORY_COUNTRY_IE_IGNORE = BIT(4), }; struct ieee80211_freq_range { -- cgit v1.2.2 From fbdd90ea830162960fb8fbe377dfef9a54d74d2f Mon Sep 17 00:00:00 2001 From: Eyal Shapira Date: Mon, 11 Nov 2013 20:14:00 +0200 Subject: mac80211: enable easier manipulation of VHT beamforming caps Introduce shift and mask defines for beamformee STS cap and number of sounding dimensions cap as these can take any 3 bit value. While at it also cleanup an unrequired parenthesis. Signed-off-by: Eyal Shapira Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8c3b26a21574..776cbb80d098 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1411,8 +1411,12 @@ struct ieee80211_vht_operation { #define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 -#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MAX 0x0000e000 -#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX 0x00070000 +#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT 13 +#define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK \ + (7 << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT) +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT 16 +#define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK \ + (7 << IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT) #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 -- cgit v1.2.2 From 21f659bf1f93f7052b977d95cca560f02dc2edce Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 11 Nov 2013 20:14:01 +0200 Subject: mac80211: add min required channel definition field Add a new field to ieee80211_chanctx_conf to indicate the min required channel configuration. Tuning to a narrower channel might help reducing the noise level and saving some power. The min required channel definition is the max of all min required channel definitions of the interfaces bound to this channel context. In AP mode, use 20MHz when there are no connected station. When a new station is added/removed, calculate the new max bandwidth supported by any of the stations (e.g. 80MHz when 80MHz and 40MHz stations are connected). In other cases, simply use bss_conf.chandef as the min required chandef. Notify drivers about changes to this field by calling drv_change_chanctx with a new CHANGE_MIN_WIDTH notification. Signed-off-by: Eliad Peller Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cca02b2f5c7e..3cd408b326de 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -154,12 +154,14 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_RADAR: radar detection flag changed * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel, * this is used only with channel switching with CSA + * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed */ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0), IEEE80211_CHANCTX_CHANGE_RX_CHAINS = BIT(1), IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2), IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), + IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), }; /** @@ -169,6 +171,7 @@ enum ieee80211_chanctx_change { * that contains it is visible in mac80211 only. * * @def: the channel definition + * @min_def: the minimum channel definition currently required. * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -180,6 +183,7 @@ enum ieee80211_chanctx_change { */ struct ieee80211_chanctx_conf { struct cfg80211_chan_def def; + struct cfg80211_chan_def min_def; u8 rx_chains_static, rx_chains_dynamic; -- cgit v1.2.2 From 4c7d3982a6e37831382b9ef90aa0dbadc0bf3a22 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 Nov 2013 18:54:02 +0100 Subject: cfg80211: use enum nl80211_dfs_regions for dfs_region everywhere u8 was used in some other places, just stick to the enum, this forces us to express the values that are expected. Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/net/regulatory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 92ab80f69efe..c96a0b86f342 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -79,7 +79,7 @@ struct regulatory_request { enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; char alpha2[2]; - u8 dfs_region; + enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; enum environment_cap country_ie_env; @@ -157,7 +157,7 @@ struct ieee80211_regdomain { struct rcu_head rcu_head; u32 n_reg_rules; char alpha2[2]; - u8 dfs_region; + enum nl80211_dfs_regions dfs_region; struct ieee80211_reg_rule reg_rules[]; }; -- cgit v1.2.2 From 01e0daa43f129fc1a6bc6f1197343c0293af866d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 9 Nov 2013 14:57:54 +0100 Subject: cfg80211: fix reporting 5/10 MHz support to user space nla_put_flag needs a real nl80211 attribute id, not a wiphy flag bit. While at it, split 5 and 10 MHz capability flags in case we ever need to support hardware that can only do one of the two. Also move the flag settings to the split-only information so we don't increase the space needed for old userspace. Signed-off-by: Felix Fietkau [change location of flag setting] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7e25164adfe9..129b7b087148 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1515,6 +1515,11 @@ enum nl80211_commands { * to react to radar events, e.g. initiate a channel switch or leave the * IBSS network. * + * @NL80211_ATTR_SUPPORT_5_MHZ: A flag indicating that the device supports + * 5 MHz channel bandwidth. + * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports + * 10 MHz channel bandwidth. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1831,6 +1836,9 @@ enum nl80211_attrs { NL80211_ATTR_HANDLE_DFS, + NL80211_ATTR_SUPPORT_5_MHZ, + NL80211_ATTR_SUPPORT_10_MHZ, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.2 From b176e629402f41f2b984d3aa842ddae23ed5562e Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Mon, 18 Nov 2013 19:06:49 +0200 Subject: cfg80211: aggregate mgmt_tx parameters into a struct Change cfg80211 and mac80211 to use cfg80211_mgmt_tx_params struct to aggregate parameters for mgmt_tx functions. This makes the functions' signatures less clumsy and allows less painful parameters extension. Signed-off-by: Andrei Otcheretianski [fix all other drivers] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bacc5033f0b6..6c2bc329a900 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1943,6 +1943,29 @@ struct cfg80211_update_ft_ies_params { size_t ie_len; }; +/** + * struct cfg80211_mgmt_tx_params - mgmt tx parameters + * + * This structure provides information needed to transmit a mgmt frame + * + * @chan: channel to use + * @offchan: indicates wether off channel operation is required + * @wait: duration for ROC + * @buf: buffer to transmit + * @len: buffer length + * @no_cck: don't use cck rates for this frame + * @dont_wait_for_ack: tells the low level not to wait for an ack + */ +struct cfg80211_mgmt_tx_params { + struct ieee80211_channel *chan; + bool offchan; + unsigned int wait; + const u8 *buf; + size_t len; + bool no_cck; + bool dont_wait_for_ack; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2341,9 +2364,8 @@ struct cfg80211_ops { u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, - struct ieee80211_channel *chan, bool offchan, - unsigned int wait, const u8 *buf, size_t len, - bool no_cck, bool dont_wait_for_ack, u64 *cookie); + struct cfg80211_mgmt_tx_params *params, + u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); -- cgit v1.2.2 From e487eaeb076a44c69dc61348cbc903151bb8fcbd Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 21 Nov 2013 18:19:51 +0100 Subject: cfg80211/mac80211/ath6kl: acquire wdev lock outside ch_switch_notify The channel switch notification should be sent under the wdev/sdata-lock, preferably in the same moment as the channel change happens, to avoid races by other callers (e.g. start/stop_ap). This also adds the previously missing sdata_lock protection in csa_finalize_work. Reported-by: Johannes Berg Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6c2bc329a900..e9abc7b536cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4286,7 +4286,8 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * @dev: the device which switched channels * @chandef: the new channel definition * - * Acquires wdev_lock, so must only be called from sleepable driver context! + * Caller must acquire wdev_lock, therefore must only be called from sleepable + * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef); -- cgit v1.2.2 From 18cfd3bfc904e2360af3a1c059c84b3f1944afb4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Dec 2013 13:21:36 +0100 Subject: Revert "mac80211: add driver callback for per-interface multicast filter" This reverts commit 488b366a452934141959384c7a1b52b22d6154ef. The code isn't used by anyone, and the Intel driver isn't planning to use it either right now. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3cd408b326de..73d99bc3e636 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2398,9 +2398,6 @@ enum ieee80211_roc_type { * See the section "Frame filtering" for more information. * This callback must be implemented and can sleep. * - * @set_multicast_list: Configure the device's interface specific RX multicast - * filter. This callback is optional. This callback must be atomic. - * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit * must be set or cleared for a given STA. Must be atomic. * @@ -2764,10 +2761,6 @@ struct ieee80211_ops { unsigned int changed_flags, unsigned int *total_flags, u64 multicast); - void (*set_multicast_list)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, bool allmulti, - struct netdev_hw_addr_list *mc_list); - int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, -- cgit v1.2.2 From 60f4a7b1676c9028edb90e22f6994ebb698c9088 Mon Sep 17 00:00:00 2001 From: Marek Kwaczynski Date: Tue, 3 Dec 2013 10:04:59 +0100 Subject: nl80211/cfg80211: Set Operating Mode Notification This attribute is needed for setting Operating Mode Notification in AP mode from User Space. This functionality is required when User Space received Assoc Request contains Operation Mode Notification element. Signed-off-by: Marek Kwaczynski [fix typos, nl80211 documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e9abc7b536cd..47290a4059ae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -747,6 +747,8 @@ enum station_parameters_apply_mask { * @supported_channels_len: number of supported channels * @supported_oper_classes: supported oper classes in IEEE 802.11 format * @supported_oper_classes_len: number of supported operating classes + * @opmode_notif: operating mode field from Operating Mode Notification + * @opmode_notif_used: information if operating mode field is used */ struct station_parameters { const u8 *supported_rates; @@ -770,6 +772,8 @@ struct station_parameters { u8 supported_channels_len; const u8 *supported_oper_classes; u8 supported_oper_classes_len; + u8 opmode_notif; + bool opmode_notif_used; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 129b7b087148..9a4d0e18251c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1520,6 +1520,10 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_10_MHZ: A flag indicating that the device supports * 10 MHz channel bandwidth. * + * @NL80211_ATTR_OPMODE_NOTIF: Operating mode field from Operating Mode + * Notification Element based on association request when used with + * %NL80211_CMD_NEW_STATION; u8 attribute. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1839,6 +1843,8 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_5_MHZ, NL80211_ATTR_SUPPORT_10_MHZ, + NL80211_ATTR_OPMODE_NOTIF, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.2 From ad7e718c9b4f717823fd920a0103f7b0fb06183f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Nov 2013 13:37:47 +0100 Subject: nl80211: vendor command support Add support for vendor-specific commands to nl80211. This is intended to be used for really vendor-specific functionality that can't be implemented in a generic fashion for any reason. It's *NOT* intended to be used for any normal/generic feature or any optimisations that could be implemented across drivers. Currently, only vendor commands (with replies) are supported, no dump operations or vendor-specific notifications. Also add a function wdev_to_ieee80211_vif() to mac80211 which is needed for mac80211-based drivers wanting to implement any vendor commands. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 116 +++++++++++++++++++++++++++++++++++++++++-- include/net/mac80211.h | 13 +++++ include/uapi/linux/nl80211.h | 41 +++++++++++++++ 3 files changed, 167 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 47290a4059ae..884ac69b5e55 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2679,6 +2679,34 @@ struct wiphy_coalesce_support { int max_pkt_offset; }; +/** + * enum wiphy_vendor_command_flags - validation flags for vendor commands + * @WIPHY_VENDOR_CMD_NEED_WDEV: vendor command requires wdev + * @WIPHY_VENDOR_CMD_NEED_NETDEV: vendor command requires netdev + * @WIPHY_VENDOR_CMD_NEED_RUNNING: interface/wdev must be up & running + * (must be combined with %_WDEV or %_NETDEV) + */ +enum wiphy_vendor_command_flags { + WIPHY_VENDOR_CMD_NEED_WDEV = BIT(0), + WIPHY_VENDOR_CMD_NEED_NETDEV = BIT(1), + WIPHY_VENDOR_CMD_NEED_RUNNING = BIT(2), +}; + +/** + * struct wiphy_vendor_command - vendor command definition + * @info: vendor command identifying information, as used in nl80211 + * @flags: flags, see &enum wiphy_vendor_command_flags + * @doit: callback for the operation, note that wdev is %NULL if the + * flags didn't ask for a wdev and non-%NULL otherwise; the data + * pointer may be %NULL if userspace provided no data at all + */ +struct wiphy_vendor_command { + struct nl80211_vendor_cmd_info info; + u32 flags; + int (*doit)(struct wiphy *wiphy, struct wireless_dev *wdev, + const void *data, int data_len); +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -2792,6 +2820,9 @@ struct wiphy_coalesce_support { * @extended_capabilities_mask: mask of the valid values * @extended_capabilities_len: length of the extended capabilities * @coalesce: packet coalescing support information + * + * @vendor_commands: array of vendor commands supported by the hardware + * @n_vendor_commands: number of vendor commands */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2903,6 +2934,9 @@ struct wiphy { const struct wiphy_coalesce_support *coalesce; + const struct wiphy_vendor_command *vendor_commands; + int n_vendor_commands; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -3847,6 +3881,75 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); */ void wiphy_rfkill_stop_polling(struct wiphy *wiphy); +/** + * DOC: Vendor commands + * + * Occasionally, there are special protocol or firmware features that + * can't be implemented very openly. For this and similar cases, the + * vendor command functionality allows implementing the features with + * (typically closed-source) userspace and firmware, using nl80211 as + * the configuration mechanism. + * + * A driver supporting vendor commands must register them as an array + * in struct wiphy, with handlers for each one, each command has an + * OUI and sub command ID to identify it. + * + * Note that this feature should not be (ab)used to implement protocol + * features that could openly be shared across drivers. In particular, + * it must never be required to use vendor commands to implement any + * "normal" functionality that higher-level userspace like connection + * managers etc. need. + */ + +struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int approxlen); + +/** + * cfg80211_vendor_cmd_alloc_reply_skb - allocate vendor command reply + * @wiphy: the wiphy + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * + * This function allocates and pre-fills an skb for a reply to + * a vendor command. Since it is intended for a reply, calling + * it outside of a vendor command's doit() operation is invalid. + * + * The returned skb is pre-filled with some identifying data in + * a way that any data that is put into the skb (with skb_put(), + * nla_put() or similar) will end up being within the + * %NL80211_ATTR_VENDOR_DATA attribute, so all that needs to be done + * with the skb is adding data for the corresponding userspace tool + * which can then read that data out of the vendor data attribute. + * You must not modify the skb in any other way. + * + * When done, call cfg80211_vendor_cmd_reply() with the skb and return + * its error code as the result of the doit() operation. + * + * Return: An allocated and pre-filled skb. %NULL if any errors happen. + */ +static inline struct sk_buff * +cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) +{ + return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_VENDOR, + NL80211_ATTR_VENDOR_DATA, approxlen); +} + +/** + * cfg80211_vendor_cmd_reply - send the reply skb + * @skb: The skb, must have been allocated with + * cfg80211_vendor_cmd_alloc_reply_skb() + * + * Since calling this function will usually be the last thing + * before returning from the vendor command doit() you should + * return the error code. Note that this function consumes the + * skb regardless of the return value. + * + * Return: An error code or 0 on success. + */ +int cfg80211_vendor_cmd_reply(struct sk_buff *skb); + #ifdef CONFIG_NL80211_TESTMODE /** * DOC: Test mode @@ -3882,8 +3985,12 @@ void wiphy_rfkill_stop_polling(struct wiphy *wiphy); * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ -struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, - int approxlen); +static inline struct sk_buff * +cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, int approxlen) +{ + return __cfg80211_alloc_reply_skb(wiphy, NL80211_CMD_TESTMODE, + NL80211_ATTR_TESTDATA, approxlen); +} /** * cfg80211_testmode_reply - send the reply skb @@ -3897,7 +4004,10 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, * * Return: An error code or 0 on success. */ -int cfg80211_testmode_reply(struct sk_buff *skb); +static inline int cfg80211_testmode_reply(struct sk_buff *skb) +{ + return cfg80211_vendor_cmd_reply(skb); +} /** * cfg80211_testmode_alloc_event_skb - allocate testmode event diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 73d99bc3e636..c014acc09ebc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1162,6 +1162,19 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } +/** + * wdev_to_ieee80211_vif - return a vif struct from a wdev + * @wdev: the wdev to get the vif for + * + * This can be used by mac80211 drivers with direct cfg80211 APIs + * (like the vendor commands) that get a wdev. + * + * Note that this function may return %NULL if the given wdev isn't + * associated with a vif that the driver knows about (e.g. monitor + * or AP_VLAN interfaces.) + */ +struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); + /** * enum ieee80211_key_flags - key flags * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9a4d0e18251c..72ba3584c90d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -693,6 +693,15 @@ * other station that transmission must be blocked until the channel * switch is complete. * + * @NL80211_CMD_VENDOR: Vendor-specified command/event. The command is specified + * by the %NL80211_ATTR_VENDOR_ID attribute and a sub-command in + * %NL80211_ATTR_VENDOR_SUBCMD. Parameter(s) can be transported in + * %NL80211_ATTR_VENDOR_DATA. + * For feature advertisement, the %NL80211_ATTR_VENDOR_DATA attribute is + * used in the wiphy data as a nested attribute containing descriptions + * (&struct nl80211_vendor_cmd_info) of the supported vendor commands. + * This may also be sent as an event with the same attributes. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -860,6 +869,8 @@ enum nl80211_commands { NL80211_CMD_CHANNEL_SWITCH, + NL80211_CMD_VENDOR, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1524,6 +1535,12 @@ enum nl80211_commands { * Notification Element based on association request when used with * %NL80211_CMD_NEW_STATION; u8 attribute. * + * @NL80211_ATTR_VENDOR_ID: The vendor ID, either a 24-bit OUI or, if + * %NL80211_VENDOR_ID_IS_LINUX is set, a special Linux ID (not used yet) + * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command + * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this + * attribute is also used for vendor command feature advertisement + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1845,6 +1862,10 @@ enum nl80211_attrs { NL80211_ATTR_OPMODE_NOTIF, + NL80211_ATTR_VENDOR_ID, + NL80211_ATTR_VENDOR_SUBCMD, + NL80211_ATTR_VENDOR_DATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3965,4 +3986,24 @@ enum nl80211_rxmgmt_flags { NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0, }; +/* + * If this flag is unset, the lower 24 bits are an OUI, if set + * a Linux nl80211 vendor ID is used (no such IDs are allocated + * yet, so that's not valid so far) + */ +#define NL80211_VENDOR_ID_IS_LINUX 0x80000000 + +/** + * struct nl80211_vendor_cmd_info - vendor command data + * @vendor_id: If the %NL80211_VENDOR_ID_IS_LINUX flag is clear, then the + * value is a 24-bit OUI; if it is set then a separately allocated ID + * may be used, but no such IDs are allocated yet. New IDs should be + * added to this file when needed. + * @subcmd: sub-command ID for the command + */ +struct nl80211_vendor_cmd_info { + __u32 vendor_id; + __u32 subcmd; +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.2 From 9149761ad74f618371b58fd37141d4c3706d88fc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Dec 2013 11:20:20 +0200 Subject: Bluetooth: Add module parameter to enable LE CoC support Along with the L2CAP Connection Oriented Channels features it is now allowed to use both custom fixed CIDs as well as PSM based (connection oriented connections). Since the support for this (with the subsequent patches) is still on an experimental stage, add a module parameter to enable it. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index c853b16de4ef..94645d56fea7 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -809,6 +809,7 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan) } extern bool disable_ertm; +extern bool enable_lecoc; int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); -- cgit v1.2.2 From ee5ec5cf00760b8810f7e3350b82434092a960e3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 13 May 2013 12:15:39 +0300 Subject: Bluetooth: Add definitions for LE connection oriented channels This patch adds the necessary defines and structs for LE connection oriented channels. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 94645d56fea7..84c0754b87dc 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -112,6 +112,9 @@ struct l2cap_conninfo { #define L2CAP_MOVE_CHAN_CFM_RSP 0x11 #define L2CAP_CONN_PARAM_UPDATE_REQ 0x12 #define L2CAP_CONN_PARAM_UPDATE_RSP 0x13 +#define L2CAP_LE_CONN_REQ 0x14 +#define L2CAP_LE_CONN_RSP 0x15 +#define L2CAP_LE_CREDITS 0x16 /* L2CAP extended feature mask */ #define L2CAP_FEAT_FLOWCTL 0x00000001 @@ -257,6 +260,10 @@ struct l2cap_conn_rsp { #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 #define L2CAP_CR_BAD_AMP 0x0005 +#define L2CAP_CR_AUTHENTICATION 0x0005 +#define L2CAP_CR_AUTHORIZATION 0x0006 +#define L2CAP_CR_BAD_KEY_SIZE 0x0007 +#define L2CAP_CR_ENCRYPTION 0x0008 /* connect/create channel status */ #define L2CAP_CS_NO_INFO 0x0000 @@ -423,6 +430,30 @@ struct l2cap_conn_param_update_rsp { #define L2CAP_CONN_PARAM_ACCEPTED 0x0000 #define L2CAP_CONN_PARAM_REJECTED 0x0001 +#define L2CAP_LE_MAX_CREDITS 10 +#define L2CAP_LE_DEFAULT_MPS 230 + +struct l2cap_le_conn_req { + __le16 psm; + __le16 scid; + __le16 mtu; + __le16 mps; + __le16 credits; +} __packed; + +struct l2cap_le_conn_rsp { + __le16 dcid; + __le16 mtu; + __le16 mps; + __le16 credits; + __le16 result; +} __packed; + +struct l2cap_le_credits { + __le16 cid; + __le16 credits; +} __packed; + /* ----- L2CAP channels and connections ----- */ struct l2cap_seq_list { __u16 head; -- cgit v1.2.2 From 27e2d4c8d28be1d1b4ecfbffab572d7dbd35254d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 14 May 2013 13:27:21 +0300 Subject: Bluetooth: Add basic LE L2CAP connect request receiving support This patch adds the necessary boiler plate code to handle receiving L2CAP connect requests over LE and respond to them with a proper connect response. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 84c0754b87dc..84b25202596e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -846,6 +846,7 @@ int l2cap_init_sockets(void); void l2cap_cleanup_sockets(void); bool l2cap_is_socket(struct socket *sock); +void __l2cap_le_connect_rsp_defer(struct l2cap_chan *chan); void __l2cap_connect_rsp_defer(struct l2cap_chan *chan); int l2cap_add_psm(struct l2cap_chan *chan, bdaddr_t *src, __le16 psm); -- cgit v1.2.2 From 3831971355d901ccfb76533a422b4395072849a3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 17 May 2013 12:49:23 +0300 Subject: Bluetooth: Add LE L2CAP flow control mode The LE connection oriented channels have their own mode with its own data transfer rules. In order to implement this properly we need to distinguish L2CAP channels operating in this mode from other modes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 84b25202596e..063d786b0991 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -328,6 +328,12 @@ struct l2cap_conf_rfc { #define L2CAP_MODE_ERTM 0x03 #define L2CAP_MODE_STREAMING 0x04 +/* Unlike the above this one doesn't actually map to anything that would + * ever be sent over the air. Therefore, use a value that's unlikely to + * ever be used in the BR/EDR configuration phase. + */ +#define L2CAP_MODE_LE_FLOWCTL 0x80 + struct l2cap_conf_efs { __u8 id; __u8 stype; @@ -861,6 +867,7 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan); void l2cap_chan_set_defaults(struct l2cap_chan *chan); +void l2cap_le_flowctl_init(struct l2cap_chan *chan); int l2cap_ertm_init(struct l2cap_chan *chan); void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); -- cgit v1.2.2 From 0cd75f7ed740a8c605fe55ac71a9b5162c612422 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 17 May 2013 13:09:05 +0300 Subject: Bluetooth: Track LE L2CAP credits in l2cap_chan This patch adds tracking of L2CAP connection oriented channel local and remote credits to struct l2cap_chan and ensures that connect requests and responses contain the right values. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 063d786b0991..d7cd1ddfe576 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -514,6 +514,9 @@ struct l2cap_chan { __u16 monitor_timeout; __u16 mps; + __u16 tx_credits; + __u16 rx_credits; + __u8 tx_state; __u8 rx_state; -- cgit v1.2.2 From 1f435424ce2c93c31c3887ec67e3afb6056f18f6 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Dec 2013 16:34:18 +0200 Subject: Bluetooth: Add new BT_SNDMTU and BT_RCVMTU socket options This patch adds new socket options for LE sockets since the existing L2CAP_OPTIONS socket option is not usable for LE. For now, the new socket options also require LE CoC support to be explicitly enabled to leave some playroom in case something needs to be changed in a backwards incompatible way. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 2a628b28249f..f4f9ee466791 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -115,6 +115,9 @@ struct bt_voice { #define BT_VOICE_TRANSPARENT 0x0003 #define BT_VOICE_CVSD_16BIT 0x0060 +#define BT_SNDMTU 12 +#define BT_RCVMTU 13 + __printf(1, 2) int bt_info(const char *fmt, ...); __printf(1, 2) -- cgit v1.2.2 From 837776f7904024df451422f32b09c67e88ae2aa2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 15 Oct 2013 11:03:18 +0300 Subject: Bluetooth: Introduce L2CAP channel callback for suspending Setting the BT_SK_SUSPEND socket flag from the L2CAP core is causing a dependency on the socket. So instead of doing that, use a channel callback into the socket handling to suspend. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d7cd1ddfe576..b0d7e4adb167 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -593,6 +593,7 @@ struct l2cap_ops { void (*ready) (struct l2cap_chan *chan); void (*defer) (struct l2cap_chan *chan); void (*resume) (struct l2cap_chan *chan); + void (*suspend) (struct l2cap_chan *chan); void (*set_shutdown) (struct l2cap_chan *chan); long (*get_sndtimeo) (struct l2cap_chan *chan); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, -- cgit v1.2.2 From 595177f311f5d42804f729e927f7eac87bbcc21b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Dec 2013 22:12:22 +0200 Subject: Bluetooth: Fix LE L2CAP Connect Request handling together with SMP Unlike BR/EDR, for LE when we're in the BT_CONNECT state we may or may not have already have sent the Connect Request. This means that we need some extra tracking of the request. This patch adds an extra channel flag to prevent the request from being sent a second time. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index b0d7e4adb167..292749c0f4bc 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -695,6 +695,7 @@ enum { FLAG_EXT_CTRL, FLAG_EFS_ENABLE, FLAG_DEFER_SETUP, + FLAG_LE_CONN_REQ_SENT, }; enum { -- cgit v1.2.2 From e77af7559238895ec5fd1706762e1c9f890dcc7e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 8 Oct 2013 10:31:00 +0200 Subject: Bluetooth: Fix CID ranges for LE CoC CID allocations LE CoC used differend CIC ranges than BR/EDR L2CAP. The start of the range is the same (0x0040) but the range ends at 0x007f (unlike BR/EDR where it goes all the way to 0xffff). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 292749c0f4bc..abba765c174e 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -252,6 +252,7 @@ struct l2cap_conn_rsp { #define L2CAP_CID_SMP 0x0006 #define L2CAP_CID_DYN_START 0x0040 #define L2CAP_CID_DYN_END 0xffff +#define L2CAP_CID_LE_DYN_END 0x007f /* connect/create channel results */ #define L2CAP_CR_SUCCESS 0x0000 -- cgit v1.2.2 From 0ce43ce60d5e0c079d33be1fe33ba92828c7e5da Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 5 Dec 2013 14:55:33 +0200 Subject: Bluetooth: Simplify l2cap_chan initialization for LE CoC The values in l2cap_chan that are used for actually transmitting data only need to be initialized right after we've received an L2CAP Connect Request or just before we send one. The only thing that we need to initialize though bind() and connect() is the chan->mode value. This way all other initializations can be done in the l2cap_le_flowctl_init function (which now becomes private to l2cap_core.c) and the l2cap_le_flowctl_start function can be completely removed. Also, since the l2cap_sock_init function initializes the imtu and omtu to adequate values these do not need to be part of l2cap_le_flowctl_init. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index abba765c174e..e149e992fdae 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -873,7 +873,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan); void l2cap_chan_set_defaults(struct l2cap_chan *chan); -void l2cap_le_flowctl_init(struct l2cap_chan *chan); int l2cap_ertm_init(struct l2cap_chan *chan); void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); -- cgit v1.2.2 From d1e33e654ef6bb3dee766353ed9dd31e7dcb8a94 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 5 Dec 2013 10:02:15 +0100 Subject: cfg80211: in bitrate_mask, rename mcs to ht_mcs Rename NL80211_TXRATE_MCS to NL80211_TXRATE_HT and also rename mcs to ht_mcs in struct cfg80211_bitrate_mask. Signed-off-by: Janusz Dziedzic [reword commit message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- include/uapi/linux/nl80211.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 884ac69b5e55..6c3a650f3a8f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1766,7 +1766,7 @@ enum wiphy_params_flags { struct cfg80211_bitrate_mask { struct { u32 legacy; - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; + u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; } control[IEEE80211_NUM_BANDS]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 72ba3584c90d..6e700645cd27 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3110,7 +3110,7 @@ enum nl80211_key_attributes { * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with * 1 = 500 kbps) but without the IE length restriction (at most * %NL80211_MAX_SUPP_RATES in a single array). - * @NL80211_TXRATE_MCS: HT (MCS) rates allowed for TX rate selection + * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection * in an array of MCS numbers. * @__NL80211_TXRATE_AFTER_LAST: internal * @NL80211_TXRATE_MAX: highest TX rate attribute @@ -3118,13 +3118,15 @@ enum nl80211_key_attributes { enum nl80211_tx_rate_attributes { __NL80211_TXRATE_INVALID, NL80211_TXRATE_LEGACY, - NL80211_TXRATE_MCS, + NL80211_TXRATE_HT, /* keep last */ __NL80211_TXRATE_AFTER_LAST, NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 }; +#define NL80211_TXRATE_MCS NL80211_TXRATE_HT + /** * enum nl80211_band - Frequency band * @NL80211_BAND_2GHZ: 2.4 GHz ISM band -- cgit v1.2.2 From 8d549c4f5d92d80fc6f888fd314e10972ae0ec37 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 7 Nov 2013 17:47:49 +0800 Subject: xfrm: Using the right namespace to migrate key info because the home agent could surely be run on a different net namespace other than init_net. The original behavior could lead into inconsistent of key info. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6b82fdf4ba71..5b522c5f0188 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1581,7 +1581,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k); + struct xfrm_kmaddress *k, struct net *net); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -- cgit v1.2.2 From 283bc9f35bbbcb0e9ab4e6d2427da7f9f710d52d Mon Sep 17 00:00:00 2001 From: Fan Du Date: Thu, 7 Nov 2013 17:47:50 +0800 Subject: xfrm: Namespacify xfrm state/policy locks By semantics, xfrm layer is fully name space aware, so will the locks, e.g. xfrm_state/pocliy_lock. Ensure exclusive access into state/policy link list for different name space with one global lock is not right in terms of semantics aspect at first place, as they are indeed mutually independent with each other, but also more seriously causes scalability problem. One practical scenario is on a Open Network Stack, more than hundreds of lxc tenants acts as routers within one host, a global xfrm_state/policy_lock becomes the bottleneck. But onces those locks are decoupled in a per-namespace fashion, locks contend is just with in specific name space scope, without causing additional SPD/SAD access delay for other name space. Also this patch improve scalability while as without changing original xfrm behavior. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 4 ++++ include/net/xfrm.h | 11 +++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 5299e69a32af..ea28404e9d79 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,6 +59,10 @@ struct netns_xfrm { #if IS_ENABLED(CONFIG_IPV6) struct dst_ops xfrm6_dst_ops; #endif + spinlock_t xfrm_state_lock; + spinlock_t xfrm_policy_sk_bundle_lock; + rwlock_t xfrm_policy_lock; + struct mutex xfrm_cfg_mutex; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5b522c5f0188..59f5d0a6c7f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -53,7 +53,6 @@ #define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif -extern struct mutex xfrm_cfg_mutex; /* Organization of SPD aka "XFRM rules" ------------------------------------ @@ -1409,7 +1408,7 @@ static inline void xfrm_sysctl_fini(struct net *net) void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); -void xfrm_state_walk_done(struct xfrm_state_walk *walk); +void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net); struct xfrm_state *xfrm_state_alloc(struct net *net); struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1436,12 +1435,12 @@ struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family); + unsigned short family, struct net *net); int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsigned short family); #else static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, - int n, unsigned short family) + int n, unsigned short family, struct net *net) { return -ENOSYS; } @@ -1553,7 +1552,7 @@ void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); -void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, int dir, @@ -1576,7 +1575,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, -- cgit v1.2.2 From 5b8ef3415a21f173ab115e90ec92c071a03f22d7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 27 Aug 2013 13:43:30 +0200 Subject: xfrm: Remove ancient sleeping when the SA is in acquire state We now queue packets to the policy if the states are not yet resolved, this replaces the ancient sleeping code. Also the sleeping can cause indefinite task hangs if the needed state does not get resolved. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index ea28404e9d79..1006a265beb3 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -33,8 +33,6 @@ struct netns_xfrm { struct hlist_head state_gc_list; struct work_struct state_gc_work; - wait_queue_head_t km_waitq; - struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; -- cgit v1.2.2 From 0e0d44ab4275549998567cd4700b43f7496eb62b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Aug 2013 08:04:14 +0200 Subject: net: Remove FLOWI_FLAG_CAN_SLEEP FLOWI_FLAG_CAN_SLEEP was used to notify xfrm about the posibility to sleep until the needed states are resolved. This code is gone, so FLOWI_FLAG_CAN_SLEEP is not needed anymore. Signed-off-by: Steffen Klassert --- include/net/flow.h | 3 +-- include/net/ipv6.h | 6 ++---- include/net/route.h | 8 +++----- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 65ce471d2ab5..d23e7fa2042e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -20,8 +20,7 @@ struct flowi_common { __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_CAN_SLEEP 0x02 -#define FLOWI_FLAG_KNOWN_NH 0x04 +#define FLOWI_FLAG_KNOWN_NH 0x02 __u32 flowic_secid; }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eb198acaac1d..26c4199ddc36 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -711,11 +711,9 @@ void ip6_flush_pending_frames(struct sock *sk); int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep); + const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep); + const struct in6_addr *final_dst); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); diff --git a/include/net/route.h b/include/net/route.h index f68c167280a7..638e3ebf76f3 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -239,14 +239,12 @@ static inline char rt_tos2priority(u8 tos) static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, u32 tos, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk, bool can_sleep) + struct sock *sk) { __u8 flow_flags = 0; if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (can_sleep) - flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, protocol, flow_flags, dst, src, dport, sport); @@ -256,13 +254,13 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, __be32 src, u32 tos, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk, bool can_sleep) + struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; ip_route_connect_init(fl4, dst, src, tos, oif, protocol, - sport, dport, sk, can_sleep); + sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); -- cgit v1.2.2 From 4b2f13a25133b115eb56771bd4a8e71a82aea968 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 6 Dec 2013 06:28:48 -0800 Subject: sctp: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL so that we do not have to keep updating the header comments anytime the address changes. CC: Vlad Yasevich CC: Neil Horman Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/sctp.h | 5 ++--- include/net/sctp/sctp.h | 5 ++--- include/net/sctp/sm.h | 5 ++--- include/net/sctp/structs.h | 5 ++--- include/net/sctp/tsnmap.h | 5 ++--- include/net/sctp/ulpevent.h | 5 ++--- include/net/sctp/ulpqueue.h | 5 ++--- include/uapi/linux/sctp.h | 5 ++--- 8 files changed, 16 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 3bfe8d6ee248..c4c5eba26d9f 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -23,9 +23,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c5fe80697f8d..610a8c8738fa 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4ef75af340b6..7f4eeb340a54 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -21,9 +21,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email addresses: diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ea0ca5f6e629..e416d6ac9c70 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email addresses: diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 54bbbe547303..31b8dbaad45a 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 27b9f5c90153..daacb32b55b5 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -25,9 +25,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index b0cf5d54d717..e0dce07b8794 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -24,9 +24,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email addresses: diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index ca451e99b28b..266022a2be4a 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -22,9 +22,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): -- cgit v1.2.2 From a6227e26d946bc56df47ca5fe418660a07ef8288 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 6 Dec 2013 09:13:40 -0800 Subject: include/net/: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL so that we do not have to keep updating the header comments anytime the address changes. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 3 +-- include/net/dcbevent.h | 3 +-- include/net/dcbnl.h | 3 +-- include/net/irda/discovery.h | 4 +--- include/net/irda/ircomm_core.h | 4 +--- include/net/irda/ircomm_event.h | 4 +--- include/net/irda/ircomm_lmp.h | 4 +--- include/net/irda/ircomm_param.h | 4 +--- include/net/irda/ircomm_ttp.h | 4 +--- include/net/irda/ircomm_tty.h | 4 +--- include/net/irda/ircomm_tty_attach.h | 4 +--- include/net/irda/irda_device.h | 4 +--- include/net/irda/irlap_event.h | 4 +--- include/net/irda/irlap_frame.h | 4 +--- include/net/irda/parameters.h | 4 +--- include/net/irda/qos.h | 4 +--- include/net/mip6.h | 3 +-- include/net/netlabel.h | 3 +-- include/net/nfc/hci.h | 4 +--- include/net/nfc/llc.h | 4 +--- include/net/nfc/nci.h | 3 +-- include/net/nfc/nci_core.h | 3 +-- include/net/nfc/nfc.h | 4 +--- include/net/sctp/auth.h | 5 ++--- include/net/sctp/checksum.h | 5 ++--- include/net/sctp/command.h | 5 ++--- include/net/sctp/constants.h | 5 ++--- include/net/tc_act/tc_skbedit.h | 3 +-- 28 files changed, 32 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a8c2ef6d3b93..2179d071f68f 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -26,8 +26,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . * */ diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h index d2f3041c0dfa..aec07c8a660a 100644 --- a/include/net/dcbevent.h +++ b/include/net/dcbevent.h @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see . * * Author: John Fastabend */ diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index fc5d5dcebb00..a975edf21b22 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see . * * Author: Lucy Liu */ diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h index 0ce93398720d..63ae32530567 100644 --- a/include/net/irda/discovery.h +++ b/include/net/irda/discovery.h @@ -23,9 +23,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_core.h b/include/net/irda/ircomm_core.h index 69b610acd2df..2a580ce9edad 100644 --- a/include/net/irda/ircomm_core.h +++ b/include/net/irda/ircomm_core.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h index bc0c6f31f1c6..5bbc32998d57 100644 --- a/include/net/irda/ircomm_event.h +++ b/include/net/irda/ircomm_event.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_lmp.h b/include/net/irda/ircomm_lmp.h index ae02106be590..5042a5021a04 100644 --- a/include/net/irda/ircomm_lmp.h +++ b/include/net/irda/ircomm_lmp.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_param.h b/include/net/irda/ircomm_param.h index e6678800c41f..1f67432321c4 100644 --- a/include/net/irda/ircomm_param.h +++ b/include/net/irda/ircomm_param.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_ttp.h b/include/net/irda/ircomm_ttp.h index 403081ed725c..c5627288bca3 100644 --- a/include/net/irda/ircomm_ttp.h +++ b/include/net/irda/ircomm_ttp.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_tty.h b/include/net/irda/ircomm_tty.h index 0224402260a7..8d4f588974bc 100644 --- a/include/net/irda/ircomm_tty.h +++ b/include/net/irda/ircomm_tty.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h index 0a63bbb972d7..20dcbdf258cf 100644 --- a/include/net/irda/ircomm_tty_attach.h +++ b/include/net/irda/ircomm_tty_attach.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 11417475a6c3..664bf8178412 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h index f9d88da97af2..e4325fee1267 100644 --- a/include/net/irda/irlap_event.h +++ b/include/net/irda/irlap_event.h @@ -25,9 +25,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h index 57173ae398ae..cbc12a926e5f 100644 --- a/include/net/irda/irlap_frame.h +++ b/include/net/irda/irlap_frame.h @@ -24,9 +24,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/irda/parameters.h b/include/net/irda/parameters.h index c0d938847bd3..42713c931d1f 100644 --- a/include/net/irda/parameters.h +++ b/include/net/irda/parameters.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * * Michel Dänzer , 10/2001 * - simplify irda_pv_t to avoid endianness issues diff --git a/include/net/irda/qos.h b/include/net/irda/qos.h index cc577dc0a0ef..05a5a249956f 100644 --- a/include/net/irda/qos.h +++ b/include/net/irda/qos.h @@ -22,9 +22,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * along with this program; if not, see . * ********************************************************************/ diff --git a/include/net/mip6.h b/include/net/mip6.h index 26ba99b5a4b1..0386b618908c 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . */ /* * Authors: diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 2c95d55f7914..24948bedb64c 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -22,8 +22,7 @@ * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . * */ diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 2eca2960ca9c..03c4650b548c 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -12,9 +12,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see . */ #ifndef __NET_HCI_H diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h index 400ab7ae749d..c25fbdee0d61 100644 --- a/include/net/nfc/llc.h +++ b/include/net/nfc/llc.h @@ -13,9 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see . */ #ifndef __NFC_LLC_H_ diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index e5aa5acafea0..fbfa4e471abb 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -20,8 +20,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . * */ diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 6126f1f992b4..0ff070e8f8de 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -21,8 +21,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . * */ diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 82fc4e43fc6e..e80894bca1d0 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -16,9 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, see . */ #ifndef __NET_NFC_H diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h index aa80bef3c9d5..f2d58aa37a6f 100644 --- a/include/net/sctp/auth.h +++ b/include/net/sctp/auth.h @@ -16,9 +16,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 6bd44fe94c26..4a5b9a306c69 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 832f2191489c..4b7cd695e431 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 2f0a565a0fd5..307728f622ef 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -19,9 +19,8 @@ * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * along with GNU CC; see the file COPYING. If not, see + * . * * Please send any bug reports or fixes you make to the * email address(es): diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index e103fe02f375..dd5d86fab030 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -11,8 +11,7 @@ * more details. * * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. + * this program; if not, see . * * Author: Alexander Duyck */ -- cgit v1.2.2 From e664eabd18c4581b21ea6f62e2e599df5a719a5b Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 6 Dec 2013 09:13:42 -0800 Subject: netfilter: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL so that we do not have to keep updating the header comments anytime the address changes. CC: netfilter@vger.kernel.org CC: Pablo Neira Ayuso CC: Patrick McHardy CC: Jozsef Kadlecsik Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/uapi/linux/netfilter/xt_osf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h index 18afa495f973..5d66caeba3ee 100644 --- a/include/uapi/linux/netfilter/xt_osf.h +++ b/include/uapi/linux/netfilter/xt_osf.h @@ -13,8 +13,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . */ #ifndef _XT_OSF_H -- cgit v1.2.2 From f54b311142a92ea2e42598e347b84e1655caf8e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2013 22:36:05 -0800 Subject: tcp: auto corking With the introduction of TCP Small Queues, TSO auto sizing, and TCP pacing, we can implement Automatic Corking in the kernel, to help applications doing small write()/sendmsg() to TCP sockets. Idea is to change tcp_push() to check if the current skb payload is under skb optimal size (a multiple of MSS bytes) If under 'size_goal', and at least one packet is still in Qdisc or NIC TX queues, set the TCP Small Queue Throttled bit, so that the push will be delayed up to TX completion time. This delay might allow the application to coalesce more bytes in the skb in following write()/sendmsg()/sendfile() system calls. The exact duration of the delay is depending on the dynamics of the system, and might be zero if no packet for this flow is actually held in Qdisc or NIC TX ring. Using FQ/pacing is a way to increase the probability of autocorking being triggered. Add a new sysctl (/proc/sys/net/ipv4/tcp_autocorking) to control this feature and default it to 1 (enabled) Add a new SNMP counter : nstat -a | grep TcpExtTCPAutoCorking This counter is incremented every time we detected skb was under used and its flush was deferred. Tested: Interesting effects when using line buffered commands under ssh. Excellent performance results in term of cpu usage and total throughput. lpq83:~# echo 1 >/proc/sys/net/ipv4/tcp_autocorking lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128 9410.39 Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128': 35209.439626 task-clock # 2.901 CPUs utilized 2,294 context-switches # 0.065 K/sec 101 CPU-migrations # 0.003 K/sec 4,079 page-faults # 0.116 K/sec 97,923,241,298 cycles # 2.781 GHz [83.31%] 51,832,908,236 stalled-cycles-frontend # 52.93% frontend cycles idle [83.30%] 25,697,986,603 stalled-cycles-backend # 26.24% backend cycles idle [66.70%] 102,225,978,536 instructions # 1.04 insns per cycle # 0.51 stalled cycles per insn [83.38%] 18,657,696,819 branches # 529.906 M/sec [83.29%] 91,679,646 branch-misses # 0.49% of all branches [83.40%] 12.136204899 seconds time elapsed lpq83:~# echo 0 >/proc/sys/net/ipv4/tcp_autocorking lpq83:~# perf stat ./super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128 6624.89 Performance counter stats for './super_netperf 4 -t TCP_STREAM -H lpq84 -- -m 128': 40045.864494 task-clock # 3.301 CPUs utilized 171 context-switches # 0.004 K/sec 53 CPU-migrations # 0.001 K/sec 4,080 page-faults # 0.102 K/sec 111,340,458,645 cycles # 2.780 GHz [83.34%] 61,778,039,277 stalled-cycles-frontend # 55.49% frontend cycles idle [83.31%] 29,295,522,759 stalled-cycles-backend # 26.31% backend cycles idle [66.67%] 108,654,349,355 instructions # 0.98 insns per cycle # 0.57 stalled cycles per insn [83.34%] 19,552,170,748 branches # 488.244 M/sec [83.34%] 157,875,417 branch-misses # 0.81% of all branches [83.34%] 12.130267788 seconds time elapsed Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + include/uapi/linux/snmp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 70e55d200610..f7e1ab2139ef 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -282,6 +282,7 @@ extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; +extern int sysctl_tcp_autocorking; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 1bdb4a39d1e1..bbaba22f2d1b 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -258,6 +258,7 @@ enum LINUX_MIB_TCPFASTOPENCOOKIEREQD, /* TCPFastOpenCookieReqd */ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */ LINUX_MIB_BUSYPOLLRXPACKETS, /* BusyPollRxPackets */ + LINUX_MIB_TCPAUTOCORKING, /* TCPAutoCorking */ __LINUX_MIB_MAX }; -- cgit v1.2.2 From e9fbdf176d2a7993b9d4c487b50c68d1c6019b2c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 5 Dec 2013 14:52:13 -0800 Subject: net: phy: breakdown PHY_*_FEATURES defines Breakdown the PHY_*_FEATURES into per speed defines such that we can easily re-use them individually. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 48a4dc3cb8cf..7ff751ae6f0a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -27,18 +27,27 @@ #include -#define PHY_BASIC_FEATURES (SUPPORTED_10baseT_Half | \ - SUPPORTED_10baseT_Full | \ - SUPPORTED_100baseT_Half | \ - SUPPORTED_100baseT_Full | \ - SUPPORTED_Autoneg | \ +#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ SUPPORTED_MII) -#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ - SUPPORTED_1000baseT_Half | \ +#define PHY_10BT_FEATURES (SUPPORTED_10baseT_Half | \ + SUPPORTED_10baseT_Full) + +#define PHY_100BT_FEATURES (SUPPORTED_100baseT_Half | \ + SUPPORTED_100baseT_Full) + +#define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ SUPPORTED_1000baseT_Full) +#define PHY_BASIC_FEATURES (PHY_10BT_FEATURES | \ + PHY_100BT_FEATURES | \ + PHY_DEFAULT_FEATURES) + +#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ + PHY_1000BT_FEATURES) + + /* * Set phydev->irq to PHY_POLL if interrupts are not supported, * or not desired for this PHY. Set to PHY_IGNORE_INTERRUPT if -- cgit v1.2.2 From e6247027e5173c00efb2084d688d06ff835bc3b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2013 04:45:08 -0800 Subject: net: introduce dev_consume_skb_any() Some network drivers use dev_kfree_skb_any() and dev_kfree_skb_irq() helpers to free skbs, both for dropped packets and TX completed ones. We need to separate the two causes to get better diagnostics given by dropwatch or "perf record -e skb:kfree_skb" This patch provides two new helpers, dev_consume_skb_any() and dev_consume_skb_irq() to be used for consumed skbs. __dev_kfree_skb_irq() is slightly optimized to remove one atomic_dec_and_test() in fast path, and use this_cpu_{r|w} accessors. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 53 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7f0ed423a360..9d55e5188b96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2368,17 +2368,52 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev, #define DEFAULT_MAX_NUM_RSS_QUEUES (8) int netif_get_num_default_rss_queues(void); -/* Use this variant when it is known for sure that it - * is executing from hardware interrupt context or with hardware interrupts - * disabled. - */ -void dev_kfree_skb_irq(struct sk_buff *skb); +enum skb_free_reason { + SKB_REASON_CONSUMED, + SKB_REASON_DROPPED, +}; + +void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason); +void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason); -/* Use this variant in places where it could be invoked - * from either hardware interrupt or other context, with hardware interrupts - * either disabled or enabled. +/* + * It is not allowed to call kfree_skb() or consume_skb() from hardware + * interrupt context or with hardware interrupts being disabled. + * (in_irq() || irqs_disabled()) + * + * We provide four helpers that can be used in following contexts : + * + * dev_kfree_skb_irq(skb) when caller drops a packet from irq context, + * replacing kfree_skb(skb) + * + * dev_consume_skb_irq(skb) when caller consumes a packet from irq context. + * Typically used in place of consume_skb(skb) in TX completion path + * + * dev_kfree_skb_any(skb) when caller doesn't know its current irq context, + * replacing kfree_skb(skb) + * + * dev_consume_skb_any(skb) when caller doesn't know its current irq context, + * and consumed a packet. Used in place of consume_skb(skb) */ -void dev_kfree_skb_any(struct sk_buff *skb); +static inline void dev_kfree_skb_irq(struct sk_buff *skb) +{ + __dev_kfree_skb_irq(skb, SKB_REASON_DROPPED); +} + +static inline void dev_consume_skb_irq(struct sk_buff *skb) +{ + __dev_kfree_skb_irq(skb, SKB_REASON_CONSUMED); +} + +static inline void dev_kfree_skb_any(struct sk_buff *skb) +{ + __dev_kfree_skb_any(skb, SKB_REASON_DROPPED); +} + +static inline void dev_consume_skb_any(struct sk_buff *skb) +{ + __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED); +} int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -- cgit v1.2.2 From 479840ffdbe4242e8a25349218c8e0859223aa35 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 6 Dec 2013 09:45:21 +0100 Subject: ipv6 addrconf: extend ifa_flags to u32 There is no more space in u8 ifa_flags. So do what davem suffested and add another netlink attr called IFA_FLAGS for carry more flags. Signed-off-by: Jiri Pirko Signed-off-by: Thomas Haller Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- include/net/if_inet6.h | 2 +- include/uapi/linux/if_addr.h | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 86505bfa5d2c..e70278eef12a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -81,9 +81,9 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, - unsigned char banned_flags); + u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, - unsigned char banned_flags); + u32 banned_flags); int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 76d54270f2e2..b58c36c1c3f6 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -50,8 +50,8 @@ struct inet6_ifaddr { int state; + __u32 flags; __u8 dad_probes; - __u8 flags; __u16 scope; diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index 23357ab81a77..8ab0c2cc9b73 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -18,6 +18,9 @@ struct ifaddrmsg { * It makes no difference for normally configured broadcast interfaces, * but for point-to-point IFA_ADDRESS is DESTINATION address, * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. */ enum { IFA_UNSPEC, @@ -28,6 +31,7 @@ enum { IFA_ANYCAST, IFA_CACHEINFO, IFA_MULTICAST, + IFA_FLAGS, __IFA_MAX, }; -- cgit v1.2.2 From 53bd674915379d91e0e505332c89741b34eab05c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 6 Dec 2013 09:45:22 +0100 Subject: ipv6 addrconf: introduce IFA_F_MANAGETEMPADDR to tell kernel to manage temporary addresses Creating an address with this flag set will result in kernel taking care of temporary addresses in the same way as if the address was created by kernel itself (after RA receive). This allows userspace applications implementing the autoconfiguration (NetworkManager for example) to implement ipv6 addresses privacy. Signed-off-by: Jiri Pirko Signed-off-by: Thomas Haller Signed-off-by: David S. Miller --- include/uapi/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index 8ab0c2cc9b73..cfed10be7529 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -48,6 +48,7 @@ enum { #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 struct ifa_cacheinfo { __u32 ifa_prefered; -- cgit v1.2.2 From 0d74c42f788caf3cad727c61c490d9459bc8918b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Dec 2013 14:54:38 -0800 Subject: ether_addr_equal: Optimize implementation, remove unused compare_ether_addr Add a new check for CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS to reduce the number of or's used in the ether_addr_equal comparison to very slightly improve function performance. Simplify the ether_addr_equal_64bits implementation. Integrate and remove the zap_last_2bytes helper as it's now used only once. Remove the now unused compare_ether_addr function. Update the unaligned-memory-access documentation to remove the compare_ether_addr description and show how unaligned accesses could occur with ether_addr_equal. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 51 ++++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index fc4a9aa7dd82..3526e819d7ae 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef __KERNEL__ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); @@ -210,41 +211,27 @@ static inline void eth_hw_addr_inherit(struct net_device *dst, memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN); } -/** - * compare_ether_addr - Compare two Ethernet addresses - * @addr1: Pointer to a six-byte array containing the Ethernet address - * @addr2: Pointer other six-byte array containing the Ethernet address - * - * Compare two Ethernet addresses, returns 0 if equal, non-zero otherwise. - * Unlike memcmp(), it doesn't return a value suitable for sorting. - */ -static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) -{ - const u16 *a = (const u16 *) addr1; - const u16 *b = (const u16 *) addr2; - - BUILD_BUG_ON(ETH_ALEN != 6); - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; -} - /** * ether_addr_equal - Compare two Ethernet addresses * @addr1: Pointer to a six-byte array containing the Ethernet address * @addr2: Pointer other six-byte array containing the Ethernet address * * Compare two Ethernet addresses, returns true if equal + * + * Please note: addr1 & addr2 must both be aligned to u16. */ static inline bool ether_addr_equal(const u8 *addr1, const u8 *addr2) { - return !compare_ether_addr(addr1, addr2); -} +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) | + ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4))); -static inline unsigned long zap_last_2bytes(unsigned long value) -{ -#ifdef __BIG_ENDIAN - return value >> 16; + return fold == 0; #else - return value << 16; + const u16 *a = (const u16 *)addr1; + const u16 *b = (const u16 *)addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; #endif } @@ -265,16 +252,14 @@ static inline unsigned long zap_last_2bytes(unsigned long value) static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], const u8 addr2[6+2]) { -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - unsigned long fold = ((*(unsigned long *)addr1) ^ - (*(unsigned long *)addr2)); - - if (sizeof(fold) == 8) - return zap_last_2bytes(fold) == 0; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + u64 fold = (*(const u64 *)addr1) ^ (*(const u64 *)addr2); - fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ - (*(unsigned long *)(addr2 + 4))); - return fold == 0; +#ifdef __BIG_ENDIAN + return (fold >> 16) == 0; +#else + return (fold << 16) == 0; +#endif #else return ether_addr_equal(addr1, addr2); #endif -- cgit v1.2.2 From 97a2d41c47a2246c3387a937c62126c9faefe875 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Fri, 6 Dec 2013 00:24:12 +0100 Subject: netfilter: xt_NFQUEUE: separate reusable code This patch prepares the addition of nft_queue module by moving reusable code into a header file. This patch also converts NFQUEUE to use prandom_u32 to initialize the random jhash seed as suggested by Florian Westphal. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 62 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c1d5b3e34a21..84a53d780306 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -1,6 +1,10 @@ #ifndef _NF_QUEUE_H #define _NF_QUEUE_H +#include +#include +#include + /* Each queued (to userspace) skbuff has one of these. */ struct nf_queue_entry { struct list_head list; @@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_release_refs(struct nf_queue_entry *entry); +static inline void init_hashrandom(u32 *jhash_initval) +{ + while (*jhash_initval == 0) + *jhash_initval = prandom_u32(); +} + +static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct iphdr *iph = ip_hdr(skb); + + /* packets in either direction go into same queue */ + if ((__force u32)iph->saddr < (__force u32)iph->daddr) + return jhash_3words((__force u32)iph->saddr, + (__force u32)iph->daddr, iph->protocol, jhash_initval); + + return jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, iph->protocol, jhash_initval); +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 a, b, c; + + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { + a = (__force u32) ip6h->saddr.s6_addr32[3]; + b = (__force u32) ip6h->daddr.s6_addr32[3]; + } else { + b = (__force u32) ip6h->saddr.s6_addr32[3]; + a = (__force u32) ip6h->daddr.s6_addr32[3]; + } + + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) + c = (__force u32) ip6h->saddr.s6_addr32[1]; + else + c = (__force u32) ip6h->daddr.s6_addr32[1]; + + return jhash_3words(a, b, c, jhash_initval); +} +#endif + +static inline u32 +nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, + u32 jhash_initval) +{ + if (family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + else if (family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; +#endif + + return queue; +} + #endif /* _NF_QUEUE_H */ -- cgit v1.2.2 From 0aff078d58e1c69139189e45ba5e929c030e8056 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Fri, 6 Dec 2013 00:24:13 +0100 Subject: netfilter: nft: add queue module This patch adds a new nft module named "nft_queue" which provides a new nftables expression that allows you to enqueue packets to userspace via the nfnetlink_queue subsystem. It provides the same level of functionality as NFQUEUE and it shares some code with it. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index fbfd229a8e99..256d36b1b94a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -657,6 +657,26 @@ enum nft_log_attributes { }; #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) +/** + * enum nft_queue_attributes - nf_tables queue expression netlink attributes + * + * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) + * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) + * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + */ +enum nft_queue_attributes { + NFTA_QUEUE_UNSPEC, + NFTA_QUEUE_NUM, + NFTA_QUEUE_TOTAL, + NFTA_QUEUE_FLAGS, + __NFTA_QUEUE_MAX +}; +#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) + +#define NFT_QUEUE_FLAG_BYPASS 0x01 /* for compatibility with v2 */ +#define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ +#define NFT_QUEUE_FLAG_MASK 0x03 + /** * enum nft_reject_types - nf_tables reject expression reject types * -- cgit v1.2.2 From 53b834d2333aec1e60fcbfadfddd4ad472329570 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Dec 2013 11:55:33 -0800 Subject: Bluetooth: Use macros for connectionless slave broadcast features Add the LMP feature constants for connectionless slave broadcast and use them for capability testing. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 6 ++++++ include/net/bluetooth/hci_core.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1784c48699f0..cc2da73055fa 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -275,6 +275,12 @@ enum { #define LMP_EXTFEATURES 0x80 /* Extended LMP features */ +#define LMP_CSB_MASTER 0x01 +#define LMP_CSB_SLAVE 0x02 +#define LMP_SYNC_TRAIN 0x04 +#define LMP_SYNC_SCAN 0x08 + +/* Host features */ #define LMP_HOST_SSP 0x01 #define LMP_HOST_LE 0x02 #define LMP_HOST_LE_BREDR 0x04 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f8555ad7b104..b796161fb04e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -798,6 +798,12 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_transp_capable(dev) ((dev)->features[0][2] & LMP_TRANSPARENT) /* ----- Extended LMP capabilities ----- */ +#define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER) +#define lmp_csb_slave_capable(dev) ((dev)->features[2][0] & LMP_CSB_SLAVE) +#define lmp_sync_train_capable(dev) ((dev)->features[2][0] & LMP_SYNC_TRAIN) +#define lmp_sync_scan_capable(dev) ((dev)->features[2][0] & LMP_SYNC_SCAN) + +/* ----- Host capabilities ----- */ #define lmp_host_ssp_capable(dev) ((dev)->features[1][0] & LMP_HOST_SSP) #define lmp_host_le_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE)) #define lmp_host_le_br_capable(dev) (!!((dev)->features[1][0] & LMP_HOST_LE_BREDR)) -- cgit v1.2.2 From 0f978bfaf2600040f9826f394eb12e023bef3068 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Fri, 6 Dec 2013 16:28:45 +0530 Subject: ath9k: Use a separate TX gain table for WZR-HP-G450H The Buffalo device WZR-HP-G450H uses the index 3 for TX gain, which is set to the high_power table currently. Later variants of the router use the same index, but instead refer to the low_ob_db gain table. This is not handled in the driver since there is no way to distinguish board revisions and the high_power table is used (incorrectly) for the newer variants. By default, devices based on AR9300 using the TX gain index 3 have to use the high_power table. To make sure that WZR-HP-G450H is not broken when the high_power table is updated, use a separate array based on information obtained from the platform data. The current situation where only the original variant of WZR-HP-G450H works properly stays unchanged. Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville --- include/linux/ath9k_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h index fcdd81bd5314..8598f8eacb20 100644 --- a/include/linux/ath9k_platform.h +++ b/include/linux/ath9k_platform.h @@ -32,6 +32,8 @@ struct ath9k_platform_data { u32 gpio_val; bool is_clk_25mhz; + bool tx_gain_buffalo; + int (*get_mac_revision)(void); int (*external_reset)(void); }; -- cgit v1.2.2 From 95dc19299f741c986227ec33e23cbf9b3321f812 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2013 11:12:02 -0800 Subject: pkt_sched: give visibility to mq slave qdiscs Commit 6da7c8fcbcbd ("qdisc: allow setting default queuing discipline") added the ability to change default qdisc from pfifo_fast to say fq But as most modern ethernet devices are multiqueue, we cant really see all the statistics from "tc -s qdisc show", as the default root qdisc is mq. This patch adds the calls to qdisc_list_add() to mq and mqprio Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 59ec3cd15d68..891d80d2c4d2 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -88,6 +88,7 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); +void qdisc_list_add(struct Qdisc *q); void qdisc_list_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); -- cgit v1.2.2 From 4262e5ccbbb5171abd2921eed16ed339633d6478 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:16 +0100 Subject: net: dev: move inline skb_needs_linearize helper to header As we need it elsewhere, move the inline helper function of skb_needs_linearize() over to skbuff.h include file. While at it, also convert the return to 'bool' instead of 'int' and add a proper kernel doc. Signed-off-by: Daniel Borkmann Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 215b5ea1cb30..77c7aae1c6b2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2392,6 +2392,24 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, return buffer; } +/** + * skb_needs_linearize - check if we need to linearize a given skb + * depending on the given device features. + * @skb: socket buffer to check + * @features: net device features + * + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG. + */ +static inline bool skb_needs_linearize(struct sk_buff *skb, + netdev_features_t features) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); +} + static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) -- cgit v1.2.2 From d346a3fae3ff1d99f5d0c819bf86edf9094a26a1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:17 +0100 Subject: packet: introduce PACKET_QDISC_BYPASS socket option This patch introduces a PACKET_QDISC_BYPASS socket option, that allows for using a similar xmit() function as in pktgen instead of taking the dev_queue_xmit() path. This can be very useful when PF_PACKET applications are required to be used in a similar scenario as pktgen, but with full, flexible packet payload that needs to be provided, for example. On default, nothing changes in behaviour for normal PF_PACKET TX users, so everything stays as is for applications. New users, however, can now set PACKET_QDISC_BYPASS if needed to prevent own packets from i) reentering packet_rcv() and ii) to directly push the frame to the driver. In doing so we can increase pps (here 64 byte packets) for PF_PACKET a bit: # CPUs -- QDISC_BYPASS -- qdisc path -- qdisc path[**] 1 CPU == 1,509,628 pps -- 1,208,708 -- 1,247,436 2 CPUs == 3,198,659 pps -- 2,536,012 -- 1,605,779 3 CPUs == 4,787,992 pps -- 3,788,740 -- 1,735,610 4 CPUs == 6,173,956 pps -- 4,907,799 -- 1,909,114 5 CPUs == 7,495,676 pps -- 5,956,499 -- 2,014,422 6 CPUs == 9,001,496 pps -- 7,145,064 -- 2,155,261 7 CPUs == 10,229,776 pps -- 8,190,596 -- 2,220,619 8 CPUs == 11,040,732 pps -- 9,188,544 -- 2,241,879 9 CPUs == 12,009,076 pps -- 10,275,936 -- 2,068,447 10 CPUs == 11,380,052 pps -- 11,265,337 -- 1,578,689 11 CPUs == 11,672,676 pps -- 11,845,344 -- 1,297,412 [...] 20 CPUs == 11,363,192 pps -- 11,014,933 -- 1,245,081 [**]: qdisc path with packet_rcv(), how probably most people seem to use it (hopefully not anymore if not needed) The test was done using a modified trafgen, sending a simple static 64 bytes packet, on all CPUs. The trick in the fast "qdisc path" case, is to avoid reentering packet_rcv() by setting the RAW socket protocol to zero, like: socket(PF_PACKET, SOCK_RAW, 0); Tradeoffs are documented as well in this patch, clearly, if queues are busy, we will drop more packets, tc disciplines are ignored, and these packets are not visible to taps anymore. For a pktgen like scenario, we argue that this is acceptable. The pointer to the xmit function has been placed in packet socket structure hole between cached_dev and prot_hook that is hot anyway as we're working on cached_dev in each send path. Done in joint work together with Jesper Dangaard Brouer. Signed-off-by: Daniel Borkmann Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index dbf06667394b..1e24aa701cbd 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -51,6 +51,7 @@ struct sockaddr_ll { #define PACKET_TIMESTAMP 17 #define PACKET_FANOUT 18 #define PACKET_TX_HAS_OFF 19 +#define PACKET_QDISC_BYPASS 20 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 -- cgit v1.2.2 From 114002bc1af6c63de4c003f8c5d3ca0bb430d987 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 6 Dec 2013 13:01:30 -0800 Subject: net: phy: report link partner features through ethtool The PHY library already reads the MII_STAT1000 and MII_LPA registers in genphy_read_status(), so extend it to also populate the PHY device link partner advertised features such that we can feed this back into ethtool when asked for it in phy_ethtool_gset(). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7ff751ae6f0a..90a666e0884b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -287,8 +287,8 @@ struct phy_c45_device_ids { * adjust_state: Callback for the enet driver to respond to * changes in the state machine. * - * speed, duplex, pause, supported, advertising, and - * autoneg are used like in mii_if_info + * speed, duplex, pause, supported, advertising, lp_advertising, + * and autoneg are used like in mii_if_info * * interrupts currently only supports enabled or disabled, * but could be changed in the future to support enabling @@ -340,6 +340,7 @@ struct phy_device { /* See mii.h for more info */ u32 supported; u32 advertising; + u32 lp_advertising; int autoneg; -- cgit v1.2.2 From 1f9248e5606afc6485255e38ad57bdac08fa7711 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Dec 2013 19:26:53 +0100 Subject: neigh: convert parms to an array This patch converts the neigh param members to an array. This allows easier manipulation which will be needed later on to provide better management of default values. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/neighbour.h | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 536501a3e58d..4cb5478e2f6e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -37,6 +37,32 @@ struct neighbour; +enum { + NEIGH_VAR_MCAST_PROBES, + NEIGH_VAR_UCAST_PROBES, + NEIGH_VAR_APP_PROBES, + NEIGH_VAR_RETRANS_TIME, + NEIGH_VAR_BASE_REACHABLE_TIME, + NEIGH_VAR_DELAY_PROBE_TIME, + NEIGH_VAR_GC_STALETIME, + NEIGH_VAR_QUEUE_LEN_BYTES, + NEIGH_VAR_PROXY_QLEN, + NEIGH_VAR_ANYCAST_DELAY, + NEIGH_VAR_PROXY_DELAY, + NEIGH_VAR_LOCKTIME, +#define NEIGH_VAR_DATA_MAX (NEIGH_VAR_LOCKTIME + 1) + /* Following are used as a second way to access one of the above */ + NEIGH_VAR_QUEUE_LEN, /* same data as NEIGH_VAR_QUEUE_LEN_BYTES */ + NEIGH_VAR_RETRANS_TIME_MS, /* same data as NEIGH_VAR_RETRANS_TIME */ + NEIGH_VAR_BASE_REACHABLE_TIME_MS, /* same data as NEIGH_VAR_BASE_REACHABLE_TIME */ + /* Following are used by "default" only */ + NEIGH_VAR_GC_INTERVAL, + NEIGH_VAR_GC_THRESH1, + NEIGH_VAR_GC_THRESH2, + NEIGH_VAR_GC_THRESH3, + NEIGH_VAR_MAX +}; + struct neigh_parms { #ifdef CONFIG_NET_NS struct net *net; @@ -53,22 +79,18 @@ struct neigh_parms { atomic_t refcnt; struct rcu_head rcu_head; - int base_reachable_time; - int retrans_time; - int gc_staletime; int reachable_time; - int delay_probe_time; - - int queue_len_bytes; - int ucast_probes; - int app_probes; - int mcast_probes; - int anycast_delay; - int proxy_delay; - int proxy_qlen; - int locktime; + int data[NEIGH_VAR_DATA_MAX]; }; +static inline void neigh_var_set(struct neigh_parms *p, int index, int val) +{ + p->data[index] = val; +} + +#define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) +#define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) + struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ -- cgit v1.2.2 From cb5b09c17fe60056bc8f127ffc987d361c40ed4b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Dec 2013 19:26:54 +0100 Subject: neigh: wrap proc dointvec functions This will be needed later on to provide better management of default values. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/neighbour.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4cb5478e2f6e..ed7eba045297 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -296,6 +296,15 @@ void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); +int neigh_proc_dointvec(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); + int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, char *p_name, proc_handler *proc_handler); void neigh_sysctl_unregister(struct neigh_parms *p); -- cgit v1.2.2 From 73af614aedd221df8495fc8c9993c50e87f899f2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Dec 2013 19:26:55 +0100 Subject: neigh: use tbl->family to distinguish ipv4 from ipv6 Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/neighbour.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index ed7eba045297..95615c9ad13a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -202,6 +202,11 @@ struct neigh_table { struct pneigh_entry **phash_buckets; }; +static inline int neigh_parms_family(struct neigh_parms *p) +{ + return p->tbl->family; +} + #define NEIGH_PRIV_ALIGN sizeof(long long) #define NEIGH_ENTRY_SIZE(size) ALIGN((size), NEIGH_PRIV_ALIGN) @@ -306,7 +311,7 @@ int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - char *p_name, proc_handler *proc_handler); + proc_handler *proc_handler); void neigh_sysctl_unregister(struct neigh_parms *p); static inline void __neigh_parms_put(struct neigh_parms *parms) -- cgit v1.2.2 From 1d4c8c29841b9991cdf3c7cc4ba7f96a94f104ca Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Dec 2013 19:26:56 +0100 Subject: neigh: restore old behaviour of default parms values Previously inet devices were only constructed when addresses are added. Therefore the default neigh parms values they get are the ones at the time of these operations. Now that we're creating inet devices earlier, this changes the behaviour of default neigh parms values in an incompatible way (see bug #8519). This patch creates a compromise by setting the default values at the same point as before but only for those that have not been explicitly set by the user since the inet device's creation. Introduced by: commit 8030f54499925d073a88c09f30d5d844fb1b3190 Author: Herbert Xu Date: Thu Feb 22 01:53:47 2007 +0900 [IPV4] devinet: Register inetdev earlier. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 7 +++++++ include/net/neighbour.h | 13 +++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0d678aefe69d..ae174ca565c9 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -220,6 +220,13 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + return in_dev ? in_dev->arp_parms : NULL; +} + void in_dev_finish_destroy(struct in_device *idev); static inline void in_dev_put(struct in_device *idev) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 95615c9ad13a..41b1ce6c96a8 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -81,16 +82,28 @@ struct neigh_parms { int reachable_time; int data[NEIGH_VAR_DATA_MAX]; + DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; static inline void neigh_var_set(struct neigh_parms *p, int index, int val) { + set_bit(index, p->data_state); p->data[index] = val; } #define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) +static inline void neigh_parms_data_state_setall(struct neigh_parms *p) +{ + bitmap_fill(p->data_state, NEIGH_VAR_DATA_MAX); +} + +static inline void neigh_parms_data_state_cleanall(struct neigh_parms *p) +{ + bitmap_zero(p->data_state, NEIGH_VAR_DATA_MAX); +} + struct neigh_statistics { unsigned long allocs; /* number of allocated neighs */ unsigned long destroys; /* number of destroyed neighs */ -- cgit v1.2.2 From bba24896f022d4d239494bebf18e713cd8aec7a5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Dec 2013 19:26:57 +0100 Subject: neigh: ipv6: respect default values set before an address is assigned to device Make the behaviour similar to ipv4. This will allow user to set sysctl default neigh param values and these values will be respected even by devices registered before (that ones what do not have address set yet). Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/addrconf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index e70278eef12a..66c4a44d8f5c 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -248,6 +248,13 @@ static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) return idev; } +static inline struct neigh_parms *__in6_dev_nd_parms_get_rcu(const struct net_device *dev) +{ + struct inet6_dev *idev = __in6_dev_get(dev); + + return idev ? idev->nd_parms : NULL; +} + void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void in6_dev_put(struct inet6_dev *idev) -- cgit v1.2.2 From 73eaef87e98a96fe8b8a586f916b2721bf512efa Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 6 Dec 2013 14:21:01 -0800 Subject: etherdevice: Add ether_addr_equal_unaligned Add a generic routine to test if possibly unaligned to u16 Ethernet addresses are equal. If CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set, this uses the slightly faster generic routine ether_addr_equal, otherwise this uses memcmp. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 3526e819d7ae..2f0e3d0818bc 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -265,6 +265,24 @@ static inline bool ether_addr_equal_64bits(const u8 addr1[6+2], #endif } +/** + * ether_addr_equal_unaligned - Compare two not u16 aligned Ethernet addresses + * @addr1: Pointer to a six-byte array containing the Ethernet address + * @addr2: Pointer other six-byte array containing the Ethernet address + * + * Compare two Ethernet addresses, returns true if equal + * + * Please note: Use only when any Ethernet address may not be u16 aligned. + */ +static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return ether_addr_equal(addr1, addr2); +#else + return memcmp(addr1, addr2, ETH_ALEN) == 0; +#endif +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure -- cgit v1.2.2 From 2c722fe1c821a100ca87fcc36e90a9bceb497c7c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 6 Dec 2013 15:44:21 -0800 Subject: etherdevice: Optimize a few is__ether_addr functions If CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set, several is__ether_addr functions can be slightly improved by using u32 dereferences. I believe all current uses of is_zero_ether_addr and is_broadcast_ether_addr are u16 aligned, so always use u16 references to improve those functions performance. Document the u16 alignment requirements. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2f0e3d0818bc..f344ac04f858 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -61,6 +61,8 @@ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = * * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. + * + * Please note: addr must be aligned to u16. */ static inline bool is_link_local_ether_addr(const u8 *addr) { @@ -68,7 +70,12 @@ static inline bool is_link_local_ether_addr(const u8 *addr) static const __be16 *b = (const __be16 *)eth_reserved_addr_base; static const __be16 m = cpu_to_be16(0xfff0); +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return (((*(const u32 *)addr) ^ (*(const u32 *)b)) | + ((a[2] ^ b[2]) & m)) == 0; +#else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; +#endif } /** @@ -76,10 +83,18 @@ static inline bool is_link_local_ether_addr(const u8 *addr) * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is all zeroes. + * + * Please note: addr must be aligned to u16. */ static inline bool is_zero_ether_addr(const u8 *addr) { - return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return ((*(const u32 *)addr) | (*(const u16 *)(addr + 4))) == 0; +#else + return (*(const u16 *)(addr + 0) | + *(const u16 *)(addr + 2) | + *(const u16 *)(addr + 4)) == 0; +#endif } /** @@ -110,10 +125,14 @@ static inline bool is_local_ether_addr(const u8 *addr) * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is the broadcast address. + * + * Please note: addr must be aligned to u16. */ static inline bool is_broadcast_ether_addr(const u8 *addr) { - return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff; + return (*(const u16 *)(addr + 0) & + *(const u16 *)(addr + 2) & + *(const u16 *)(addr + 4)) == 0xffff; } /** @@ -135,6 +154,8 @@ static inline bool is_unicast_ether_addr(const u8 *addr) * a multicast address, and is not FF:FF:FF:FF:FF:FF. * * Return true if the address is valid. + * + * Please note: addr must be aligned to u16. */ static inline bool is_valid_ether_addr(const u8 *addr) { -- cgit v1.2.2 From 1397ed35f22d7c30d0b89ba74b6b7829220dfcfd Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Sun, 8 Dec 2013 15:46:57 +0100 Subject: ipv6: add flowinfo for tcp6 pkt_options for all cases The current implementation of IPV6_FLOWINFO only gives a result if pktoptions is available (thanks to the ip6_datagram_recv_ctl function). It gives inconsistent results to user space, sometimes there is a result for getsockopt(IPV6_FLOWINFO), sometimes not. This patch add rcv_flowinfo to store it, and return it to the userspace in the same way than other pkt_options. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c56c350324e4..0b2ebfbbe0f4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -201,6 +201,7 @@ struct ipv6_pinfo { __u8 min_hopcount; __u8 tclass; __u8 rcv_tclass; + __be32 rcv_flowinfo; __u32 dst_cookie; __u32 rx_dst_cookie; -- cgit v1.2.2 From 37cfee909c5a396abccf43e3e0a63a5f5ac998af Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Sun, 8 Dec 2013 15:46:58 +0100 Subject: ipv6: move IPV6_TCLASS_MASK definition in ipv6.h Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 488316e339a1..bce802e7bcfd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -238,6 +238,7 @@ struct ip6_flowlabel { #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) +#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) struct ipv6_fl_socklist { struct ipv6_fl_socklist __rcu *next; -- cgit v1.2.2 From 82e9f105a28006e7ff864220a28cf62ea6d1e280 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Sun, 8 Dec 2013 15:46:59 +0100 Subject: ipv6: remove rcv_tclass of ipv6_pinfo tclass information in now already stored in rcv_flowinfo We do not need to store the same information twice. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0b2ebfbbe0f4..3fde06645553 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -200,7 +200,6 @@ struct ipv6_pinfo { dontfrag:1; __u8 min_hopcount; __u8 tclass; - __u8 rcv_tclass; __be32 rcv_flowinfo; __u32 dst_cookie; -- cgit v1.2.2 From 3308de2b841e5f25bdc4a7208a451f52f8cff969 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Sun, 8 Dec 2013 15:47:00 +0100 Subject: ipv6: add ip6_flowlabel helper And use it if possible. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ipv6.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bce802e7bcfd..d0bfe3eeb824 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -679,6 +679,11 @@ static inline __be32 ip6_flowinfo(const struct ipv6hdr *hdr) return *(__be32 *)hdr & IPV6_FLOWINFO_MASK; } +static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) +{ + return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; +} + /* * Prototypes exported by ipv6 */ -- cgit v1.2.2 From 10ae76faa97cb3e1020dcf2cdae6b1ea8359abab Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 10 Dec 2013 15:00:50 -0500 Subject: cipso: cleanup cipso_v4_translate() when !CONFIG_NETLABEL Don't needlessly recompute 'opt[opt_iter + 1]' as we already have it stored in 'tag_len'. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 2179d071f68f..a6fd939f202d 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -303,7 +303,7 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, for (opt_iter = 6; opt_iter < opt_len;) { tag_len = opt[opt_iter + 1]; - if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { + if ((tag_len == 0) || (tag_len > (opt_len - opt_iter))) { err_offset = opt_iter + 1; goto out; } -- cgit v1.2.2 From 9a32b860431aee0cba50ab2414bfa29f4bb2f0d7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 8 Dec 2013 12:16:09 +0100 Subject: dn_dev: add support for IFA_FLAGS nl attribute Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/dn_dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 20b5ab06032d..197886cd7bdd 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -9,7 +9,7 @@ struct dn_ifaddr { struct dn_dev *ifa_dev; __le16 ifa_local; __le16 ifa_address; - __u8 ifa_flags; + __u32 ifa_flags; __u8 ifa_scope; char ifa_label[IFNAMSIZ]; struct rcu_head rcu; -- cgit v1.2.2 From ad6c81359fc3e6086d1d6f91acda9d5d0e64b2c3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 8 Dec 2013 12:16:10 +0100 Subject: ipv4: add support for IFA_FLAGS nl attribute Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ae174ca565c9..562e5c7c5d35 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -136,8 +136,8 @@ struct in_ifaddr { __be32 ifa_mask; __be32 ifa_broadcast; unsigned char ifa_scope; - unsigned char ifa_flags; unsigned char ifa_prefixlen; + __u32 ifa_flags; char ifa_label[IFNAMSIZ]; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ -- cgit v1.2.2 From 37cb0620073cb64101d9307931c135c70b2e3f04 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 10 Dec 2013 20:45:41 -0800 Subject: tipc: remove TIPC usage of field af_packet_priv in struct net_device TIPC is currently using the field 'af_packet_priv' in struct net_device as a handle to find the bearer instance associated to the given network device. But, by doing so it is blocking other networking cleanups, such as the one discussed here: http://patchwork.ozlabs.org/patch/178044/ This commit removes this usage from TIPC. Instead, we introduce a new field, 'tipc_ptr', to the net_device structure, to serve this purpose. When TIPC bearer is enabled, the bearer object is associated to 'tipc_ptr'. When a TIPC packet arrives in the recv_msg() upcall from a networking device, the bearer object can now be obtained from 'tipc_ptr'. When a bearer is disabled, the bearer object is detached from its underlying network device by setting 'tipc_ptr' to NULL. Additionally, an RCU lock is used to protect the new pointer. Henceforth, the existing tipc_net_lock is used in write mode to serialize write accesses to this pointer, while the new RCU lock is applied on the read side to ensure that the pointer is 100% valid within its wrapped area for all readers. Signed-off-by: Ying Xue Cc: Patrick McHardy Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d55e5188b96..0ca8100f9fbc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1282,6 +1282,9 @@ struct net_device { #endif #if IS_ENABLED(CONFIG_NET_DSA) struct dsa_switch_tree *dsa_ptr; /* dsa specific data */ +#endif +#if IS_ENABLED(CONFIG_TIPC) + struct tipc_bearer __rcu *tipc_ptr; /* TIPC specific data */ #endif void *atalk_ptr; /* AppleTalk link */ struct in_device __rcu *ip_ptr; /* IPv4 specific data */ -- cgit v1.2.2 From 98b32decc83ed3137e3ddbc918b102f8fc406b6d Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Fri, 6 Dec 2013 08:56:16 -0800 Subject: nfc: Fix FSF address in file headers Several files refer to an old address for the Free Software Foundation in the file header comment. Resolve by replacing the address with the URL so that we do not have to keep updating the header comments anytime the address changes. CC: linux-wireless@vger.kernel.org CC: Lauro Ramos Venancio CC: Aloisio Almeida Jr CC: Samuel Ortiz Signed-off-by: Jeff Kirsher Signed-off-by: John W. Linville --- include/linux/platform_data/pn544.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/pn544.h b/include/linux/platform_data/pn544.h index 713bfd703342..5ce1ab983f44 100644 --- a/include/linux/platform_data/pn544.h +++ b/include/linux/platform_data/pn544.h @@ -16,8 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see . */ #ifndef _PN544_H_ -- cgit v1.2.2 From b601fa197fff265bf60eaf6950d4c194da080f4a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 10 Dec 2013 15:02:40 +0100 Subject: ipv4: fix wildcard search with inet_confirm_addr() Help of this function says: "in_dev: only on this interface, 0=any interface", but since commit 39a6d0630012 ("[NETNS]: Process inet_confirm_addr in the correct namespace."), the code supposes that it will never be NULL. This function is never called with in_dev == NULL, but it's exported and may be used by an external module. Because this patch restore the ability to call inet_confirm_addr() with in_dev == NULL, I partially revert the above commit, as suggested by Julian. CC: Julian Anastasov Signed-off-by: Nicolas Dichtel Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 562e5c7c5d35..0068708161ff 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -164,11 +164,10 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); void devinet_init(void); struct in_device *inetdev_by_index(struct net *, int); __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -__be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, - int scope); +__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, + __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); - static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); -- cgit v1.2.2 From 0abc652c796dab74d34d60473ec5594cd21620be Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Wed, 11 Dec 2013 17:05:35 +0200 Subject: net: if_arp: add ARPHRD_6LOWPAN type Used for IPv6 over LoWPAN networks. Example of this is Bluetooth 6LoWPAN network. Signed-off-by: Jukka Rissanen Acked-by: David S. Miller Signed-off-by: Marcel Holtmann --- include/uapi/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index d7fea3496f32..4d024d75d64b 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -94,6 +94,7 @@ #define ARPHRD_CAIF 822 /* CAIF media type */ #define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_NETLINK 824 /* Netlink header */ +#define ARPHRD_6LOWPAN 825 /* IPv6 over LoWPAN */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ -- cgit v1.2.2 From 18722c247023035b9e2e2a08a887adec2a9a6e49 Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Wed, 11 Dec 2013 17:05:37 +0200 Subject: Bluetooth: Enable 6LoWPAN support for BT LE devices This is initial version of http://tools.ietf.org/html/draft-ietf-6lo-btle-00 By default the 6LoWPAN support is not activated and user needs to tweak /sys/kernel/debug/bluetooth/hci0/6lowpan file. The kernel needs IPv6 support before 6LoWPAN is usable. Signed-off-by: Jukka Rissanen Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/l2cap.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cc2da73055fa..5dc3d9072650 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -131,6 +131,7 @@ enum { HCI_PERIODIC_INQ, HCI_FAST_CONNECTABLE, HCI_BREDR_ENABLED, + HCI_6LOWPAN_ENABLED, }; /* A mask for the flags that are supposed to remain when a reset happens diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b796161fb04e..f2f0cf5865c4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -448,6 +448,7 @@ enum { HCI_CONN_SSP_ENABLED, HCI_CONN_POWER_SAVE, HCI_CONN_REMOTE_OOB, + HCI_CONN_6LOWPAN, }; static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index e149e992fdae..dbc4a89984ca 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -136,6 +136,7 @@ struct l2cap_conninfo { #define L2CAP_FC_L2CAP 0x02 #define L2CAP_FC_CONNLESS 0x04 #define L2CAP_FC_A2MP 0x08 +#define L2CAP_FC_6LOWPAN 0x3e /* reserved and temporary value */ /* L2CAP Control Field bit masks */ #define L2CAP_CTRL_SAR 0xC000 -- cgit v1.2.2 From 7e9805696428113e34625a65a30dbc62cb78acc5 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 11 Dec 2013 13:48:20 +0100 Subject: ipv6: router reachability probing RFC 4191 states in 3.5: When a host avoids using any non-reachable router X and instead sends a data packet to another router Y, and the host would have used router X if router X were reachable, then the host SHOULD probe each such router X's reachability by sending a single Neighbor Solicitation to that router's address. A host MUST NOT probe a router's reachability in the absence of useful traffic that the host would have sent to the router if it were reachable. In any case, these probes MUST be rate-limited to no more than one per minute per router. Currently, when the neighbour corresponding to a router falls into NUD_FAILED, it's never considered again. Introduce a new rt6_nud_state value, RT6_NUD_FAIL_PROBE, which suggests the route should not be used but should be probed with a single NS. The probe is ratelimited by the existing code. To better distinguish meanings of the failure values, rename RT6_NUD_FAIL_SOFT to RT6_NUD_FAIL_DO_RR. Signed-off-by: Jiri Benc Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 41b1ce6c96a8..4c09bd23b832 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -252,6 +252,7 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, void neigh_destroy(struct neighbour *neigh); int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags); +void __neigh_set_probe_once(struct neighbour *neigh); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); -- cgit v1.2.2 From 2f6a1b6607fd6b0eb9501843a40e0c7555f37b4a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 11 Dec 2013 13:27:11 -0500 Subject: macvlan: Remove custom recieve and forward handlers Since now macvlan and macvtap use the same receive and forward handlers, we can remove them completely and use netif_rx and dev_forward_skb() directly. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 84ba5ac39e03..ea22721ba269 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -69,8 +69,6 @@ struct macvlan_dev { netdev_features_t set_features; enum macvlan_mode mode; u16 flags; - int (*receive)(struct sk_buff *skb); - int (*forward)(struct net_device *dev, struct sk_buff *skb); /* This array tracks active taps. */ struct macvtap_queue __rcu *taps[MAX_MACVTAP_QUEUES]; /* This list tracks all taps (both enabled and disabled) */ @@ -103,10 +101,7 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - int (*receive)(struct sk_buff *skb), - int (*forward)(struct net_device *dev, - struct sk_buff *skb)); + struct nlattr *tb[], struct nlattr *data[]); extern void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, -- cgit v1.2.2 From 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Wed, 11 Dec 2013 20:53:45 -0800 Subject: net-gro: Prepare GRO stack for the upcoming tunneling support This patch modifies the GRO stack to avoid the use of "network_header" and associated macros like ip_hdr() and ipv6_hdr() in order to allow an arbitary number of IP hdrs (v4 or v6) to be used in the encapsulation chain. This lays the foundation for various IP tunneling support (IP-in-IP, GRE, VXLAN, SIT,...) to be added later. With this patch, the GRO stack traversing now is mostly based on skb_gro_offset rather than special hdr offsets saved in skb (e.g., skb->network_header). As a result all but the top layer (i.e., the the transport layer) must have hdrs of the same length in order for a pkt to be considered for aggregation. Therefore when adding a new encap layer (e.g., for tunneling), one must check and skip flows (e.g., by setting NAPI_GRO_CB(p)->same_flow to 0) that have a different hdr length. Note that unlike the network header, the transport header can and will continue to be set by the GRO code since there will be at most one "transport layer" in the encap chain. Signed-off-by: H.K. Jerry Chu Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ca8100f9fbc..5260d2eae2e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1676,7 +1676,7 @@ struct offload_callbacks { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); - int (*gro_complete)(struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb, int nhoff); }; struct packet_offload { -- cgit v1.2.2 From 8cf4d6a224a0226987d9cba69cb46d93814fe449 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 15 Nov 2013 15:57:53 +0100 Subject: net: reorder struct netns_ct for better cache-line usage Reorder struct netns_ct so that atomic_t "count" changes don't slowdown users of read mostly fields. This is based on Eric Dumazet's proposed patch: "netfilter: conntrack: remove the central spinlock" http://thread.gmane.org/gmane.linux.network/268758/focus=47306 The tricky part of cache-aligning this structure, that it is getting inlined in struct net (include/net/net_namespace.h), thus changes to other netns_xxx structures affects our alignment. Eric's original patch contained an ambiguity on 32-bit regarding alignment in struct net. This patch also takes 32-bit into account, and in case of changed (struct net) alignment sysctl_xxx entries have been ordered according to how often they are accessed. Signed-off-by: Jesper Dangaard Brouer Reviewed-by: Jiri Benc Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c9c0c538b68b..fbcc7fa536dc 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -65,6 +65,23 @@ struct nf_ip_net { struct netns_ct { atomic_t count; unsigned int expect_count; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; + struct ctl_table_header *acct_sysctl_header; + struct ctl_table_header *tstamp_sysctl_header; + struct ctl_table_header *event_sysctl_header; + struct ctl_table_header *helper_sysctl_header; +#endif + char *slabname; + unsigned int sysctl_log_invalid; /* Log invalid packets */ + unsigned int sysctl_events_retry_timeout; + int sysctl_events; + int sysctl_acct; + int sysctl_auto_assign_helper; + bool auto_assign_helper_warned; + int sysctl_tstamp; + int sysctl_checksum; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; @@ -75,14 +92,6 @@ struct netns_ct { struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; - int sysctl_events; - unsigned int sysctl_events_retry_timeout; - int sysctl_acct; - int sysctl_tstamp; - int sysctl_checksum; - unsigned int sysctl_log_invalid; /* Log invalid packets */ - int sysctl_auto_assign_helper; - bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; #if defined(CONFIG_NF_CONNTRACK_LABELS) unsigned int labels_used; @@ -92,13 +101,5 @@ struct netns_ct { struct hlist_head *nat_bysource; unsigned int nat_htable_size; #endif -#ifdef CONFIG_SYSCTL - struct ctl_table_header *sysctl_header; - struct ctl_table_header *acct_sysctl_header; - struct ctl_table_header *tstamp_sysctl_header; - struct ctl_table_header *event_sysctl_header; - struct ctl_table_header *helper_sysctl_header; -#endif - char *slabname; }; #endif -- cgit v1.2.2 From f085ff1c131c08fb6b34802f63c22921c4d8c506 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 12 Dec 2013 13:06:50 -0800 Subject: netconf: add proxy-arp support Add support to netconf to show changes to proxy-arp status on a per interface basis via netlink in a manner similar to forwarding and reverse path state. Signed-off-by: Stephen Hemminger Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 64804a798b0c..bd969d77ce52 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -14,6 +14,7 @@ enum { NETCONFA_FORWARDING, NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, + NETCONFA_PROXY_ARP, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) -- cgit v1.2.2 From eecdaa6e20284efbe9e76eebd44eac2b22f7b5d7 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:09:55 -0800 Subject: bonding: add miimon netlink support Add IFLA_BOND_MIIMON to allow get/set of bonding parameter miimon via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6db460121f84..739b2db3f035 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -331,6 +331,7 @@ enum { IFLA_BOND_UNSPEC, IFLA_BOND_MODE, IFLA_BOND_ACTIVE_SLAVE, + IFLA_BOND_MIIMON, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 25852e29dfc58d249ad0db235996b36c33db6d61 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:02 -0800 Subject: bonding: add updelay netlink support Add IFLA_BOND_UPDELAY to allow get/set of bonding parameter updelay via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 739b2db3f035..7588c8120e7f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -332,6 +332,7 @@ enum { IFLA_BOND_MODE, IFLA_BOND_ACTIVE_SLAVE, IFLA_BOND_MIIMON, + IFLA_BOND_UPDELAY, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From c7461f9bf5a11bf88fdbd05b26c6d55b77dcd46d Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:09 -0800 Subject: bonding: add downdelay netlink support Add IFLA_BOND_DOWNDELAY to allow get/set of bonding parameter downdelay via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7588c8120e7f..f95506777710 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -333,6 +333,7 @@ enum { IFLA_BOND_ACTIVE_SLAVE, IFLA_BOND_MIIMON, IFLA_BOND_UPDELAY, + IFLA_BOND_DOWNDELAY, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 9f53e14e86c46a2300f17309f6308ad0dfbb53ff Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:16 -0800 Subject: bonding: add use_carrier netlink support Add IFLA_BOND_USE_CARRIER to allow get/set of bonding parameter use_carrier via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f95506777710..77c41bda36a4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -334,6 +334,7 @@ enum { IFLA_BOND_MIIMON, IFLA_BOND_UPDELAY, IFLA_BOND_DOWNDELAY, + IFLA_BOND_USE_CARRIER, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 06151dbcf3f76edbe900138cde9e862f429918c9 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:24 -0800 Subject: bonding: add arp_interval netlink support Add IFLA_BOND_ARP_INTERVAL to allow get/set of bonding parameter arp_interval via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 77c41bda36a4..c73d878afd58 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -335,6 +335,7 @@ enum { IFLA_BOND_UPDELAY, IFLA_BOND_DOWNDELAY, IFLA_BOND_USE_CARRIER, + IFLA_BOND_ARP_INTERVAL, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 7f28fa10e21376a10d3b9faad5836869465cc376 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:31 -0800 Subject: bonding: add arp_ip_target netlink support Add IFLA_BOND_ARP_IP_TARGET to allow get/set of bonding parameter arp_ip_target via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c73d878afd58..5b0c44469279 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -336,6 +336,7 @@ enum { IFLA_BOND_DOWNDELAY, IFLA_BOND_USE_CARRIER, IFLA_BOND_ARP_INTERVAL, + IFLA_BOND_ARP_IP_TARGET, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 29c4948293bfc426e52a921f4259eb3676961e81 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:38 -0800 Subject: bonding: add arp_validate netlink support Add IFLA_BOND_ARP_VALIDATE to allow get/set of bonding parameter arp_validate via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5b0c44469279..3ba16fa9ad6c 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -337,6 +337,7 @@ enum { IFLA_BOND_USE_CARRIER, IFLA_BOND_ARP_INTERVAL, IFLA_BOND_ARP_IP_TARGET, + IFLA_BOND_ARP_VALIDATE, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From d5c842544342fc3f13774ffc5581d4dd3975059b Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 12 Dec 2013 14:10:45 -0800 Subject: bonding: add arp_all_targets netlink support Add IFLA_BOND_ARP_ALL_TARGETS to allow get/set of bonding parameter arp_all_targets via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3ba16fa9ad6c..a897b7e22541 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -338,6 +338,7 @@ enum { IFLA_BOND_ARP_INTERVAL, IFLA_BOND_ARP_IP_TARGET, IFLA_BOND_ARP_VALIDATE, + IFLA_BOND_ARP_ALL_TARGETS, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From e001bfad913bf119fb67c1e8dd2d4ec1f5d392fa Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:55 +0800 Subject: bonding: create bond_first_slave_rcu() The bond_first_slave_rcu() will be used to instead of bond_first_slave() in rcu_read_lock(). According to the Jay Vosburgh's suggestion, the struct netdev_adjacent should hide from users who wanted to use it directly. so I package a new function to get the first slave of the bond. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5260d2eae2e6..2c74d20dad34 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2907,6 +2907,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv = netdev_lower_get_next_private_rcu(dev, &(iter))) void *netdev_adjacent_get_private(struct list_head *adj_list); +void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); -- cgit v1.2.2 From 6a9d1b91f34df1935bc0ad98114801a44db0f98c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 4 Dec 2013 22:39:17 +0100 Subject: mac80211: add pre-RCU-sync sta removal driver operation Currently, mac80211 allows drivers to keep RCU-protected station references that are cleared when the station is removed from the driver and consequently needs to synchronize twice, once before removing the station from the driver (so it can guarantee that the station is no longer used in TX towards the driver) and once after the station is removed from the driver. Add a new pre-RCU-synchronisation station removal operation to the API to allow drivers to clear/invalidate their RCU-protected station pointers before the RCU synchronisation. This will allow removing the second synchronisation by changing the driver API so that the driver may no longer assume a valid RCU-protected pointer after sta_remove/sta_state returns. The alternative to this would be to synchronize_rcu() in all the drivers that currently rely on this behaviour (only iwlmvm) but that would defeat the purpose. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c014acc09ebc..22b6dd96dbc6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2495,7 +2495,11 @@ enum ieee80211_roc_type { * AP, IBSS/WDS/mesh peer etc. This callback can sleep. * * @sta_remove: Notifies low level driver about removal of an associated - * station, AP, IBSS/WDS/mesh peer etc. This callback can sleep. + * station, AP, IBSS/WDS/mesh peer etc. Note that after the callback + * returns it isn't safe to use the pointer, not even RCU protected; + * no RCU grace period is guaranteed between returning here and freeing + * the station. See @sta_pre_rcu_remove if needed. + * This callback can sleep. * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback @@ -2514,7 +2518,17 @@ enum ieee80211_roc_type { * station (which can be the AP, a client, IBSS/WDS/mesh peer etc.) * This callback is mutually exclusive with @sta_add/@sta_remove. * It must not fail for down transitions but may fail for transitions - * up the list of states. + * up the list of states. Also note that after the callback returns it + * isn't safe to use the pointer, not even RCU protected - no RCU grace + * period is guaranteed between returning here and freeing the station. + * See @sta_pre_rcu_remove if needed. + * The callback can sleep. + * + * @sta_pre_rcu_remove: Notify driver about station removal before RCU + * synchronisation. This is useful if a driver needs to have station + * pointers protected using RCU, it can then use this call to clear + * the pointers instead of waiting for an RCU grace period to elapse + * in @sta_state. * The callback can sleep. * * @sta_rc_update: Notifies the driver of changes to the bitrates that can be @@ -2827,6 +2841,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state); + void (*sta_pre_rcu_remove)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta); void (*sta_rc_update)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, -- cgit v1.2.2 From 70dabeb74ee3bff71b65f47546dafb83edbb06b9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 14 Dec 2013 13:54:53 +0100 Subject: mac80211: let the driver reserve extra tailroom in beacons Can be used to add extra IEs (such as P2P NoA) without having to reallocate the buffer. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 22b6dd96dbc6..531785f5819e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1613,6 +1613,9 @@ enum ieee80211_hw_flags { * @extra_tx_headroom: headroom to reserve in each transmit skb * for use by the driver (e.g. for transmit headers.) * + * @extra_beacon_tailroom: tailroom to reserve in each beacon tx skb. + * Can be used by drivers to add extra IEs. + * * @channel_change_time: time (in microseconds) it takes to change channels. * * @max_signal: Maximum value for signal (rssi) in RX information, used @@ -1695,6 +1698,7 @@ struct ieee80211_hw { void *priv; u32 flags; unsigned int extra_tx_headroom; + unsigned int extra_beacon_tailroom; int channel_change_time; int vif_data_size; int sta_data_size; -- cgit v1.2.2 From 776e9dd90ca223b82166eb2835389493b5914cba Mon Sep 17 00:00:00 2001 From: Fan Du Date: Mon, 16 Dec 2013 18:47:49 +0800 Subject: xfrm: export verify_userspi_info for pkfey and netlink interface In order to check against valid IPcomp spi range, export verify_userspi_info for both pfkey and netlink interface. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 59f5d0a6c7f8..b7635ef4d436 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1563,6 +1563,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); +int verify_spi_info(u8 proto, u32 min, u32 max); int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, -- cgit v1.2.2 From 204e35a91c4b3327b7239d7687fbd4923edbbf08 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 5 Dec 2013 20:42:58 +0100 Subject: nl80211: add VHT support for set_bitrate_mask Add VHT MCS/NSS set support for nl80211_set_tx_bitrate_mask(). This should be used mainly for test purpose, to check different MCS/NSS VHT combinations. Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 + include/uapi/linux/nl80211.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6c3a650f3a8f..80a10212b1b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1767,6 +1767,7 @@ struct cfg80211_bitrate_mask { struct { u32 legacy; u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; + u16 vht_mcs[NL80211_VHT_NSS_MAX]; } control[IEEE80211_NUM_BANDS]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6e700645cd27..e1307909ecf1 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3112,6 +3112,8 @@ enum nl80211_key_attributes { * %NL80211_MAX_SUPP_RATES in a single array). * @NL80211_TXRATE_HT: HT (MCS) rates allowed for TX rate selection * in an array of MCS numbers. + * @NL80211_TXRATE_VHT: VHT rates allowed for TX rate selection, + * see &struct nl80211_txrate_vht * @__NL80211_TXRATE_AFTER_LAST: internal * @NL80211_TXRATE_MAX: highest TX rate attribute */ @@ -3119,6 +3121,7 @@ enum nl80211_tx_rate_attributes { __NL80211_TXRATE_INVALID, NL80211_TXRATE_LEGACY, NL80211_TXRATE_HT, + NL80211_TXRATE_VHT, /* keep last */ __NL80211_TXRATE_AFTER_LAST, @@ -3126,6 +3129,15 @@ enum nl80211_tx_rate_attributes { }; #define NL80211_TXRATE_MCS NL80211_TXRATE_HT +#define NL80211_VHT_NSS_MAX 8 + +/** + * struct nl80211_txrate_vht - VHT MCS/NSS txrate bitmap + * @mcs: MCS bitmap table for each NSS (array index 0 for 1 stream, etc.) + */ +struct nl80211_txrate_vht { + __u16 mcs[NL80211_VHT_NSS_MAX]; +}; /** * enum nl80211_band - Frequency band -- cgit v1.2.2 From d8bcc768c80e73cf4e948cb327949174b4b5b9e7 Mon Sep 17 00:00:00 2001 From: Tomasz Bursztyka Date: Thu, 12 Dec 2013 15:00:42 +0200 Subject: netfilter: nf_tables: Expose the table usage counter via netlink Userspace can therefore know whether a table is in use or not, and by how many chains. Suggested by Pablo Neira Ayuso. Signed-off-by: Tomasz Bursztyka Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 256d36b1b94a..b25481e16f0a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -110,11 +110,13 @@ enum nft_table_flags { * * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) + * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, + NFTA_TABLE_USE, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) -- cgit v1.2.2 From 71ae8aac3e198c6f3577cb7ad3a17f6128e97bfa Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Thu, 12 Dec 2013 16:09:05 +0100 Subject: lib: introduce arch optimized hash library We introduce a new hashing library that is meant to be used in the contexts where speed is more important than uniformity of the hashed values. The hash library leverages architecture specific implementation to achieve high performance and fall backs to jhash() for the generic case. On Intel-based x86 architectures, the library can exploit the crc32l instruction, part of the Intel SSE4.2 instruction set, if the instruction is supported by the processor. This implementation is twice as fast as the jhash() implementation on an i7 processor. Additional architectures, such as Arm64 provide instructions for accelerating the computation of CRC, so they could be added as well in follow-up work. Signed-off-by: Francesco Fusco Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Graf Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- include/asm-generic/hash.h | 9 +++++++++ include/linux/hash.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 include/asm-generic/hash.h (limited to 'include') diff --git a/include/asm-generic/hash.h b/include/asm-generic/hash.h new file mode 100644 index 000000000000..05cb3421ee7e --- /dev/null +++ b/include/asm-generic/hash.h @@ -0,0 +1,9 @@ +#ifndef __ASM_GENERIC_HASH_H +#define __ASM_GENERIC_HASH_H + +struct arch_hash_ops; +static inline void setup_arch_fast_hash(struct arch_hash_ops *ops) +{ +} + +#endif /* __ASM_GENERIC_HASH_H */ diff --git a/include/linux/hash.h b/include/linux/hash.h index f09a0ae4d858..bd1754c7ecef 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -15,6 +15,7 @@ */ #include +#include #include /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ @@ -78,4 +79,39 @@ static inline u32 hash32_ptr(const void *ptr) #endif return (u32)val; } + +struct fast_hash_ops { + u32 (*hash)(const void *data, u32 len, u32 seed); + u32 (*hash2)(const u32 *data, u32 len, u32 seed); +}; + +/** + * arch_fast_hash - Caclulates a hash over a given buffer that can have + * arbitrary size. This function will eventually use an + * architecture-optimized hashing implementation if + * available, and trades off distribution for speed. + * + * @data: buffer to hash + * @len: length of buffer in bytes + * @seed: start seed + * + * Returns 32bit hash. + */ +extern u32 arch_fast_hash(const void *data, u32 len, u32 seed); + +/** + * arch_fast_hash2 - Caclulates a hash over a given buffer that has a + * size that is of a multiple of 32bit words. This + * function will eventually use an architecture- + * optimized hashing implementation if available, + * and trades off distribution for speed. + * + * @data: buffer to hash (must be 32bit padded) + * @len: number of 32bit words + * @seed: start seed + * + * Returns 32bit hash. + */ +extern u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed); + #endif /* _LINUX_HASH_H */ -- cgit v1.2.2 From be78cfcb25fd163ad7c415cdc0ff343fbc8d22ec Mon Sep 17 00:00:00 2001 From: wangweidong Date: Fri, 13 Dec 2013 13:51:04 +0800 Subject: sctp: Reorder 'struc association' members to reduce its size Members of 'struct association' are not in appropriate order to reuse compiler added padding on 64bit architectures. In this patch we reorder those struct members and help reduce the size of the structure from 2776 bytes to 2720 bytes on 64 bit architectures. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 59 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e416d6ac9c70..e8b2ff5c2804 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1360,12 +1360,6 @@ struct sctp_association { /* This is all information about our peer. */ struct { - /* rwnd - * - * Peer Rwnd : Current calculated value of the peer's rwnd. - */ - __u32 rwnd; - /* transport_addr_list * * Peer : A list of SCTP transport addresses that the @@ -1383,6 +1377,12 @@ struct sctp_association { */ struct list_head transport_addr_list; + /* rwnd + * + * Peer Rwnd : Current calculated value of the peer's rwnd. + */ + __u32 rwnd; + /* transport_count * * Peer : A count of the number of peer addresses @@ -1465,6 +1465,20 @@ struct sctp_association { */ struct sctp_tsnmap tsn_map; + /* This mask is used to disable sending the ASCONF chunk + * with specified parameter to peer. + */ + __be16 addip_disabled_mask; + + /* These are capabilities which our peer advertised. */ + __u8 ecn_capable:1, /* Can peer do ECN? */ + ipv4_address:1, /* Peer understands IPv4 addresses? */ + ipv6_address:1, /* Peer understands IPv6 addresses? */ + hostname_address:1, /* Peer understands DNS addresses? */ + asconf_capable:1, /* Does peer support ADDIP? */ + prsctp_capable:1, /* Can peer do PR-SCTP? */ + auth_capable:1; /* Is peer doing SCTP-AUTH? */ + /* Ack State : This flag indicates if the next received * : packet is to be responded to with a * : SACK. This is initializedto 0. When a packet @@ -1479,25 +1493,11 @@ struct sctp_association { __u32 sack_cnt; __u32 sack_generation; - /* These are capabilities which our peer advertised. */ - __u8 ecn_capable:1, /* Can peer do ECN? */ - ipv4_address:1, /* Peer understands IPv4 addresses? */ - ipv6_address:1, /* Peer understands IPv6 addresses? */ - hostname_address:1, /* Peer understands DNS addresses? */ - asconf_capable:1, /* Does peer support ADDIP? */ - prsctp_capable:1, /* Can peer do PR-SCTP? */ - auth_capable:1; /* Is peer doing SCTP-AUTH? */ - __u32 adaptation_ind; /* Adaptation Code point. */ - /* This mask is used to disable sending the ASCONF chunk - * with specified parameter to peer. - */ - __be16 addip_disabled_mask; - struct sctp_inithdr_host i; - int cookie_len; void *cookie; + int cookie_len; /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. * C1) ... "Peer-Serial-Number'. This value MUST be initialized to the @@ -1529,14 +1529,14 @@ struct sctp_association { */ sctp_state_t state; - /* The cookie life I award for any cookie. */ - ktime_t cookie_life; - /* Overall : The overall association error count. * Error Count : [Clear this any time I get something.] */ int overall_error_count; + /* The cookie life I award for any cookie. */ + ktime_t cookie_life; + /* These are the association's initial, max, and min RTO values. * These values will be initialized by system defaults, but can * be modified via the SCTP_RTOINFO socket option. @@ -1591,10 +1591,9 @@ struct sctp_association { /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; + __u32 sackfreq; /* SACK delay timeout */ unsigned long sackdelay; - __u32 sackfreq; - unsigned long timeouts[SCTP_NUM_TIMEOUT_TYPES]; struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES]; @@ -1602,12 +1601,12 @@ struct sctp_association { /* Transport to which SHUTDOWN chunk was last sent. */ struct sctp_transport *shutdown_last_sent_to; - /* How many times have we resent a SHUTDOWN */ - int shutdown_retries; - /* Transport to which INIT chunk was last sent. */ struct sctp_transport *init_last_sent_to; + /* How many times have we resent a SHUTDOWN */ + int shutdown_retries; + /* Next TSN : The next TSN number to be assigned to a new * : DATA chunk. This is sent in the INIT or INIT * : ACK chunk to the peer and incremented each @@ -1818,8 +1817,8 @@ struct sctp_association { * after reaching 4294967295. */ __u32 addip_serial; - union sctp_addr *asconf_addr_del_pending; int src_out_of_asoc_ok; + union sctp_addr *asconf_addr_del_pending; struct sctp_transport *new_transport; /* SCTP AUTH: list of the endpoint shared keys. These -- cgit v1.2.2 From 481b5d938b4a60f052d7e15965a8417125e0d39f Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Fri, 13 Dec 2013 10:20:27 +0100 Subject: net: phy: provide phy_resume/phy_suspend helpers This adds helper functions to resume and suspend a given phy_device by calling the corresponding driver callbacks if available. Signed-off-by: Sebastian Hesselbarth Acked-by: Mugunthan V N Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 90a666e0884b..73384ff3b5e5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -548,6 +548,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); int phy_init_hw(struct phy_device *phydev); +int phy_suspend(struct phy_device *phydev); +int phy_resume(struct phy_device *phydev); struct phy_device * phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); -- cgit v1.2.2 From 477bb93320cec7ae74d5ccfad4f2bfa0b28fbe90 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 13 Dec 2013 12:35:56 -0800 Subject: net: remove dead code for add/del multiple These function to manipulate multiple addresses are not used anywhere in current net-next tree. Some out of tree code maybe using these but too bad; they should submit their code upstream.. Also, make __hw_addr_flush local since only used by dev_addr_lists.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c74d20dad34..a0dfcc8c002b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2810,17 +2810,10 @@ int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); /* General hardware address lists handling functions */ -int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type); -void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, - struct netdev_hw_addr_list *from_list, - int addr_len, unsigned char addr_type); int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); -void __hw_addr_flush(struct netdev_hw_addr_list *list); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ @@ -2828,10 +2821,6 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); -int dev_addr_add_multiple(struct net_device *to_dev, - struct net_device *from_dev, unsigned char addr_type); -int dev_addr_del_multiple(struct net_device *to_dev, - struct net_device *from_dev, unsigned char addr_type); void dev_addr_flush(struct net_device *dev); int dev_addr_init(struct net_device *dev); -- cgit v1.2.2 From d4589926d7a9a4b88e650bb9154bab71e8d2a7dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Dec 2013 13:51:23 -0800 Subject: tcp: refine TSO splits While investigating performance problems on small RPC workloads, I noticed linux TCP stack was always splitting the last TSO skb into two parts (skbs). One being a multiple of MSS, and a small one with the Push flag. This split is done even if TCP_NODELAY is set, or if no small packet is in flight. Example with request/response of 4K/4K IP A > B: . ack 68432 win 2783 IP A > B: . 65537:68433(2896) ack 69632 win 2783 IP A > B: P 68433:69633(1200) ack 69632 win 2783 IP B > A: . ack 68433 win 2768 IP B > A: . 69632:72528(2896) ack 69633 win 2768 IP B > A: P 72528:73728(1200) ack 69633 win 2768 IP A > B: . ack 72528 win 2783 IP A > B: . 69633:72529(2896) ack 73728 win 2783 IP A > B: P 72529:73729(1200) ack 73728 win 2783 We can avoid this split by including the Nagle tests at the right place. Note : If some NIC had trouble sending TSO packets with a partial last segment, we would have hit the problem in GRO/forwarding workload already. tcp_minshall_update() is moved to tcp_output.c and is updated as we might feed a TSO packet with a partial last segment. This patch tremendously improves performance, as the traffic now looks like : IP A > B: . ack 98304 win 2783 IP A > B: P 94209:98305(4096) ack 98304 win 2783 IP B > A: . ack 98305 win 2768 IP B > A: P 98304:102400(4096) ack 98305 win 2768 IP A > B: . ack 102400 win 2783 IP A > B: P 98305:102401(4096) ack 102400 win 2783 IP B > A: . ack 102401 win 2768 IP B > A: P 102400:106496(4096) ack 102401 win 2768 IP A > B: . ack 106496 win 2783 IP A > B: P 102401:106497(4096) ack 106496 win 2783 IP B > A: . ack 106497 win 2768 IP B > A: P 106496:110592(4096) ack 106497 win 2768 Before : lpq83:~# nstat >/dev/null;perf stat ./super_netperf 200 -t TCP_RR -H lpq84 -l 20 -- -r 4K,4K 280774 Performance counter stats for './super_netperf 200 -t TCP_RR -H lpq84 -l 20 -- -r 4K,4K': 205719.049006 task-clock # 9.278 CPUs utilized 8,449,968 context-switches # 0.041 M/sec 1,935,997 CPU-migrations # 0.009 M/sec 160,541 page-faults # 0.780 K/sec 548,478,722,290 cycles # 2.666 GHz [83.20%] 455,240,670,857 stalled-cycles-frontend # 83.00% frontend cycles idle [83.48%] 272,881,454,275 stalled-cycles-backend # 49.75% backend cycles idle [66.73%] 166,091,460,030 instructions # 0.30 insns per cycle # 2.74 stalled cycles per insn [83.39%] 29,150,229,399 branches # 141.699 M/sec [83.30%] 1,943,814,026 branch-misses # 6.67% of all branches [83.32%] 22.173517844 seconds time elapsed lpq83:~# nstat | egrep "IpOutRequests|IpExtOutOctets" IpOutRequests 16851063 0.0 IpExtOutOctets 23878580777 0.0 After patch : lpq83:~# nstat >/dev/null;perf stat ./super_netperf 200 -t TCP_RR -H lpq84 -l 20 -- -r 4K,4K 280877 Performance counter stats for './super_netperf 200 -t TCP_RR -H lpq84 -l 20 -- -r 4K,4K': 107496.071918 task-clock # 4.847 CPUs utilized 5,635,458 context-switches # 0.052 M/sec 1,374,707 CPU-migrations # 0.013 M/sec 160,920 page-faults # 0.001 M/sec 281,500,010,924 cycles # 2.619 GHz [83.28%] 228,865,069,307 stalled-cycles-frontend # 81.30% frontend cycles idle [83.38%] 142,462,742,658 stalled-cycles-backend # 50.61% backend cycles idle [66.81%] 95,227,712,566 instructions # 0.34 insns per cycle # 2.40 stalled cycles per insn [83.43%] 16,209,868,171 branches # 150.795 M/sec [83.20%] 874,252,952 branch-misses # 5.39% of all branches [83.37%] 22.175821286 seconds time elapsed lpq83:~# nstat | egrep "IpOutRequests|IpExtOutOctets" IpOutRequests 11239428 0.0 IpExtOutOctets 23595191035 0.0 Indeed, the occupancy of tx skbs (IpExtOutOctets/IpOutRequests) is higher : 2099 instead of 1417, thus helping GRO to be more efficient when using FQ packet scheduler. Many thanks to Neal for review and ideas. Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Nandita Dukkipati Cc: Van Jacobson Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f7e1ab2139ef..9cd62bc09055 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -978,13 +978,6 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp) } bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight); -static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss, - const struct sk_buff *skb) -{ - if (skb->len < mss) - tp->snd_sml = TCP_SKB_CB(skb)->end_seq; -} - static inline void tcp_check_probe_timer(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); -- cgit v1.2.2 From 0a98a0d12c40f9354b942325045cae123d594341 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Sun, 15 Dec 2013 16:41:51 -0800 Subject: bonding: add primary attribute netlink support Add IFLA_BOND_PRIMARY to allow get/set of bonding parameter primary via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a897b7e22541..fb3cfe2b176a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -339,6 +339,7 @@ enum { IFLA_BOND_ARP_IP_TARGET, IFLA_BOND_ARP_VALIDATE, IFLA_BOND_ARP_ALL_TARGETS, + IFLA_BOND_PRIMARY, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 8a41ae4496e534a8b68d9bc3c79113e16d1fcd4c Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Sun, 15 Dec 2013 16:41:58 -0800 Subject: bonding: add primary_select attribute netlink support Add IFLA_BOND_PRIMARY_SELECT to allow get/set of bonding parameter primary_select via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fb3cfe2b176a..cf59d54a199d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -340,6 +340,7 @@ enum { IFLA_BOND_ARP_VALIDATE, IFLA_BOND_ARP_ALL_TARGETS, IFLA_BOND_PRIMARY, + IFLA_BOND_PRIMARY_RESELECT, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 89901972de4c00e74e56529804493734d77ee3d3 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Sun, 15 Dec 2013 16:42:05 -0800 Subject: bonding: add fail_over_mac attribute netlink support Add IFLA_BOND_FAIL_OVER_MAC to allow get/set of bonding parameter fail_over_mac via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index cf59d54a199d..fd181abd19b9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -341,6 +341,7 @@ enum { IFLA_BOND_ARP_ALL_TARGETS, IFLA_BOND_PRIMARY, IFLA_BOND_PRIMARY_RESELECT, + IFLA_BOND_FAIL_OVER_MAC, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From f70161c67231f54f784529d7447ce4386d258b7a Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Sun, 15 Dec 2013 16:42:12 -0800 Subject: bonding: add xmit_hash_policy attribute netlink support Add IFLA_BOND_XMIT_HASH_POLICY to allow get/set of bonding parameter xmit_hash_policy via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fd181abd19b9..1a5d394894ea 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -342,6 +342,7 @@ enum { IFLA_BOND_PRIMARY, IFLA_BOND_PRIMARY_RESELECT, IFLA_BOND_FAIL_OVER_MAC, + IFLA_BOND_XMIT_HASH_POLICY, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From d8838de70adc64e20db531333e035aacd5910fca Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Sun, 15 Dec 2013 16:42:19 -0800 Subject: bonding: add resend_igmp attribute netlink support Add IFLA_BOND_RESEND_IGMP to allow get/set of bonding parameter resend_igmp via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1a5d394894ea..6e275d5214f3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -343,6 +343,7 @@ enum { IFLA_BOND_PRIMARY_RESELECT, IFLA_BOND_FAIL_OVER_MAC, IFLA_BOND_XMIT_HASH_POLICY, + IFLA_BOND_RESEND_IGMP, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 3958afa1b272eb07109fd31549e69193b4d7c364 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:06 -0800 Subject: net: Change skb_get_rxhash to skb_get_hash Changing name of function as part of making the hash in skbuff to be generic property, not just for receive path. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77c7aae1c6b2..4725b953e00d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -703,11 +703,11 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); -void __skb_get_rxhash(struct sk_buff *skb); -static inline __u32 skb_get_rxhash(struct sk_buff *skb) +void __skb_get_hash(struct sk_buff *skb); +static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_rxhash) - __skb_get_rxhash(skb); + __skb_get_hash(skb); return skb->rxhash; } -- cgit v1.2.2 From 7539fadcb8146a5f0db51e80d99c9e724efec7b0 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:12:18 -0800 Subject: net: Add utility functions to clear rxhash In several places 'skb->rxhash = 0' is being done to clear the rxhash value in an skb. This does not clear l4_rxhash which could still be set so that the rxhash wouldn't be recalculated on subsequent call to skb_get_rxhash. This patch adds an explict function to clear all the rxhash related information in the skb properly. skb_clear_hash_if_not_l4 clears the rxhash only if it is not marked as l4_rxhash. Fixed up places where 'skb->rxhash = 0' was being called. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 12 ++++++++++++ include/net/dst.h | 5 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4725b953e00d..7deb7ad65914 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -712,6 +712,18 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->rxhash; } +static inline void skb_clear_hash(struct sk_buff *skb) +{ + skb->rxhash = 0; + skb->l4_rxhash = 0; +} + +static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) +{ + if (!skb->l4_rxhash) + skb_clear_hash(skb); +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/include/net/dst.h b/include/net/dst.h index 44995c13e941..77eb53fabfb0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -322,12 +322,11 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; /* - * Clear rxhash so that we can recalulate the hash for the + * Clear hash so that we can recalulate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. */ - if (!skb->l4_rxhash) - skb->rxhash = 0; + skb_clear_hash_if_not_l4(skb); skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, !net_eq(net, dev_net(dev))); } -- cgit v1.2.2 From 09323cc479316e046931a2c679932204b36fea6c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:16:19 -0800 Subject: net: Add function to set the rxhash The function skb_set_rxash was added for drivers to call to set the rxhash in an skb. The type of hash is also specified as a parameter (L2, L3, L4, or unknown type). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7deb7ad65914..99846956dff9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -703,6 +703,46 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); +/* + * Packet hash types specify the type of hash in skb_set_hash. + * + * Hash types refer to the protocol layer addresses which are used to + * construct a packet's hash. The hashes are used to differentiate or identify + * flows of the protocol layer for the hash type. Hash types are either + * layer-2 (L2), layer-3 (L3), or layer-4 (L4). + * + * Properties of hashes: + * + * 1) Two packets in different flows have different hash values + * 2) Two packets in the same flow should have the same hash value + * + * A hash at a higher layer is considered to be more specific. A driver should + * set the most specific hash possible. + * + * A driver cannot indicate a more specific hash than the layer at which a hash + * was computed. For instance an L3 hash cannot be set as an L4 hash. + * + * A driver may indicate a hash level which is less specific than the + * actual layer the hash was computed on. For instance, a hash computed + * at L4 may be considered an L3 hash. This should only be done if the + * driver can't unambiguously determine that the HW computed the hash at + * the higher layer. Note that the "should" in the second property above + * permits this. + */ +enum pkt_hash_types { + PKT_HASH_TYPE_NONE, /* Undefined type */ + PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ + PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ + PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ +}; + +static inline void +skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) +{ + skb->l4_rxhash = (type == PKT_HASH_TYPE_L4); + skb->rxhash = hash; +} + void __skb_get_hash(struct sk_buff *skb); static inline __u32 skb_get_hash(struct sk_buff *skb) { -- cgit v1.2.2 From 3df7a74e797aa2d8be9b7c649cfd56a8517dcf6e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 15 Dec 2013 22:16:29 -0800 Subject: net: Add utility function to copy skb hash Adds skb_copy_hash to copy rxhash and l4_rxhash from one skb to another. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 99846956dff9..06bedeb0d49e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -764,6 +764,12 @@ static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) skb_clear_hash(skb); } +static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) +{ + to->rxhash = from->rxhash; + to->l4_rxhash = from->l4_rxhash; +}; + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { -- cgit v1.2.2 From e4d26f4b080f55e9577b45e6b51a04971eb459e9 Mon Sep 17 00:00:00 2001 From: Atzm Watanabe Date: Tue, 17 Dec 2013 22:53:36 +0900 Subject: packet: fill the gap of TPACKET_ALIGNMENT with zeros struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. Explicitly defining and zeroing the gap of this makes additional changes easier. Signed-off-by: Atzm Watanabe Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 1e24aa701cbd..9185dc9a4485 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -133,7 +133,7 @@ struct tpacket2_hdr { __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; - __u16 tp_padding; + __u8 tp_padding[6]; }; struct tpacket_hdr_variant1 { @@ -154,6 +154,7 @@ struct tpacket3_hdr { union { struct tpacket_hdr_variant1 hv1; }; + __u8 tp_padding[12]; }; struct tpacket_bd_ts { -- cgit v1.2.2 From a0cdfcf39362410d5ea983f4daf67b38de129408 Mon Sep 17 00:00:00 2001 From: Atzm Watanabe Date: Tue, 17 Dec 2013 22:53:40 +0900 Subject: packet: deliver VLAN TPID to userspace This enables userspace to get VLAN TPID as well as the VLAN TCI. Signed-off-by: Atzm Watanabe Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 9185dc9a4485..e9d844c80c11 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -84,17 +84,18 @@ struct tpacket_auxdata { __u16 tp_mac; __u16 tp_net; __u16 tp_vlan_tci; - __u16 tp_padding; + __u16 tp_vlan_tpid; }; /* Rx ring - header status */ -#define TP_STATUS_KERNEL 0 -#define TP_STATUS_USER (1 << 0) -#define TP_STATUS_COPY (1 << 1) -#define TP_STATUS_LOSING (1 << 2) -#define TP_STATUS_CSUMNOTREADY (1 << 3) -#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ -#define TP_STATUS_BLK_TMO (1 << 5) +#define TP_STATUS_KERNEL 0 +#define TP_STATUS_USER (1 << 0) +#define TP_STATUS_COPY (1 << 1) +#define TP_STATUS_LOSING (1 << 2) +#define TP_STATUS_CSUMNOTREADY (1 << 3) +#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */ +#define TP_STATUS_BLK_TMO (1 << 5) +#define TP_STATUS_VLAN_TPID_VALID (1 << 6) /* auxdata has valid tp_vlan_tpid */ /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0 @@ -133,12 +134,15 @@ struct tpacket2_hdr { __u32 tp_sec; __u32 tp_nsec; __u16 tp_vlan_tci; - __u8 tp_padding[6]; + __u16 tp_vlan_tpid; + __u8 tp_padding[4]; }; struct tpacket_hdr_variant1 { __u32 tp_rxhash; __u32 tp_vlan_tci; + __u16 tp_vlan_tpid; + __u16 tp_padding; }; struct tpacket3_hdr { @@ -154,7 +158,7 @@ struct tpacket3_hdr { union { struct tpacket_hdr_variant1 hv1; }; - __u8 tp_padding[12]; + __u8 tp_padding[8]; }; struct tpacket_bd_ts { -- cgit v1.2.2 From d84231d3a2b20bea26327d9b83c8bd8ba55dc68c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:04 -0800 Subject: net_sched: remove get_stats from tc_action_ops It is not used. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 9e90fdff470d..04c6825e172b 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -72,7 +72,6 @@ struct tc_action_ops { __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); - int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); int (*cleanup)(struct tc_action *, int bind); int (*lookup)(struct tc_action *, u32); -- cgit v1.2.2 From 33be627159913b094bb578e83e9a7fdc66c10208 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:05 -0800 Subject: net_sched: act: use standard struct list_head Currently actions are chained by a singly linked list, therefore it is a bit hard to add and remove a specific entry. Convert it to struct list_head so that in the latter patch we can remove an action without finding its head. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 12 ++++++------ include/net/pkt_cls.h | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 04c6825e172b..a72642610790 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -60,7 +60,7 @@ struct tc_action { const struct tc_action_ops *ops; __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; - struct tc_action *next; + struct list_head list; }; #define TCA_CAP_NONE 0 @@ -99,16 +99,16 @@ void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_register_action(struct tc_action_ops *a); int tcf_unregister_action(struct tc_action_ops *a); -void tcf_action_destroy(struct tc_action *a, int bind); -int tcf_action_exec(struct sk_buff *skb, const struct tc_action *a, +void tcf_action_destroy(struct list_head *actions, int bind); +int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, struct tcf_result *res); -struct tc_action *tcf_action_init(struct net *net, struct nlattr *nla, +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *n, int ovr, - int bind); + int bind, struct list_head *); struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, struct nlattr *est, char *n, int ovr, int bind); -int tcf_action_dump(struct sk_buff *skb, struct tc_action *a, int, int); +int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 2ebef77a2f9a..34fe693ddf9a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -62,7 +62,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT - struct tc_action *action; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + struct list_head actions; #endif }; @@ -74,6 +75,13 @@ struct tcf_ext_map { int police; }; +static inline void tcf_exts_init(struct tcf_exts *exts) +{ +#ifdef CONFIG_NET_CLS_ACT + INIT_LIST_HEAD(&exts->actions); +#endif +} + /** * tcf_exts_is_predicative - check if a predicative extension is present * @exts: tc filter extensions handle @@ -85,7 +93,7 @@ static inline int tcf_exts_is_predicative(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - return !!exts->action; + return !list_empty(&exts->actions); #else return 0; #endif @@ -120,8 +128,8 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (exts->action) - return tcf_action_exec(skb, exts->action, res); + if (!list_empty(&exts->actions)) + return tcf_action_exec(skb, &exts->actions, res); #endif return 0; } -- cgit v1.2.2 From 5da57f422d89c504a1d72dadd4e19d3dca8e974e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:07 -0800 Subject: net_sched: cls: refactor out struct tcf_ext_map These information can be saved in tcf_exts, and this will simplify the code. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 34fe693ddf9a..50ea07969c09 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -65,21 +65,21 @@ struct tcf_exts { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ struct list_head actions; #endif -}; - -/* Map to export classifier specific extension TLV types to the - * generic extensions API. Unsupported extensions must be set to 0. - */ -struct tcf_ext_map { + /* Map to export classifier specific extension TLV types to the + * generic extensions API. Unsupported extensions must be set to 0. + */ int action; int police; }; -static inline void tcf_exts_init(struct tcf_exts *exts) +static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT + exts->type = 0; INIT_LIST_HEAD(&exts->actions); #endif + exts->action = action; + exts->police = police; } /** @@ -136,15 +136,12 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, - const struct tcf_ext_map *map); + struct tcf_exts *exts); void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts); void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src); -int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, - const struct tcf_ext_map *map); -int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, - const struct tcf_ext_map *map); +int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts); +int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information -- cgit v1.2.2 From 369ba56787d7469c0afda70bb9ff76ad5faaead5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:08 -0800 Subject: net_sched: init struct tcf_hashinfo at register time It looks weird to store the lock out of the struct but still points to a static variable. Just move them into the struct. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index a72642610790..2b5ec5abfeb3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -38,7 +38,7 @@ struct tcf_common { struct tcf_hashinfo { struct tcf_common **htab; unsigned int hmask; - rwlock_t *lock; + rwlock_t lock; }; static inline unsigned int tcf_hash(u32 index, unsigned int hmask) @@ -46,6 +46,22 @@ static inline unsigned int tcf_hash(u32 index, unsigned int hmask) return index & hmask; } +static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) +{ + rwlock_init(&hf->lock); + hf->hmask = mask; + hf->htab = kzalloc((mask + 1) * sizeof(struct tcf_common *), + GFP_KERNEL); + if (!hf->htab) + return -ENOMEM; + return 0; +} + +static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf) +{ + kfree(hf->htab); +} + #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 -- cgit v1.2.2 From 89819dc01f4c5920783f561597a48d9d75220e9e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:09 -0800 Subject: net_sched: convert tcf_hashinfo to hlist and use spinlock So that we don't need to play with singly linked list, and since the code is not on hot path, we can use spinlock instead of rwlock. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 2b5ec5abfeb3..22418d1a8396 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -9,7 +9,7 @@ #include struct tcf_common { - struct tcf_common *tcfc_next; + struct hlist_node tcfc_head; u32 tcfc_index; int tcfc_refcnt; int tcfc_bindcnt; @@ -22,7 +22,7 @@ struct tcf_common { spinlock_t tcfc_lock; struct rcu_head tcfc_rcu; }; -#define tcf_next common.tcfc_next +#define tcf_head common.tcfc_head #define tcf_index common.tcfc_index #define tcf_refcnt common.tcfc_refcnt #define tcf_bindcnt common.tcfc_bindcnt @@ -36,9 +36,9 @@ struct tcf_common { #define tcf_rcu common.tcfc_rcu struct tcf_hashinfo { - struct tcf_common **htab; + struct hlist_head *htab; unsigned int hmask; - rwlock_t lock; + spinlock_t lock; }; static inline unsigned int tcf_hash(u32 index, unsigned int hmask) @@ -48,12 +48,16 @@ static inline unsigned int tcf_hash(u32 index, unsigned int hmask) static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) { - rwlock_init(&hf->lock); + int i; + + spin_lock_init(&hf->lock); hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct tcf_common *), + hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), GFP_KERNEL); if (!hf->htab) return -ENOMEM; + for (i = 0; i < mask + 1; i++) + INIT_HLIST_HEAD(&hf->htab[i]); return 0; } -- cgit v1.2.2 From 1f747c26c48bb290c79c34e155860c7e2ec3926a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:10 -0800 Subject: net_sched: convert tc_action_ops to use struct list_head We don't need to maintain our own singly linked list code. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 22418d1a8396..77d5d8156efc 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -85,7 +85,7 @@ struct tc_action { #define TCA_CAP_NONE 0 struct tc_action_ops { - struct tc_action_ops *next; + struct list_head head; struct tcf_hashinfo *hinfo; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ -- cgit v1.2.2 From 3627287463b4acddb83d24fabb1e0a304e39565c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 15 Dec 2013 20:15:11 -0800 Subject: net_sched: convert tcf_proto_ops to use struct list_head We don't need to maintain our own singly linked list code. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d0a6321c302e..013d96dc6918 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -185,7 +185,7 @@ struct tcf_result { }; struct tcf_proto_ops { - struct tcf_proto_ops *next; + struct list_head head; char kind[IFNAMSIZ]; int (*classify)(struct sk_buff *, -- cgit v1.2.2 From 974eda11c54290a1be8f8b155edae7d791e5ce57 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 14 Dec 2013 05:13:38 +0100 Subject: inet: make no_pmtu_disc per namespace and kill ipv4_config The other field in ipv4_config, log_martians, was converted to a per-interface setting, so we can just remove the whole structure. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 6 ------ include/net/netns/ipv4.h | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 5a25f36fe3a7..535664477c4a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -177,12 +177,6 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len); -struct ipv4_config { - int log_martians; - int no_pmtu_disc; -}; - -extern struct ipv4_config ipv4_config; #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define IP_INC_STATS_BH(net, field) SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field) #define IP_ADD_STATS(net, field, val) SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ee520cba2ec2..929a668e91a9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -69,6 +69,7 @@ struct netns_ipv4 { struct local_ports sysctl_local_ports; int sysctl_tcp_ecn; + int sysctl_ip_no_pmtu_disc; kgid_t sysctl_ping_group_range[2]; -- cgit v1.2.2 From 93b36cf3425b9bd9c56df7680fb237686b9c82ae Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 15 Dec 2013 03:41:14 +0100 Subject: ipv6: support IPV6_PMTU_INTERFACE on sockets IPV6_PMTU_INTERFACE is the same as IPV6_PMTU_PROBE for ipv6. Add it nontheless for symmetry with IPv4 sockets. Also drop incoming MTU information if this mode is enabled. The additional bit in ipv6_pinfo just eats in the padding behind the bitfield. There are no changes to the layout of the struct at all. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/ip6_route.h | 7 ++++++- include/uapi/linux/in6.h | 4 ++++ 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3fde06645553..7e1ded0d8e45 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -191,7 +191,7 @@ struct ipv6_pinfo { /* sockopt flags */ __u16 recverr:1, sndflow:1, - pmtudisc:2, + pmtudisc:3, ipv6only:1, srcprefs:3, /* 001: prefer temporary address * 010: prefer public address diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 733747ce163c..c2626ce1f2ad 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -178,10 +178,15 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? + return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } +static inline bool ip6_sk_accept_pmtu(const struct sock *sk) +{ + return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE; +} + static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) { return &rt->rt6i_gateway; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 440d5c479145..f94f1d013bf2 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -188,6 +188,10 @@ enum { #define IPV6_PMTUDISC_WANT 1 #define IPV6_PMTUDISC_DO 2 #define IPV6_PMTUDISC_PROBE 3 +/* same as IPV6_PMTUDISC_PROBE, provided for symetry with IPv4 + * also see comments on IP_PMTUDISC_INTERFACE + */ +#define IPV6_PMTUDISC_INTERFACE 4 /* Flowlabel */ #define IPV6_FLOWLABEL_MGR 32 -- cgit v1.2.2 From 237217546d44fe06c16b8895eecaef22f18ba5ee Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Wed, 18 Dec 2013 16:05:48 +0100 Subject: lib: hash: follow-up fixups for arch hash This patch adds the include file to pull in __read_mostly on some architectures e.g. ppc and also fixes up signatures in generic asm. Reported-by: Fengguang Wu Signed-off-by: Francesco Fusco Signed-off-by: David S. Miller --- include/asm-generic/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hash.h b/include/asm-generic/hash.h index 05cb3421ee7e..b6312843dbd9 100644 --- a/include/asm-generic/hash.h +++ b/include/asm-generic/hash.h @@ -1,8 +1,8 @@ #ifndef __ASM_GENERIC_HASH_H #define __ASM_GENERIC_HASH_H -struct arch_hash_ops; -static inline void setup_arch_fast_hash(struct arch_hash_ops *ops) +struct fast_hash_ops; +static inline void setup_arch_fast_hash(struct fast_hash_ops *ops) { } -- cgit v1.2.2 From a7022e65c68ad89d6eb64f21aa4831c3822403d4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 16 Dec 2013 21:49:14 +0100 Subject: mac80211: add helper functions for tracking P2P NoA state Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 531785f5819e..f838af816b56 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4652,4 +4652,51 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta); +/** + * struct ieee80211_noa_data - holds temporary data for tracking P2P NoA state + * + * @next_tsf: TSF timestamp of the next absent state change + * @has_next_tsf: next absent state change event pending + * + * @absent: descriptor bitmask, set if GO is currently absent + * + * private: + * + * @count: count fields from the NoA descriptors + * @desc: adjusted data from the NoA + */ +struct ieee80211_noa_data { + u32 next_tsf; + bool has_next_tsf; + + u8 absent; + + u8 count[IEEE80211_P2P_NOA_DESC_MAX]; + struct { + u32 start; + u32 duration; + u32 interval; + } desc[IEEE80211_P2P_NOA_DESC_MAX]; +}; + +/** + * ieee80211_parse_p2p_noa - initialize NoA tracking data from P2P IE + * + * @attr: P2P NoA IE + * @data: NoA tracking data + * @tsf: current TSF timestamp + * + * Return: number of successfully parsed descriptors + */ +int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, + struct ieee80211_noa_data *data, u32 tsf); + +/** + * ieee80211_update_p2p_noa - get next pending P2P GO absent state change + * + * @data: NoA tracking data + * @tsf: current TSF timestamp + */ +void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf); + #endif /* MAC80211_H */ -- cgit v1.2.2 From 567ffc3509b2d3f965a49a18631d3da7f9a96d4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Dec 2013 14:43:31 +0100 Subject: nl80211: support vendor-specific events In addition to vendor-specific commands, also support vendor-specific events. These must be registered with cfg80211 before they can be used. They're also advertised in nl80211 in the wiphy information so that userspace knows can be expected. The events themselves are sent on a new multicast group called "vendor". Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 65 +++++++++++++++++++++++++++++++++++++++++--- include/uapi/linux/nl80211.h | 3 ++ 2 files changed, 64 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 80a10212b1b9..6d238a4331bd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2824,6 +2824,8 @@ struct wiphy_vendor_command { * * @vendor_commands: array of vendor commands supported by the hardware * @n_vendor_commands: number of vendor commands + * @vendor_events: array of vendor events supported by the hardware + * @n_vendor_events: number of vendor events */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -2936,7 +2938,8 @@ struct wiphy { const struct wiphy_coalesce_support *coalesce; const struct wiphy_vendor_command *vendor_commands; - int n_vendor_commands; + const struct nl80211_vendor_cmd_info *vendor_events; + int n_vendor_commands, n_vendor_events; char priv[0] __aligned(NETDEV_ALIGN); }; @@ -3907,6 +3910,14 @@ struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_attrs attr, int approxlen); +struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy, + enum nl80211_commands cmd, + enum nl80211_attrs attr, + int vendor_event_idx, + int approxlen, gfp_t gfp); + +void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp); + /** * cfg80211_vendor_cmd_alloc_reply_skb - allocate vendor command reply * @wiphy: the wiphy @@ -3951,6 +3962,44 @@ cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen) */ int cfg80211_vendor_cmd_reply(struct sk_buff *skb); +/** + * cfg80211_vendor_event_alloc - allocate vendor-specific event skb + * @wiphy: the wiphy + * @event_idx: index of the vendor event in the wiphy's vendor_events + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * @gfp: allocation flags + * + * This function allocates and pre-fills an skb for an event on the + * vendor-specific multicast group. + * + * When done filling the skb, call cfg80211_vendor_event() with the + * skb to send the event. + * + * Return: An allocated and pre-filled skb. %NULL if any errors happen. + */ +static inline struct sk_buff * +cfg80211_vendor_event_alloc(struct wiphy *wiphy, int approxlen, + int event_idx, gfp_t gfp) +{ + return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_VENDOR, + NL80211_ATTR_VENDOR_DATA, + event_idx, approxlen, gfp); +} + +/** + * cfg80211_vendor_event - send the event + * @skb: The skb, must have been allocated with cfg80211_vendor_event_alloc() + * @gfp: allocation flags + * + * This function sends the given @skb, which must have been allocated + * by cfg80211_vendor_event_alloc(), as an event. It always consumes it. + */ +static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp) +{ + __cfg80211_send_event_skb(skb, gfp); +} + #ifdef CONFIG_NL80211_TESTMODE /** * DOC: Test mode @@ -4031,8 +4080,13 @@ static inline int cfg80211_testmode_reply(struct sk_buff *skb) * * Return: An allocated and pre-filled skb. %NULL if any errors happen. */ -struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, - int approxlen, gfp_t gfp); +static inline struct sk_buff * +cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp) +{ + return __cfg80211_alloc_event_skb(wiphy, NL80211_CMD_TESTMODE, + NL80211_ATTR_TESTDATA, -1, + approxlen, gfp); +} /** * cfg80211_testmode_event - send the event @@ -4044,7 +4098,10 @@ struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, * by cfg80211_testmode_alloc_event_skb(), as an event. It always * consumes it. */ -void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp); +static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) +{ + __cfg80211_send_event_skb(skb, gfp); +} #define CFG80211_TESTMODE_CMD(cmd) .testmode_cmd = (cmd), #define CFG80211_TESTMODE_DUMP(cmd) .testmode_dump = (cmd), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e1307909ecf1..381184eb6c96 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1540,6 +1540,8 @@ enum nl80211_commands { * @NL80211_ATTR_VENDOR_SUBCMD: vendor sub-command * @NL80211_ATTR_VENDOR_DATA: data for the vendor command, if any; this * attribute is also used for vendor command feature advertisement + * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy + * info, containing a nested array of possible events * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -1865,6 +1867,7 @@ enum nl80211_attrs { NL80211_ATTR_VENDOR_ID, NL80211_ATTR_VENDOR_SUBCMD, NL80211_ATTR_VENDOR_DATA, + NL80211_ATTR_VENDOR_EVENTS, /* add attributes here, update the policy in nl80211.c */ -- cgit v1.2.2 From fa9ffc745610f31c6bc136d5a6a1782e00870e72 Mon Sep 17 00:00:00 2001 From: Kyeyoon Park Date: Mon, 16 Dec 2013 23:01:30 -0800 Subject: cfg80211: Add support for QoS mapping This allows QoS mapping from external networks to be implemented as defined in IEEE Std 802.11-2012, 10.24.9. APs can use this to advertise DSCP ranges and exceptions for mapping frames to a specific UP over Wi-Fi. The payload of the QoS Map Set element (IEEE Std 802.11-2012, 8.4.2.97) is sent to the driver through the new NL80211_ATTR_QOS_MAP attribute to configure the local behavior either on the AP (based on local configuration) or on a station (based on information received from the AP). Signed-off-by: Kyeyoon Park Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 53 +++++++++++++++++++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 14 ++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6d238a4331bd..56c597793d6d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1971,6 +1971,50 @@ struct cfg80211_mgmt_tx_params { bool dont_wait_for_ack; }; +/** + * struct cfg80211_dscp_exception - DSCP exception + * + * @dscp: DSCP value that does not adhere to the user priority range definition + * @up: user priority value to which the corresponding DSCP value belongs + */ +struct cfg80211_dscp_exception { + u8 dscp; + u8 up; +}; + +/** + * struct cfg80211_dscp_range - DSCP range definition for user priority + * + * @low: lowest DSCP value of this user priority range, inclusive + * @high: highest DSCP value of this user priority range, inclusive + */ +struct cfg80211_dscp_range { + u8 low; + u8 high; +}; + +/* QoS Map Set element length defined in IEEE Std 802.11-2012, 8.4.2.97 */ +#define IEEE80211_QOS_MAP_MAX_EX 21 +#define IEEE80211_QOS_MAP_LEN_MIN 16 +#define IEEE80211_QOS_MAP_LEN_MAX \ + (IEEE80211_QOS_MAP_LEN_MIN + 2 * IEEE80211_QOS_MAP_MAX_EX) + +/** + * struct cfg80211_qos_map - QoS Map Information + * + * This struct defines the Interworking QoS map setting for DSCP values + * + * @num_des: number of DSCP exceptions (0..21) + * @dscp_exception: optionally up to maximum of 21 DSCP exceptions from + * the user priority DSCP range definition + * @up: DSCP range definition for a particular user priority + */ +struct cfg80211_qos_map { + u8 num_des; + struct cfg80211_dscp_exception dscp_exception[IEEE80211_QOS_MAP_MAX_EX]; + struct cfg80211_dscp_range up[8]; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2213,6 +2257,8 @@ struct cfg80211_mgmt_tx_params { * @set_coalesce: Set coalesce parameters. * * @channel_switch: initiate channel-switch procedure (with CSA) + * + * @set_qos_map: Set QoS mapping information to the driver */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2454,6 +2500,9 @@ struct cfg80211_ops { int (*channel_switch)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); + int (*set_qos_map)(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_qos_map *qos_map); }; /* @@ -3432,9 +3481,11 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame * @skb: the data frame + * @qos_map: Interworking QoS mapping or %NULL if not in use * Return: The 802.1p/1d tag. */ -unsigned int cfg80211_classify8021d(struct sk_buff *skb); +unsigned int cfg80211_classify8021d(struct sk_buff *skb, + struct cfg80211_qos_map *qos_map); /** * cfg80211_find_ie - find information element in data diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 381184eb6c96..91054fd660e0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -702,6 +702,12 @@ * (&struct nl80211_vendor_cmd_info) of the supported vendor commands. * This may also be sent as an event with the same attributes. * + * @NL80211_CMD_SET_QOS_MAP: Set Interworking QoS mapping for IP DSCP values. + * The QoS mapping information is included in %NL80211_ATTR_QOS_MAP. If + * that attribute is not included, QoS mapping is disabled. Since this + * QoS mapping is relevant for IP packets, it is only valid during an + * association. This is cleared on disassociation and AP restart. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -871,6 +877,8 @@ enum nl80211_commands { NL80211_CMD_VENDOR, + NL80211_CMD_SET_QOS_MAP, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1543,6 +1551,10 @@ enum nl80211_commands { * @NL80211_ATTR_VENDOR_EVENTS: used for event list advertising in the wiphy * info, containing a nested array of possible events * + * @NL80211_ATTR_QOS_MAP: IP DSCP mapping for Interworking QoS mapping. This + * data is in the format defined for the payload of the QoS Map Set element + * in IEEE Std 802.11-2012, 8.4.2.97. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1869,6 +1881,8 @@ enum nl80211_attrs { NL80211_ATTR_VENDOR_DATA, NL80211_ATTR_VENDOR_EVENTS, + NL80211_ATTR_QOS_MAP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, -- cgit v1.2.2 From 10239edf86f137ce4c39b62ea9575e8053c549a0 Mon Sep 17 00:00:00 2001 From: Terry Lam Date: Sun, 15 Dec 2013 00:30:21 -0800 Subject: net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc This patch implements the first size-based qdisc that attempts to differentiate between small flows and heavy-hitters. The goal is to catch the heavy-hitters and move them to a separate queue with less priority so that bulk traffic does not affect the latency of critical traffic. Currently "less priority" means less weight (2:1 in particular) in a Weighted Deficit Round Robin (WDRR) scheduler. In essence, this patch addresses the "delay-bloat" problem due to bloated buffers. In some systems, large queues may be necessary for obtaining CPU efficiency, or due to the presence of unresponsive traffic like UDP, or just a large number of connections with each having a small amount of outstanding traffic. In these circumstances, HHF aims to reduce the HoL blocking for latency sensitive traffic, while not impacting the queues built up by bulk traffic. HHF can also be used in conjunction with other AQM mechanisms such as CoDel. To capture heavy-hitters, we implement the "multi-stage filter" design in the following paper: C. Estan and G. Varghese, "New Directions in Traffic Measurement and Accounting", in ACM SIGCOMM, 2002. Some configurable qdisc settings through 'tc': - hhf_reset_timeout: period to reset counter values in the multi-stage filter (default 40ms) - hhf_admit_bytes: threshold to classify heavy-hitters (default 128KB) - hhf_evict_timeout: threshold to evict idle heavy-hitters (default 1s) - hhf_non_hh_weight: Weighted Deficit Round Robin (WDRR) weight for non-heavy-hitters (default 2) - hh_flows_limit: max number of heavy-hitter flow entries (default 2048) Note that the ratio between hhf_admit_bytes and hhf_reset_timeout reflects the bandwidth of heavy-hitters that we attempt to capture (25Mbps with the above default settings). The false negative rate (heavy-hitter flows getting away unclassified) is zero by the design of the multi-stage filter algorithm. With 100 heavy-hitter flows, using four hashes and 4000 counters yields a false positive rate (non-heavy-hitters mistakenly classified as heavy-hitters) of less than 1e-4. Signed-off-by: Terry Lam Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index a806687ad98f..4566993b8385 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -790,4 +790,29 @@ struct tc_fq_qd_stats { __u32 throttled_flows; __u32 pad; }; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; #endif -- cgit v1.2.2 From 78ea85f17b15390e30d8b47488ec7b6cf0790663 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 16 Dec 2013 23:27:09 +0100 Subject: net: skbuff: improve comment on checksumming It can be a bit confusing when looking for checksumming flags that the actual comment for this resides elsewhere further below in the header file. Thus, bring the documentation where we define these flags, and slightly improve the doc text to make it a bit more clear/readable. Also, whitespace-align values of the define while at it. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 131 ++++++++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 06bedeb0d49e..c5cd016f5120 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,11 +34,82 @@ #include #include +/* A. Checksumming of received packets by device. + * + * CHECKSUM_NONE: + * + * Device failed to checksum this packet e.g. due to lack of capabilities. + * The packet contains full (though not verified) checksum in packet but + * not in skb->csum. Thus, skb->csum is undefined in this case. + * + * CHECKSUM_UNNECESSARY: + * + * The hardware you're dealing with doesn't calculate the full checksum + * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums + * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will + * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still + * undefined in this case though. It is a bad option, but, unfortunately, + * nowadays most vendors do this. Apparently with the secret goal to sell + * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_COMPLETE: + * + * This is the most generic way. The device supplied checksum of the _whole_ + * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * hardware doesn't need to parse L3/L4 headers to implement this. + * + * Note: Even if device supports only some protocols, but is able to produce + * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. + * + * CHECKSUM_PARTIAL: + * + * This is identical to the case for output below. This may occur on a packet + * received directly from another Linux OS, e.g., a virtualized Linux kernel + * on the same host. The packet can be treated in the same way as + * CHECKSUM_UNNECESSARY, except that on output (i.e., forwarding) the + * checksum must be filled in by the OS or the hardware. + * + * B. Checksumming on output. + * + * CHECKSUM_NONE: + * + * The skb was already checksummed by the protocol, or a checksum is not + * required. + * + * CHECKSUM_PARTIAL: + * + * The device is required to checksum the packet as seen by hard_start_xmit() + * from skb->csum_start up to the end, and to record/write the checksum at + * offset skb->csum_start + skb->csum_offset. + * + * The device must show its capabilities in dev->features, set up at device + * setup time, e.g. netdev_features.h: + * + * NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything. + * NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over + * IPv4. Sigh. Vendors like this way for an unknown reason. + * Though, see comment above about CHECKSUM_UNNECESSARY. 8) + * NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead. + * NETIF_F_... - Well, you get the picture. + * + * CHECKSUM_UNNECESSARY: + * + * Normally, the device will do per protocol specific checksumming. Protocol + * implementations that do not want the NIC to perform the checksum + * calculation should use this flag in their outgoing skbs. + * + * NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC + * offload. Correspondingly, the FCoE protocol driver + * stack should use CHECKSUM_UNNECESSARY. + * + * Any questions? No questions, good. --ANK + */ + /* Don't change this without changing skb_csum_unnecessary! */ -#define CHECKSUM_NONE 0 -#define CHECKSUM_UNNECESSARY 1 -#define CHECKSUM_COMPLETE 2 -#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_NONE 0 +#define CHECKSUM_UNNECESSARY 1 +#define CHECKSUM_COMPLETE 2 +#define CHECKSUM_PARTIAL 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -54,58 +125,6 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * COMPLETE: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use COMPLETE, - * not UNNECESSARY. - * - * PARTIAL: identical to the case for output below. This may occur - * on a packet received directly from another Linux OS, e.g., - * a virtualised Linux kernel on the same host. The packet can - * be treated in the same way as UNNECESSARY except that on - * output (i.e., forwarding) the checksum must be filled in - * by the OS or the hardware. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->csum_start to the end and to record the checksum - * at skb->csum_start + skb->csum_offset. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. - * - * UNNECESSARY: device will do per protocol specific csum. Protocol drivers - * that do not want net to perform the checksum calculation should use - * this flag in their outgoing skbs. - * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC - * offload. Correspondingly, the FCoE protocol driver - * stack should use CHECKSUM_UNNECESSARY. - * - * Any questions? No questions, good. --ANK - */ - struct net_device; struct scatterlist; struct pipe_inode_info; -- cgit v1.2.2 From 2c9839c143bbc8c6612f56351dae8d57111aee37 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Tue, 17 Dec 2013 21:30:09 -0800 Subject: bonding: add num_grat_arp attribute netlink support Add IFLA_BOND_NUM_PEER_NOTIF to allow get/set of bonding parameter num_grat_arp via netlink. Bonding parameter num_unsol_na is synonymous with num_grat_arp, so add only one netlink attribute to represent both bonding parameters. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6e275d5214f3..bfe3d3b561ee 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -344,6 +344,7 @@ enum { IFLA_BOND_FAIL_OVER_MAC, IFLA_BOND_XMIT_HASH_POLICY, IFLA_BOND_RESEND_IGMP, + IFLA_BOND_NUM_PEER_NOTIF, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 1cc0b1e30c662d84a89690f42826cf49e2278b97 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Tue, 17 Dec 2013 21:30:16 -0800 Subject: bonding: add all_slaves_active attribute netlink support Add IFLA_BOND_ALL_SLAVES_ACTIVE to allow get/set of bonding parameter all_slaves_active via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bfe3d3b561ee..93e9c1b99f7a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -345,6 +345,7 @@ enum { IFLA_BOND_XMIT_HASH_POLICY, IFLA_BOND_RESEND_IGMP, IFLA_BOND_NUM_PEER_NOTIF, + IFLA_BOND_ALL_SLAVES_ACTIVE, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 7d1010082785b2020b1e2e1211b76209976a33df Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Tue, 17 Dec 2013 21:30:23 -0800 Subject: bonding: add min_links attribute netlink support Add IFLA_BOND_MIN_LINKS to allow get/set of bonding parameter min_links via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 93e9c1b99f7a..c7a0f89d59b2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -346,6 +346,7 @@ enum { IFLA_BOND_RESEND_IGMP, IFLA_BOND_NUM_PEER_NOTIF, IFLA_BOND_ALL_SLAVES_ACTIVE, + IFLA_BOND_MIN_LINKS, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 8d836d092ed7b77d13ac1108399165ee7de7463f Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Tue, 17 Dec 2013 21:30:30 -0800 Subject: bonding: add lp_interval attribute netlink support Add IFLA_BOND_LP_INTERVAL to allow get/set of bonding parameter lp_interval via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c7a0f89d59b2..4825ae74f1aa 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -347,6 +347,7 @@ enum { IFLA_BOND_NUM_PEER_NOTIF, IFLA_BOND_ALL_SLAVES_ACTIVE, IFLA_BOND_MIN_LINKS, + IFLA_BOND_LP_INTERVAL, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From c13ab3ff176eab78b6ee93817484584af5807cf2 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Tue, 17 Dec 2013 21:30:37 -0800 Subject: bonding: add packets_per_slave attribute netlink support Add IFLA_BOND_PACKETS_PER_SLAVE to allow get/set of bonding parameter packets_per_slave via netlink. Signed-off-by: Scott Feldman Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4825ae74f1aa..8fbc7ca878b4 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -348,6 +348,7 @@ enum { IFLA_BOND_ALL_SLAVES_ACTIVE, IFLA_BOND_MIN_LINKS, IFLA_BOND_LP_INTERVAL, + IFLA_BOND_PACKETS_PER_SLAVE, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From b6835f9c5d3199e6fa17ffad023f4b7dbad4f1a3 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Thu, 19 Dec 2013 14:22:37 +0530 Subject: drivers: net: Include new header file in sbni.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a new header file include/net/Space.h which contains prototype declaration of sbni_probe(). Include the new header file in drivers/net/Space.c and drivers/net/wan/sbni.c because they use this function. This eliminates the following warning in wan/sbni.c: drivers/net/wan/sbni.c:224:12: warning: no previous prototype for ‘sbni_probe’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: David S. Miller --- include/net/Space.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/net/Space.h (limited to 'include') diff --git a/include/net/Space.h b/include/net/Space.h new file mode 100644 index 000000000000..8a32771e4215 --- /dev/null +++ b/include/net/Space.h @@ -0,0 +1,31 @@ +/* A unified ethernet device probe. This is the easiest way to have every + * ethernet adaptor have the name "eth[0123...]". + */ + +struct net_device *hp100_probe(int unit); +struct net_device *ultra_probe(int unit); +struct net_device *wd_probe(int unit); +struct net_device *ne_probe(int unit); +struct net_device *fmv18x_probe(int unit); +struct net_device *i82596_probe(int unit); +struct net_device *ni65_probe(int unit); +struct net_device *sonic_probe(int unit); +struct net_device *smc_init(int unit); +struct net_device *atarilance_probe(int unit); +struct net_device *sun3lance_probe(int unit); +struct net_device *sun3_82586_probe(int unit); +struct net_device *apne_probe(int unit); +struct net_device *cs89x0_probe(int unit); +struct net_device *mvme147lance_probe(int unit); +struct net_device *tc515_probe(int unit); +struct net_device *lance_probe(int unit); +struct net_device *mac8390_probe(int unit); +struct net_device *mac89x0_probe(int unit); +struct net_device *cops_probe(int unit); +struct net_device *ltpc_probe(void); + +/* Fibre Channel adapters */ +int iph5526_probe(struct net_device *dev); + +/* SBNI adapters */ +int sbni_probe(int unit); -- cgit v1.2.2 From 8e1a28e8e6797449dfdfa4739002d1e5939355a8 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 19 Dec 2013 21:20:12 +0200 Subject: net/mlx4_core: Expose physical port id as PF/VF capability Add the infrastructure needed to support ndo_get_phys_port_id which allows users to identify to which physical port a net-device is connected to by reading a unique port id. This will work for VFs and PFs. The driver uses a new device capability - phys_port_id, The PF driver reads the port phys_port_id from Firmware and stores it. The VF driver reads the port phys_port_id from the PF using QUERY_FUNC_CAP command. Signed-off-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d3a523160ba..294b7c58fe95 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -454,6 +454,7 @@ struct mlx4_caps { u32 userspace_caps; /* userspace must be aware of these */ u32 function_caps; /* VFs must be aware of these */ u16 hca_core_clock; + u64 phys_port_id[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { @@ -1113,6 +1114,7 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, int *vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); +int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -- cgit v1.2.2 From 18be099badcfc4a12f058addc55c4270d5a8bec8 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 20 Dec 2013 01:39:52 +0300 Subject: sh_eth: add PHY IRQ to platform data Allow the platform code to pass PHY's IRQ to the driver. Print this IRQ along with the other PHY datails in sh_eth_phy_init(). Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/sh_eth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index 90b5e30c2f22..8c9131db2b25 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -8,6 +8,7 @@ enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN}; struct sh_eth_plat_data { int phy; + int phy_irq; int edmac_endian; phy_interface_t phy_interface; void (*set_mdio_gate)(void *addr); -- cgit v1.2.2 From 08c0cad69f32ad1e881fa3fb7f5e0a25db5b07ce Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Fri, 20 Dec 2013 17:28:53 +0100 Subject: netfilter: nfnetlink_queue: enable UID/GID socket info retrieval Thanks to commits 41063e9 (ipv4: Early TCP socket demux) and 421b388 (udp: ipv4: Add udp early demux) it is now possible to parse UID and GID socket info also for incoming TCP and UDP connections. Having this info available, it is convenient to let NFQUEUE parse it in order to improve and refine the traffic analysis in userspace. Signed-off-by: Valentina Giusti Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 0132bad79de7..8dd819e2b5fe 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -47,6 +47,8 @@ enum nfqnl_attr_type { NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_SKB_INFO, /* __u32 skb meta information */ NFQA_EXP, /* nf_conntrack_netlink.h */ + NFQA_UID, /* __u32 sk uid */ + NFQA_GID, /* __u32 sk gid */ __NFQA_MAX }; @@ -99,7 +101,8 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_FAIL_OPEN (1 << 0) #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_GSO (1 << 2) -#define NFQA_CFG_F_MAX (1 << 3) +#define NFQA_CFG_F_UID_GID (1 << 3) +#define NFQA_CFG_F_MAX (1 << 4) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ -- cgit v1.2.2 From 09aea5df7fbf0b987623f7007a11e64008284a0e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 17 Dec 2013 22:35:52 -0800 Subject: netconf: rename PROXY_ARP to NEIGH_PROXY Use same field for both IPv4 (proxy_arp) and IPv6 (proxy_ndp) so fix it before API is set to be a common name Signed-off-by: Stephen Hemminger Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index bd969d77ce52..669a1f0b1d97 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -14,7 +14,7 @@ enum { NETCONFA_FORWARDING, NETCONFA_RP_FILTER, NETCONFA_MC_FORWARDING, - NETCONFA_PROXY_ARP, + NETCONFA_PROXY_NEIGH, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) -- cgit v1.2.2 From 6a649f339802f104549e1fb211e381036661e244 Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Wed, 18 Dec 2013 11:27:02 +0800 Subject: netfilter: add IPv4/6 IPComp extension match support With this plugin, user could specify IPComp tagged with certain CPI that host not interested will be DROPped or any other action. For example: iptables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP ip6tables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP Then input IPComp packet with CPI equates 0x87 will not reach upper layer anymore. Signed-off-by: Fan Du Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/xt_ipcomp.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_ipcomp.h (limited to 'include') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 17c3af2c4bb9..91be8ce623f0 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -54,6 +54,7 @@ header-y += xt_ecn.h header-y += xt_esp.h header-y += xt_hashlimit.h header-y += xt_helper.h +header-y += xt_ipcomp.h header-y += xt_iprange.h header-y += xt_ipvs.h header-y += xt_length.h diff --git a/include/uapi/linux/netfilter/xt_ipcomp.h b/include/uapi/linux/netfilter/xt_ipcomp.h new file mode 100644 index 000000000000..45c7e40eb8e1 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_ipcomp.h @@ -0,0 +1,16 @@ +#ifndef _XT_IPCOMP_H +#define _XT_IPCOMP_H + +#include + +struct xt_ipcomp { + __u32 spis[2]; /* Security Parameter Index */ + __u8 invflags; /* Inverse flags */ + __u8 hdrres; /* Test of the Reserved Filed */ +}; + +/* Values for "invflags" field in struct xt_ipcomp. */ +#define XT_IPCOMP_INV_SPI 0x01 /* Invert the sense of spi. */ +#define XT_IPCOMP_INV_MASK 0x01 /* All possible flags. */ + +#endif /*_XT_IPCOMP_H*/ -- cgit v1.2.2 From 652a4a53fce4b50e4cfa92055deefadc3a3627fa Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 26 Dec 2013 10:12:41 -0800 Subject: mdio: unused ethtool functions Use it or lose it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/mdio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3d15c838116c..b42963bc81dd 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -70,9 +70,6 @@ extern int mdio45_nway_restart(const struct mdio_if_info *mdio); extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, struct ethtool_cmd *ecmd, u32 npage_adv, u32 npage_lpa); -extern void -mdio45_ethtool_spauseparam_an(const struct mdio_if_info *mdio, - const struct ethtool_pauseparam *ecmd); /** * mdio45_ethtool_gset - get settings for ETHTOOL_GSET -- cgit v1.2.2 From 2e04ad424b03661ec8239acd52146497eb33be1c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 20 Dec 2013 09:24:47 +0800 Subject: sch_tbf: add TBF_BURST/TBF_PBURST attribute When we set burst to 1514 with low rate in userspace, the kernel get a value of burst that less than 1514, which doesn't work. Because it may make some loss when transform burst to buffer in userspace. This makes burst lose some bytes, when the kernel transform the buffer back to burst. This patch adds two new attributes to support sending burst/mtu to kernel directly to avoid the loss. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 4566993b8385..fe1192056fd1 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -173,6 +173,8 @@ enum { TCA_TBF_PTAB, TCA_TBF_RATE64, TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, __TCA_TBF_MAX, }; -- cgit v1.2.2 From 0db901bd33f09b0dbf39d12261f689812e343c26 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 27 Dec 2013 12:06:46 -0800 Subject: macvlan: make start_xmit local Only used in one file, no need to expose Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index ea22721ba269..551bdd3722a2 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -111,9 +111,6 @@ extern void macvlan_dellink(struct net_device *dev, struct list_head *head); extern int macvlan_link_register(struct rtnl_link_ops *ops); -extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, - struct net_device *dev); - #if IS_ENABLED(CONFIG_MACVLAN) static inline struct net_device * macvlan_dev_real_dev(const struct net_device *dev) -- cgit v1.2.2 From e035b77ac7be430a5fef8c9c23f60b6b50ec81c5 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Gonzalez Date: Thu, 26 Dec 2013 16:38:01 +0100 Subject: netfilter: nf_tables: nft_meta module get/set ops This patch adds kernel support for the meta expression in get/set flavour. The set operation indicates that a given packet has to be set with a property, currently one of mark, priority, nftrace. The get op is what was currently working: evaluate the given packet property. In the nftrace case, the value is always 1. Such behaviour is copied from net/netfilter/xt_TRACE.c The NFTA_META_DREG and NFTA_META_SREG attributes are mutually exclusives. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b25481e16f0a..aa86a15293e1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -555,11 +555,13 @@ enum nft_meta_keys { * * @NFTA_META_DREG: destination register (NLA_U32) * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys) + * @NFTA_META_SREG: source register (NLA_U32) */ enum nft_meta_attributes { NFTA_META_UNSPEC, NFTA_META_DREG, NFTA_META_KEY, + NFTA_META_SREG, __NFTA_META_MAX }; #define NFTA_META_MAX (__NFTA_META_MAX - 1) -- cgit v1.2.2 From 7195cf72211d77108de8271a247ec2876ae60502 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 28 Dec 2013 11:07:18 -0800 Subject: arp: make arp_invalidate static Don't export arp_invalidate, only used in arp.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/arp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index 7509d9da4e36..73c49864076b 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -62,6 +62,5 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *src_hw, const unsigned char *target_hw); void arp_xmit(struct sk_buff *skb); -int arp_invalidate(struct net_device *dev, __be32 ip); #endif /* _ARP_H */ -- cgit v1.2.2 From 068a6e183484b1c619025da502a6fbfa53c50991 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 28 Dec 2013 11:08:22 -0800 Subject: ipv4: remove unused function inetpeer_invalidate_family defined but never used Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/inetpeer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f4e127af4e17..6efe73c79c52 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -163,7 +163,6 @@ void inet_putpeer(struct inet_peer *p); bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); void inetpeer_invalidate_tree(struct inet_peer_base *); -void inetpeer_invalidate_family(int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, -- cgit v1.2.2 From ea074b3495a023c2cf969605d51991b2b9df9514 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 28 Dec 2013 11:11:42 -0800 Subject: ipv4: ping make local stuff static Don't export ping_table or ping_v4_sendmsg. Both are only used inside ping code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/ping.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/ping.h b/include/net/ping.h index 90f48417b03d..76013653e08c 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -42,11 +42,6 @@ struct pingv6_ops { const struct net_device *dev, int strict); }; -struct ping_table { - struct hlist_nulls_head hash[PING_HTABLE_SIZE]; - rwlock_t lock; -}; - struct ping_iter_state { struct seq_net_private p; int bucket; @@ -54,7 +49,6 @@ struct ping_iter_state { }; extern struct proto ping_prot; -extern struct ping_table ping_table; #if IS_ENABLED(CONFIG_IPV6) extern struct pingv6_ops pingv6_ops; #endif @@ -81,8 +75,6 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.2 From f7e56a76acf642d21a8e7bd587e270ae7addc4db Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 29 Dec 2013 11:39:51 -0800 Subject: tcp: make local functions static The following are only used in one file: tcp_connect_init tcp_set_rto Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9cd62bc09055..56fc366da6d5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -468,7 +468,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct tcp_fastopen_cookie *foc); int tcp_disconnect(struct sock *sk, int flags); -void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); @@ -623,8 +622,6 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return (tp->srtt >> 3) + tp->rttvar; } -void tcp_set_rto(struct sock *sk); - static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | -- cgit v1.2.2 From 24245a1b055df246dc94517c1a8b1fdfe7668da0 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 29 Dec 2013 11:43:35 -0800 Subject: lro: remove dead code Remove leftover code that is not used anywhere in current tree. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/inet_lro.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include') diff --git a/include/linux/inet_lro.h b/include/linux/inet_lro.h index 2cf55afbcd4e..9a715cfa1fe3 100644 --- a/include/linux/inet_lro.h +++ b/include/linux/inet_lro.h @@ -133,33 +133,10 @@ struct net_lro_mgr { void lro_receive_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, void *priv); - -/* - * Processes a fragment list - * - * This functions aggregate fragments and generate SKBs do pass - * the packets to the stack. - * - * @lro_mgr: LRO manager to use - * @frags: Fragment to be processed. Must contain entire header in first - * element. - * @len: Length of received data - * @true_size: Actual size of memory the fragment is consuming - * @priv: Private data that may be used by driver functions - * (for example get_tcp_ip_hdr) - */ - -void lro_receive_frags(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, void *priv, __wsum sum); - /* * Forward all aggregated SKBs held by lro_mgr to network stack */ void lro_flush_all(struct net_lro_mgr *lro_mgr); -void lro_flush_pkt(struct net_lro_mgr *lro_mgr, - struct iphdr *iph, struct tcphdr *tcph); - #endif -- cgit v1.2.2 From cc70d069e2b9cece683206c0f6a1d1484414e577 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Sun, 29 Dec 2013 12:28:13 +0100 Subject: netfilter: REJECT: separate reusable code This patch prepares the addition of TCP reset support in the nft_reject module by moving reusable code into a header file. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 128 ++++++++++++++++++++++++ include/net/netfilter/ipv6/nf_reject.h | 171 +++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 include/net/netfilter/ipv4/nf_reject.h create mode 100644 include/net/netfilter/ipv6/nf_reject.h (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h new file mode 100644 index 000000000000..931fbf812171 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -0,0 +1,128 @@ +#ifndef _IPV4_NF_REJECT_H +#define _IPV4_NF_REJECT_H + +#include +#include +#include +#include + +static inline void nf_send_unreach(struct sk_buff *skb_in, int code) +{ + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _otcph, *tcph; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(_otcph), &_otcph); + if (oth == NULL) + return; + + /* No RST for RST. */ + if (oth->rst) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return; + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + + skb_reset_network_header(nskb); + niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = IPPROTO_TCP; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + memset(tcph, 0, sizeof(*tcph)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) + tcph->seq = oth->ack_seq; + else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} + + +#endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h new file mode 100644 index 000000000000..710d17ed70b4 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -0,0 +1,171 @@ +#ifndef _IPV6_NF_REJECT_H +#define _IPV6_NF_REJECT_H + +#include +#include +#include +#include +#include + +static inline void +nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum) +{ + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; + struct dst_entry *dst = NULL; + u8 proto; + __be16 frag_off; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + pr_debug("Cannot get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + pr_debug("RST is set\n"); + return; + } + + /* Check checksum. */ + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { + pr_debug("TCP checksum is invalid\n"); + return; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + skb_put(nskb, sizeof(struct ipv6hdr)); + skb_reset_network_header(nskb); + ip6h = ipv6_hdr(nskb); + ip6_flow_hdr(ip6h, tclass, 0); + ip6h->hop_limit = ip6_dst_hoplimit(dst); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, + &ipv6_hdr(nskb)->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial(tcph, + sizeof(struct tcphdr), 0)); + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); +} + +#endif /* _IPV6_NF_REJECT_H */ -- cgit v1.2.2 From fe47755852d1f299b55a6e6594bb6e082ac103d4 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 22 Dec 2013 18:54:31 +0800 Subject: net: Allow setting sock flow hash without a sock This patch adds sock_rps_record_flow_hash and sock_rps_reset_flow_hash which take a hash value as an argument and sets the sock_flow_table accordingly. This allows the table to be populated in cases where flow is being tracked outside of a sock structure. sock_rps_record_flow and sock_rps_reset_flow call this function where the hash is taken from sk_rxhash. Signed-off-by: Tom Herbert Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- include/net/sock.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2ef3c3eca47a..8ee90add69d2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -820,30 +820,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) return sk->sk_backlog_rcv(sk, skb); } -static inline void sock_rps_record_flow(const struct sock *sk) +static inline void sock_rps_record_flow_hash(__u32 hash) { #ifdef CONFIG_RPS struct rps_sock_flow_table *sock_flow_table; rcu_read_lock(); sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, sk->sk_rxhash); + rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); #endif } -static inline void sock_rps_reset_flow(const struct sock *sk) +static inline void sock_rps_reset_flow_hash(__u32 hash) { #ifdef CONFIG_RPS struct rps_sock_flow_table *sock_flow_table; rcu_read_lock(); sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash); + rps_reset_sock_flow(sock_flow_table, hash); rcu_read_unlock(); #endif } +static inline void sock_rps_record_flow(const struct sock *sk) +{ + sock_rps_record_flow_hash(sk->sk_rxhash); +} + +static inline void sock_rps_reset_flow(const struct sock *sk) +{ + sock_rps_reset_flow_hash(sk->sk_rxhash); +} + static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { -- cgit v1.2.2 From 0a9a8bfd29aa5ba2298dc80852cdbb63a42f6576 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 23 Dec 2013 08:29:42 -0500 Subject: printk: Add a DEPRECATED macro sctp has several points in its setsockopt path in which it issues deprecation warnings. It seems like it might be handy to macrotize such a warning so other subsystems can use it easily Signed-off-by: Neil Horman CC: Greg Kroah-Hartman CC: "David S. Miller" CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/printk.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 694925837a16..26fb95ce5080 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -87,6 +87,13 @@ struct va_format { */ #define HW_ERR "[Hardware Error]: " +/* + * DEPRECATED + * Add this to a message whenever you want to warn user space about the use + * of a deprecated aspect of an API so they can stop using it + */ +#define DEPRECATED "[Deprecated]: " + /* * Dummy printk for disabled debugging statements to use whilst maintaining * gcc's format and side-effect checking. -- cgit v1.2.2 From 604d13c97f0d863e41da3f5835c62e3cf899962b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Dec 2013 14:35:56 +0100 Subject: netlink: specify netlink packet direction for nlmon In order to facilitate development for netlink protocol dissector, fill the unused field skb->pkt_type of the cloned skb with a hint of the address space of the new owner (receiver) socket in the notion of "to kernel" resp. "to user". At the time we invoke __netlink_deliver_tap_skb(), we already have set the new skb owner via netlink_skb_set_owner_r(), so we can use that for netlink_is_kernel() probing. In normal PF_PACKET network traffic, this field denotes if the packet is destined for us (PACKET_HOST), if it's broadcast (PACKET_BROADCAST), etc. As we only have 3 bit reserved, we can use the value (= 6) of PACKET_FASTROUTE as it's _not used_ anywhere in the whole kernel and not supported anywhere, and packets of such type were never exposed to user space, so there are no overlapping users of such kind. Thus, as wished, that seems the only way to make both PACKET_* values non-overlapping and therefore device agnostic. By using those two flags for netlink skbs on nlmon devices, they can be made available and picked up via sll_pkttype (previously unused in netlink context) in struct sockaddr_ll. We now have these two directions: - PACKET_USER (= 6) -> to user space - PACKET_KERNEL (= 7) -> to kernel space Partial `ip a` example strace for sa_family=AF_NETLINK with detected nl msg direction: syscall: direction: sendto(3, ...) = 40 /* to kernel */ recvmsg(3, ...) = 3404 /* to user */ recvmsg(3, ...) = 1120 /* to user */ recvmsg(3, ...) = 20 /* to user */ sendto(3, ...) = 40 /* to kernel */ recvmsg(3, ...) = 168 /* to user */ recvmsg(3, ...) = 144 /* to user */ recvmsg(3, ...) = 20 /* to user */ Signed-off-by: Daniel Borkmann Signed-off-by: Jakub Zawadzki Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index e9d844c80c11..1988a02842cc 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -26,8 +26,10 @@ struct sockaddr_ll { #define PACKET_MULTICAST 2 /* To group */ #define PACKET_OTHERHOST 3 /* To someone else */ #define PACKET_OUTGOING 4 /* Outgoing of any type */ -/* These ones are invisible by user level */ #define PACKET_LOOPBACK 5 /* MC/BRD frame looped back */ +#define PACKET_USER 6 /* To user space */ +#define PACKET_KERNEL 7 /* To kernel space */ +/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */ #define PACKET_FASTROUTE 6 /* Fastrouted frame */ /* Packet socket options */ -- cgit v1.2.2 From 7ffdf726cfe0d188907bdbb0e7729fb35a69c219 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 23 Dec 2013 16:09:43 +0200 Subject: net/mlx4_core: Add basic support for TCP/IP offloads under tunneling Add the low-level device commands and definitions used for TCP/IP HW offloads of tunneled/vxlan traffic which are supported by the ConnectX3-pro NIC. This is done through the following elements: - read tunneling device caps in QUERY_DEV_CAP - add helper function to do SET_PORT for tunneling - add DMFS VXLAN steering rule definitions - add CQE and WQE checksum offload field definitions Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/cmd.h | 1 + include/linux/mlx4/cq.h | 5 +++++ include/linux/mlx4/device.h | 37 ++++++++++++++++++++++++++++++++++++- include/linux/mlx4/qp.h | 6 ++++++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 8df61bc5da00..b0ec0fe035c0 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -180,6 +180,7 @@ enum { MLX4_SET_PORT_GID_TABLE = 0x5, MLX4_SET_PORT_PRIO2TC = 0x8, MLX4_SET_PORT_SCHEDULER = 0x9, + MLX4_SET_PORT_VXLAN = 0xB }; enum { diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 98fa492cf406..5dd0d70a0689 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -81,7 +81,12 @@ struct mlx4_ts_cqe { } __packed; enum { + MLX4_CQE_L2_TUNNEL_IPOK = 1 << 31, MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, + MLX4_CQE_L2_TUNNEL = 1 << 27, + MLX4_CQE_L2_TUNNEL_CSUM = 1 << 26, + MLX4_CQE_L2_TUNNEL_IPV4 = 1 << 25, + MLX4_CQE_QPN_MASK = 0xffffff, }; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 294b7c58fe95..c99ecf6d2c67 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -118,6 +118,11 @@ static inline const char *mlx4_steering_mode_str(int steering_mode) } } +enum { + MLX4_TUNNEL_OFFLOAD_MODE_NONE, + MLX4_TUNNEL_OFFLOAD_MODE_VXLAN +}; + enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, @@ -160,7 +165,8 @@ enum { MLX4_DEV_CAP_FLAG2_TS = 1LL << 5, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7, - MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8 + MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, + MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 9 }; enum { @@ -455,6 +461,7 @@ struct mlx4_caps { u32 function_caps; /* VFs must be aware of these */ u16 hca_core_clock; u64 phys_port_id[MLX4_MAX_PORTS + 1]; + int tunnel_offload_mode; }; struct mlx4_buf_list { @@ -909,6 +916,7 @@ enum mlx4_net_trans_rule_id { MLX4_NET_TRANS_RULE_ID_IPV4, MLX4_NET_TRANS_RULE_ID_TCP, MLX4_NET_TRANS_RULE_ID_UDP, + MLX4_NET_TRANS_RULE_ID_VXLAN, MLX4_NET_TRANS_RULE_NUM, /* should be last */ }; @@ -966,6 +974,12 @@ struct mlx4_spec_ib { u8 dst_gid_msk[16]; }; +struct mlx4_spec_vxlan { + __be32 vni; + __be32 vni_mask; + +}; + struct mlx4_spec_list { struct list_head list; enum mlx4_net_trans_rule_id id; @@ -974,6 +988,7 @@ struct mlx4_spec_list { struct mlx4_spec_ib ib; struct mlx4_spec_ipv4 ipv4; struct mlx4_spec_tcp_udp tcp_udp; + struct mlx4_spec_vxlan vxlan; }; }; @@ -1060,6 +1075,15 @@ struct mlx4_net_trans_rule_hw_ipv4 { __be32 src_ip_msk; } __packed; +struct mlx4_net_trans_rule_hw_vxlan { + u8 size; + u8 rsvd; + __be16 id; + __be32 rsvd1; + __be32 vni; + __be32 vni_mask; +} __packed; + struct _rule_hw { union { struct { @@ -1071,9 +1095,19 @@ struct _rule_hw { struct mlx4_net_trans_rule_hw_ib ib; struct mlx4_net_trans_rule_hw_ipv4 ipv4; struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; + struct mlx4_net_trans_rule_hw_vxlan vxlan; }; }; +enum { + VXLAN_STEER_BY_OUTER_MAC = 1 << 0, + VXLAN_STEER_BY_OUTER_VLAN = 1 << 1, + VXLAN_STEER_BY_VSID_VNI = 1 << 2, + VXLAN_STEER_BY_INNER_MAC = 1 << 3, + VXLAN_STEER_BY_INNER_VLAN = 1 << 4, +}; + + int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, @@ -1096,6 +1130,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); +int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6d351473c292..59f8ba84568b 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -109,6 +109,10 @@ enum { MLX4_RSS_TCP_IPV4 = 1 << 4, MLX4_RSS_IPV4 = 1 << 5, + MLX4_RSS_BY_OUTER_HEADERS = 0 << 6, + MLX4_RSS_BY_INNER_HEADERS = 2 << 6, + MLX4_RSS_BY_INNER_HEADERS_IPONLY = 3 << 6, + /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24, /* offset of being RSS indirection QP within mlx4_qp_context.flags */ @@ -252,6 +256,8 @@ enum { /* param3 */ enum { MLX4_WQE_CTRL_NEC = 1 << 29, + MLX4_WQE_CTRL_IIP = 1 << 28, + MLX4_WQE_CTRL_ILP = 1 << 27, MLX4_WQE_CTRL_FENCE = 1 << 6, MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, MLX4_WQE_CTRL_SOLICITED = 1 << 1, -- cgit v1.2.2 From 6a031f67c83aa175aedd10d4ae64750415ab57b0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 25 Dec 2013 17:35:15 +0800 Subject: sch_netem: support of 64bit rates Add a new attribute to support 64bit rates so that tc can use them to break the 32bit limit. Signed-off-by: Yang Yingliang Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index fe1192056fd1..77eb331810b8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -525,6 +525,7 @@ enum { TCA_NETEM_LOSS, TCA_NETEM_RATE, TCA_NETEM_ECN, + TCA_NETEM_RATE64, __TCA_NETEM_MAX, }; -- cgit v1.2.2 From c9d8ca0454a28d0f835138d7294ede4fc6d95572 Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Wed, 1 Jan 2014 04:31:01 +0800 Subject: net, rps: fix build failure when CONFIG_RPS isn't set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from net/socket.c:99:0: include/net/sock.h: In function ‘sock_rps_record_flow’: include/net/sock.h:849:30: error: ‘const struct sock’ has no member named ‘sk_rxhash’ include/net/sock.h: In function ‘sock_rps_reset_flow’: include/net/sock.h:854:29: error: ‘const struct sock’ has no member named ‘sk_rxhash’ Reported-by: Fengguang Wu Signed-off-by: Zhi Yong Wu Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8ee90add69d2..bd716b6996ff 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -846,12 +846,16 @@ static inline void sock_rps_reset_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { +#ifdef CONFIG_RPS sock_rps_record_flow_hash(sk->sk_rxhash); +#endif } static inline void sock_rps_reset_flow(const struct sock *sk) { +#ifdef CONFIG_RPS sock_rps_reset_flow_hash(sk->sk_rxhash); +#endif } static inline void sock_rps_save_rxhash(struct sock *sk, -- cgit v1.2.2 From 3678a9d86324e457d0ff9d898747ee7e787f4bb8 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 10:41:32 -0800 Subject: netlink: cleanup rntl_af_register The function __rtnl_af_register is never called outside this code, and the return value is always 0. Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bb13a182fba6..8fb42070a2c1 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -115,10 +115,9 @@ struct rtnl_af_ops { const struct nlattr *attr); }; -int __rtnl_af_register(struct rtnl_af_ops *ops); void __rtnl_af_unregister(struct rtnl_af_ops *ops); -int rtnl_af_register(struct rtnl_af_ops *ops); +void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); -- cgit v1.2.2 From 2173f8d953e76026bf6ede8ec81ad35ec158dae9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 10:49:22 -0800 Subject: netlink: cleanup tap related functions Cleanups in netlink_tap code * remove unused function netlink_clear_multicast_users * make local function static Signed-off-by: Stephen Hemminger Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7a6c396a263b..aad8eeaf416d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,7 +62,6 @@ extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); -extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, @@ -168,7 +167,6 @@ struct netlink_tap { }; extern int netlink_add_tap(struct netlink_tap *nt); -extern int __netlink_remove_tap(struct netlink_tap *nt); extern int netlink_remove_tap(struct netlink_tap *nt); #endif /* __LINUX_NETLINK_H */ -- cgit v1.2.2 From 1d143d9f0c833fcf38cc737eb0a8698fa2dd144c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 29 Dec 2013 14:01:29 -0800 Subject: net: core functions cleanup The following functions are not used outside of net/core/dev.c and should be declared static. call_netdevice_notifiers_info __dev_remove_offload netdev_has_any_upper_dev __netdev_adjacent_dev_remove __netdev_adjacent_dev_link_lists __netdev_adjacent_dev_unlink_lists __netdev_adjacent_dev_unlink __netdev_adjacent_dev_link_neighbour __netdev_adjacent_dev_unlink_neighbour And the following are never used and should be deleted netdev_lower_dev_get_private_rcu __netdev_find_adj_rcu Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88afa8048a7c..bec60c481966 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1741,8 +1741,6 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) return info->dev; } -int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, - struct netdev_notifier_info *info); int call_netdevice_notifiers(unsigned long val, struct net_device *dev); @@ -1809,7 +1807,6 @@ void dev_remove_pack(struct packet_type *pt); void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); -void __dev_remove_offload(struct packet_offload *po); struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, unsigned short mask); @@ -2867,7 +2864,6 @@ extern int weight_p; extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); -bool netdev_has_any_upper_dev(struct net_device *dev); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); @@ -2907,8 +2903,6 @@ int netdev_master_upper_dev_link_private(struct net_device *dev, void *private); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); -void *netdev_lower_dev_get_private_rcu(struct net_device *dev, - struct net_device *lower_dev); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); int skb_checksum_help(struct sk_buff *skb); -- cgit v1.2.2 From e82435341ff08769b70400b72a7a8efda83c5014 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 29 Dec 2013 14:03:31 -0800 Subject: ipv6: namespace cleanups Running 'make namespacecheck' shows: net/ipv6/route.o ipv6_route_table_template rt6_bind_peer net/ipv6/icmp.o icmpv6_route_lookup ipv6_icmp_table_template This addresses some of those warnings by: * make icmpv6_route_lookup static * move inline's out of ip6_route.h since only used into route.c * move rt6_bind_peer into route.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/ip6_route.h | 20 -------------------- include/net/ipv6.h | 4 ---- 2 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index c2626ce1f2ad..1fb6cddbd448 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -51,26 +51,6 @@ static inline unsigned int rt6_flags2srcprefs(int flags) return (flags >> 3) & 7; } -void rt6_bind_peer(struct rt6_info *rt, int create); - -static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) -{ - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); - - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); -} - -static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) -{ - return __rt6_get_peer(rt, 0); -} - -static inline struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) -{ - return __rt6_get_peer(rt, 1); -} void ip6_route_input(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e600b89811aa..12079c65ea3e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -267,9 +267,6 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len); -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6); - int ip6_ra_control(struct sock *sk, int sel); int ipv6_parse_hopopts(struct sk_buff *skb); @@ -839,7 +836,6 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; } #ifdef CONFIG_SYSCTL extern struct ctl_table ipv6_route_table_template[]; -extern struct ctl_table ipv6_icmp_table_template[]; struct ctl_table *ipv6_icmp_sysctl_init(struct net *net); struct ctl_table *ipv6_route_sysctl_init(struct net *net); -- cgit v1.2.2 From 0c3584d58913a72a6b28a976f7c0bfd2afb65237 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 27 Dec 2013 16:32:38 +0800 Subject: ipv6: remove prune parameter for fib6_clean_all since the prune parameter for fib6_clean_all always is 0, remove it. Signed-off-by: Li RongQing Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2182525e4d74..aca0c2709fd6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -282,7 +282,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *saddr, int src_len); void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg); + void *arg); int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info); -- cgit v1.2.2 From 96bfc80d83d4b109bd5c5af6bc323424e920c84d Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 30 Dec 2013 10:41:56 +0530 Subject: net: Cleanup in eth-netx.h Commit 2960ed346877 ("ARM: netx: move platform_data definitions") moved the file to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Cc: Sascha Hauer Signed-off-by: David S. Miller --- include/linux/platform_data/eth-netx.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/eth-netx.h b/include/linux/platform_data/eth-netx.h index 88af1ac28ead..a395159725d5 100644 --- a/include/linux/platform_data/eth-netx.h +++ b/include/linux/platform_data/eth-netx.h @@ -1,6 +1,4 @@ /* - * arch/arm/mach-netx/include/mach/eth.h - * * Copyright (c) 2005 Sascha Hauer , Pengutronix * * This program is free software; you can redistribute it and/or modify @@ -17,8 +15,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef ASMARM_ARCH_ETH_H -#define ASMARM_ARCH_ETH_H +#ifndef __ETH_NETX_H +#define __ETH_NETX_H struct netxeth_platform_data { unsigned int xcno; /* number of xmac/xpec engine this eth uses */ -- cgit v1.2.2 From 9c75f4029cf494ae810a54e34ede9803cff60c7c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 31 Dec 2013 11:54:00 -0800 Subject: sched action: make local function static No need to export functions only used in one file. Signed-off-by: Stephen Hemminger Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 77d5d8156efc..d34e1f4d897b 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -105,10 +105,7 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo); void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_hash_release(struct tcf_common *p, int bind, struct tcf_hashinfo *hinfo); -int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a); u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo); -int tcf_hash_search(struct tc_action *a, u32 index); struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, struct tcf_hashinfo *hinfo); struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, -- cgit v1.2.2 From c454997e68eb013510ba128283ad3b4aefeff630 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Fri, 3 Jan 2014 11:18:32 +0800 Subject: {pktgen, xfrm} Introduce xfrm_state_lookup_byspi for pktgen Introduce xfrm_state_lookup_byspi to find user specified by custom from "pgset spi xxx". Using this scheme, any flow regardless its saddr/daddr could be transform by SA specified with configurable spi. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b7635ef4d436..cd7c46ff6f1f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1421,6 +1421,8 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid); +struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, + unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); -- cgit v1.2.2 From 34ce324019e76f6d93768d68343a0e78f464d754 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Dec 2013 22:40:29 +0100 Subject: netfilter: nf_nat: add full port randomization support We currently use prandom_u32() for allocation of ports in tcp bind(0) and udp code. In case of plain SNAT we try to keep the ports as is or increment on collision. SNAT --random mode does use per-destination incrementing port allocation. As a recent paper pointed out in [1] that this mode of port allocation makes it possible to an attacker to find the randomly allocated ports through a timing side-channel in a socket overloading attack conducted through an off-path attacker. So, NF_NAT_RANGE_PROTO_RANDOM actually weakens the port randomization in regard to the attack described in this paper. As we need to keep compatibility, add another flag called NF_NAT_RANGE_PROTO_RANDOM_FULLY that would replace the NF_NAT_RANGE_PROTO_RANDOM hash-based port selection algorithm with a simple prandom_u32() in order to mitigate this attack vector. Note that the lfsr113's internal state is periodically reseeded by the kernel through a local secure entropy source. More details can be found in [1], the basic idea is to send bursts of packets to a socket to overflow its receive queue and measure the latency to detect a possible retransmit when the port is found. Because of increasing ports to given destination and port, further allocations can be predicted. This information could then be used by an attacker for e.g. for cache-poisoning, NS pinning, and degradation of service attacks against DNS servers [1]: The best defense against the poisoning attacks is to properly deploy and validate DNSSEC; DNSSEC provides security not only against off-path attacker but even against MitM attacker. We hope that our results will help motivate administrators to adopt DNSSEC. However, full DNSSEC deployment make take significant time, and until that happens, we recommend short-term, non-cryptographic defenses. We recommend to support full port randomisation, according to practices recommended in [2], and to avoid per-destination sequential port allocation, which we show may be vulnerable to derandomisation attacks. Joint work between Hannes Frederic Sowa and Daniel Borkmann. [1] https://sites.google.com/site/hayashulman/files/NIC-derandomisation.pdf [2] http://arxiv.org/pdf/1205.5190v1.pdf Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_nat.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index bf0cc373ffb6..1ad3659102b6 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -4,10 +4,14 @@ #include #include -#define NF_NAT_RANGE_MAP_IPS 1 -#define NF_NAT_RANGE_PROTO_SPECIFIED 2 -#define NF_NAT_RANGE_PROTO_RANDOM 4 -#define NF_NAT_RANGE_PERSISTENT 8 +#define NF_NAT_RANGE_MAP_IPS (1 << 0) +#define NF_NAT_RANGE_PROTO_SPECIFIED (1 << 1) +#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2) +#define NF_NAT_RANGE_PERSISTENT (1 << 3) +#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) + +#define NF_NAT_RANGE_PROTO_RANDOM_ALL \ + (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) struct nf_nat_ipv4_range { unsigned int flags; -- cgit v1.2.2 From 02eca9d2cc541806e8f03b4131c7ee9120246df7 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 17:13:10 -0800 Subject: netfilter: ipset: remove unused code Function never used in current upstream code. Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index c7174b816674..0c7d01eae56c 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -331,7 +331,6 @@ extern ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index); -extern ip_set_id_t ip_set_nfnl_get(struct net *net, const char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index); -- cgit v1.2.2 From dcd93ed4cd1669b2c1510e801fe5f1132390761c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 30 Dec 2013 17:16:08 -0800 Subject: netfilter: nf_conntrack: remove dead code The following code is not used in current upstream code. Some of this seems to be old hooks, other might be used by some out of tree module (which I don't care about breaking), and the need_ipv4_conntrack was used by old NAT code but no longer called. Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2 -- include/net/netfilter/nf_conntrack_l3proto.h | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 6c3d12e2949f..981c327374da 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -19,6 +19,4 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); -void need_ipv4_conntrack(void); - #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 3efab704b7eb..adc1fa3dd7ab 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -87,7 +87,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); -void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; -- cgit v1.2.2 From fe1217c4f3f7d7cbf8efdd8dd5fdc7204a1d65a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 18:27:10 +0100 Subject: net: net_cls: move cgroupfs classid handling into core Zefan Li requested [1] to perform the following cleanup/refactoring: - Split cgroupfs classid handling into net core to better express a possible more generic use. - Disable module support for cgroupfs bits as the majority of other cgroupfs subsystems do not have that, and seems to be not wished from cgroup side. Zefan probably might want to follow-up for netprio later on. - By this, code can be further reduced which previously took care of functionality built when compiled as module. cgroupfs bits are being placed under net/core/netclassid_cgroup.c, so that we are consistent with {netclassid,netprio}_cgroup naming that is under net/core/ as suggested by Zefan. No change in functionality, but only code refactoring that is being done here. [1] http://patchwork.ozlabs.org/patch/304825/ Suggested-by: Li Zefan Signed-off-by: Daniel Borkmann Cc: Zefan Li Cc: Thomas Graf Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- include/linux/cgroup_subsys.h | 2 +- include/net/cls_cgroup.h | 40 ++++++++++++---------------------------- 2 files changed, 13 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index b613ffd402d1..58bf94de4b8e 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -31,7 +31,7 @@ SUBSYS(devices) SUBSYS(freezer) #endif -#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP) +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_CLASSID) SUBSYS(net_cls) #endif diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 33d03b648646..9cf2d5ef38d9 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -16,17 +16,16 @@ #include #include #include +#include -#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -struct cgroup_cls_state -{ +#ifdef CONFIG_CGROUP_NET_CLASSID +struct cgroup_cls_state { struct cgroup_subsys_state css; u32 classid; }; -void sock_update_classid(struct sock *sk); +struct cgroup_cls_state *task_cls_state(struct task_struct *p); -#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) static inline u32 task_cls_classid(struct task_struct *p) { u32 classid; @@ -41,33 +40,18 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -#elif IS_MODULE(CONFIG_NET_CLS_CGROUP) -static inline u32 task_cls_classid(struct task_struct *p) -{ - struct cgroup_subsys_state *css; - u32 classid = 0; - - if (in_interrupt()) - return 0; - - rcu_read_lock(); - css = task_css(p, net_cls_subsys_id); - if (css) - classid = container_of(css, - struct cgroup_cls_state, css)->classid; - rcu_read_unlock(); - return classid; -} -#endif -#else /* !CGROUP_NET_CLS_CGROUP */ static inline void sock_update_classid(struct sock *sk) { -} + u32 classid; -static inline u32 task_cls_classid(struct task_struct *p) + classid = task_cls_classid(current); + if (classid != sk->sk_classid) + sk->sk_classid = classid; +} +#else /* !CONFIG_CGROUP_NET_CLASSID */ +static inline void sock_update_classid(struct sock *sk) { - return 0; } -#endif /* CGROUP_NET_CLS_CGROUP */ +#endif /* CONFIG_CGROUP_NET_CLASSID */ #endif /* _NET_CLS_CGROUP_H */ -- cgit v1.2.2 From 86f8515f9721fa171483f0fe0391968fbb949cc9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 17:27:11 +0100 Subject: net: netprio: rename config to be more consistent with cgroup configs While we're at it and introduced CGROUP_NET_CLASSID, lets also make NETPRIO_CGROUP more consistent with the rest of cgroups and rename it into CONFIG_CGROUP_NET_PRIO so that for networking, we now have CONFIG_CGROUP_NET_{PRIO,CLASSID}. This not only makes the CONFIG option consistent among networking cgroups, but also among cgroups CONFIG conventions in general as the vast majority has a prefix of CONFIG_CGROUP_. Signed-off-by: Daniel Borkmann Cc: Zefan Li Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- include/linux/cgroup_subsys.h | 2 +- include/linux/netdevice.h | 2 +- include/net/netprio_cgroup.h | 18 ++++++------------ include/net/sock.h | 2 +- 4 files changed, 9 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 58bf94de4b8e..7b99d717411d 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -43,7 +43,7 @@ SUBSYS(blkio) SUBSYS(perf) #endif -#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_NET_PRIO) SUBSYS(net_prio) #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5260d2eae2e6..45cf68194aa8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1444,7 +1444,7 @@ struct net_device { /* max exchange id for FCoE LRO by ddp */ unsigned int fcoe_ddp_xid; #endif -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif /* phy device may attach itself for hardware timestamping */ diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index 099d02782e22..dafc09f0fdbc 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -13,12 +13,12 @@ #ifndef _NETPRIO_CGROUP_H #define _NETPRIO_CGROUP_H + #include #include #include - -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map { struct rcu_head rcu; u32 priomap_len; @@ -27,8 +27,7 @@ struct netprio_map { void sock_update_netprioidx(struct sock *sk); -#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) - +#if IS_BUILTIN(CONFIG_CGROUP_NET_PRIO) static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -40,9 +39,7 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } - -#elif IS_MODULE(CONFIG_NETPRIO_CGROUP) - +#elif IS_MODULE(CONFIG_CGROUP_NET_PRIO) static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -56,9 +53,7 @@ static inline u32 task_netprioidx(struct task_struct *p) return idx; } #endif - -#else /* !CONFIG_NETPRIO_CGROUP */ - +#else /* !CONFIG_CGROUP_NET_PRIO */ static inline u32 task_netprioidx(struct task_struct *p) { return 0; @@ -66,6 +61,5 @@ static inline u32 task_netprioidx(struct task_struct *p) #define sock_update_netprioidx(sk) -#endif /* CONFIG_NETPRIO_CGROUP */ - +#endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 2ef3c3eca47a..ef5e2be6eaf3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -395,7 +395,7 @@ struct sock { unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) __u32 sk_cgrp_prioidx; #endif struct pid *sk_peer_pid; -- cgit v1.2.2 From 82a37132f300ea53bdcd812917af5a6329ec80c3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 18:27:12 +0100 Subject: netfilter: x_tables: lightweight process control group matching It would be useful e.g. in a server or desktop environment to have a facility in the notion of fine-grained "per application" or "per application group" firewall policies. Probably, users in the mobile, embedded area (e.g. Android based) with different security policy requirements for application groups could have great benefit from that as well. For example, with a little bit of configuration effort, an admin could whitelist well-known applications, and thus block otherwise unwanted "hard-to-track" applications like [1] from a user's machine. Blocking is just one example, but it is not limited to that, meaning we can have much different scenarios/policies that netfilter allows us than just blocking, e.g. fine grained settings where applications are allowed to connect/send traffic to, application traffic marking/conntracking, application-specific packet mangling, and so on. Implementation of PID-based matching would not be appropriate as they frequently change, and child tracking would make that even more complex and ugly. Cgroups would be a perfect candidate for accomplishing that as they associate a set of tasks with a set of parameters for one or more subsystems, in our case the netfilter subsystem, which, of course, can be combined with other cgroup subsystems into something more complex if needed. As mentioned, to overcome this constraint, such processes could be placed into one or multiple cgroups where different fine-grained rules can be defined depending on the application scenario, while e.g. everything else that is not part of that could be dropped (or vice versa), thus making life harder for unwanted processes to communicate to the outside world. So, we make use of cgroups here to track jobs and limit their resources in terms of iptables policies; in other words, limiting, tracking, etc what they are allowed to communicate. In our case we're working on outgoing traffic based on which local socket that originated from. Also, one doesn't even need to have an a-prio knowledge of the application internals regarding their particular use of ports or protocols. Matching is *extremly* lightweight as we just test for the sk_classid marker of sockets, originating from net_cls. net_cls and netfilter do not contradict each other; in fact, each construct can live as standalone or they can be used in combination with each other, which is perfectly fine, plus it serves Tejun's requirement to not introduce a new cgroups subsystem. Through this, we result in a very minimal and efficient module, and don't add anything except netfilter code. One possible, minimal usage example (many other iptables options can be applied obviously): 1) Configuring cgroups if not already done, e.g.: mkdir /sys/fs/cgroup/net_cls mount -t cgroup -o net_cls net_cls /sys/fs/cgroup/net_cls mkdir /sys/fs/cgroup/net_cls/0 echo 1 > /sys/fs/cgroup/net_cls/0/net_cls.classid (resp. a real flow handle id for tc) 2) Configuring netfilter (iptables-nftables), e.g.: iptables -A OUTPUT -m cgroup ! --cgroup 1 -j DROP 3) Running applications, e.g.: ping 208.67.222.222 echo 1799 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.222.222: icmp_seq=44 ttl=49 time=11.9 ms [...] ping 208.67.220.220 ping: sendmsg: Operation not permitted [...] echo 1804 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.220.220: icmp_seq=89 ttl=56 time=19.0 ms [...] Of course, real-world deployments would make use of cgroups user space toolsuite, or own custom policy daemons dynamically moving applications from/to various cgroups. [1] http://www.blackhat.com/presentations/bh-europe-06/bh-eu-06-biondi/bh-eu-06-biondi-up.pdf Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/xt_cgroup.h | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_cgroup.h (limited to 'include') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 91be8ce623f0..2344f5a319fc 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -39,6 +39,7 @@ header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h header-y += xt_bpf.h +header-y += xt_cgroup.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h new file mode 100644 index 000000000000..43acb7e175f6 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -0,0 +1,11 @@ +#ifndef _UAPI_XT_CGROUP_H +#define _UAPI_XT_CGROUP_H + +#include + +struct xt_cgroup_info { + __u32 id; + __u32 invert; +}; + +#endif /* _UAPI_XT_CGROUP_H */ -- cgit v1.2.2 From 7d442fab0a6777fd7612cfcada32ea859553d370 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 2 Jan 2014 11:48:26 -0800 Subject: ipv4: Cache dst in tunnels Avoid doing a route lookup on every packet being tunneled. In ip_tunnel.c cache the route returned from ip_route_output if the tunnel is "connected" so that all the rouitng parameters are taken from tunnel parms for a packet. Specifically, not NBMA tunnel and tos is from tunnel parms (not inner packet). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 732f8c6ae975..bde50fc5b4f0 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -54,6 +54,9 @@ struct ip_tunnel { int hlen; /* Precalculated header length */ int mlink; + struct dst_entry __rcu *dst_cache; + spinlock_t dst_lock; + struct ip_tunnel_parm parms; /* for SIT */ -- cgit v1.2.2 From 9a4aa9af447f784f0a47313c8dcb79ac63442cf7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 2 Jan 2014 11:48:33 -0800 Subject: ipv4: Use percpu Cache route in IP tunnels percpu route cache eliminates share of dst refcnt between CPUs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index bde50fc5b4f0..9e25b1bc31da 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -38,6 +38,11 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; +struct ip_tunnel_dst { + struct dst_entry __rcu *dst; + spinlock_t lock; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@ -54,8 +59,7 @@ struct ip_tunnel { int hlen; /* Precalculated header length */ int mlink; - struct dst_entry __rcu *dst_cache; - spinlock_t dst_lock; + struct ip_tunnel_dst __percpu *dst_cache; struct ip_tunnel_parm parms; -- cgit v1.2.2 From 8f09898bf02fc24b7a525e9cfc78f38dcdf3a4eb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 3 Jan 2014 09:17:14 -0800 Subject: socket: cleanups Namespace related cleaning * make cred_to_ucred static * remove unused sock_rmalloc function Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/socket.h | 2 -- include/net/sock.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 445ef7519dc2..5d488a65ad20 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -305,8 +305,6 @@ struct ucred { /* IPX options */ #define IPX_TYPE 1 -extern void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred); - extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); extern int csum_partial_copy_fromiovecend(unsigned char *kdata, diff --git a/include/net/sock.h b/include/net/sock.h index bd716b6996ff..8d9af66ccf2c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1549,8 +1549,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); -struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - gfp_t priority); void sock_wfree(struct sk_buff *skb); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); -- cgit v1.2.2 From 5e419e68a6450da279e8add304629774133576a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 3 Jan 2014 09:19:51 -0800 Subject: llc: make lock static The llc_sap_list_lock does not need to be global, only acquired in core. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/llc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/llc.h b/include/net/llc.h index 68490cbc8a65..e8e61d4fb458 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -93,7 +93,6 @@ struct hlist_nulls_head *llc_sk_laddr_hash(struct llc_sap *sap, #define LLC_DEST_CONN 2 /* Type 2 goes here */ extern struct list_head llc_sap_list; -extern spinlock_t llc_sap_list_lock; int llc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); -- cgit v1.2.2 From 998e40bbf8f0e10b5d84107afc61e29dbc8d2de4 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Fri, 3 Jan 2014 14:18:41 -0800 Subject: bonding: add lacp_rate attribute netlink support Add IFLA_BOND_AD_LACP_RATE to allow get/set of bonding parameter lacp_rate via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8fbc7ca878b4..0f8f38f965cb 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -349,6 +349,7 @@ enum { IFLA_BOND_MIN_LINKS, IFLA_BOND_LP_INTERVAL, IFLA_BOND_PACKETS_PER_SLAVE, + IFLA_BOND_AD_LACP_RATE, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From ec029fac3e96980fa8f6f81b8327787a9600dfaa Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Fri, 3 Jan 2014 14:18:49 -0800 Subject: bonding: add ad_select attribute netlink support Add IFLA_BOND_AD_SELECT to allow get/set of bonding parameter ad_select via netlink. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0f8f38f965cb..0fbb01824dd3 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -350,6 +350,7 @@ enum { IFLA_BOND_LP_INTERVAL, IFLA_BOND_PACKETS_PER_SLAVE, IFLA_BOND_AD_LACP_RATE, + IFLA_BOND_AD_SELECT, __IFLA_BOND_MAX, }; -- cgit v1.2.2 From 4ee7ac7526d4a9413cafa733d824edfe49fdcc46 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Fri, 3 Jan 2014 14:18:56 -0800 Subject: bonding: add ad_info attribute netlink support Add nested IFLA_BOND_AD_INFO for bonding 802.3ad info. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 0fbb01824dd3..3e6bd3c7445d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -351,11 +351,23 @@ enum { IFLA_BOND_PACKETS_PER_SLAVE, IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, + IFLA_BOND_AD_INFO, __IFLA_BOND_MAX, }; #define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) +enum { + IFLA_BOND_AD_INFO_AGGREGATOR, + IFLA_BOND_AD_INFO_NUM_PORTS, + IFLA_BOND_AD_INFO_ACTOR_KEY, + IFLA_BOND_AD_INFO_PARTNER_KEY, + IFLA_BOND_AD_INFO_PARTNER_MAC, + __IFLA_BOND_AD_INFO_MAX, +}; + +#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) + /* SR-IOV virtual function management section */ enum { -- cgit v1.2.2 From 55fdbfe7beb68c71060a27965a72f7eed09fa81d Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Sat, 28 Dec 2013 04:28:18 -0800 Subject: pci_regs.h: Add PCI bus link speed and width defines Add missing PCI bus link speed 8.0 GT/s and bus link widths of x1, x2, x4 and x8. CC: CC: Bjorn Helgaas Signed-off-by: Jeff Kirsher Acked-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 4a98e85438a7..c870c2a71d65 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -489,7 +489,12 @@ #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ +#define PCI_EXP_LNKSTA_CLS_8_0GB 0x0003 /* Current Link Speed 8.0GT/s */ #define PCI_EXP_LNKSTA_NLW 0x03f0 /* Negotiated Link Width */ +#define PCI_EXP_LNKSTA_NLW_X1 0x0010 /* Current Link Width x1 */ +#define PCI_EXP_LNKSTA_NLW_X2 0x0020 /* Current Link Width x2 */ +#define PCI_EXP_LNKSTA_NLW_X4 0x0040 /* Current Link Width x4 */ +#define PCI_EXP_LNKSTA_NLW_X8 0x0080 /* Current Link Width x8 */ #define PCI_EXP_LNKSTA_NLW_SHIFT 4 /* start of NLW mask in link status */ #define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ -- cgit v1.2.2 From 444fb98eed98f7292a83f9bf123d1c78f171327e Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Thu, 2 Jan 2014 11:58:12 +0100 Subject: NFC: digital: Add a note about asynchronous functions This explains how and why the timeout parameter must be handled by the driver implementation. Signed-off-by: Samuel Ortiz --- include/net/nfc/digital.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h index 36acecd5f06c..81af21e9bcd4 100644 --- a/include/net/nfc/digital.h +++ b/include/net/nfc/digital.h @@ -122,6 +122,16 @@ typedef void (*nfc_digital_cmd_complete_t)(struct nfc_digital_dev *ddev, * switch_rf to turn the radio on. A call to in|tg_configure_hw must turn * the device radio on. * @abort_cmd: Discard the last sent command. + * + * Notes: Asynchronous functions have a timeout parameter. It is the driver + * responsibility to call the digital stack back through the + * nfc_digital_cmd_complete_t callback when no RF respsonse has been + * received within the specified time (in milliseconds). In that case the + * driver must set the resp sk_buff to ERR_PTR(-ETIMEDOUT). + * Since the digital stack serializes commands to be sent, it's mandatory + * for the driver to handle the timeout correctly. Otherwise the stack + * would not be able to send new commands, waiting for the reply of the + * current one. */ struct nfc_digital_ops { int (*in_configure_hw)(struct nfc_digital_dev *ddev, int type, -- cgit v1.2.2 From f9f462faa02777f497eb25255683a94e0c054de6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 3 Jan 2014 03:02:35 -0800 Subject: Bluetooth: Add quirk for disabling Delete Stored Link Key command Some controller pretend they support the Delete Stored Link Key command, but in reality they really don't support it. < HCI Command: Delete Stored Link Key (0x03|0x0012) plen 7 bdaddr 00:00:00:00:00:00 all 1 > HCI Event: Command Complete (0x0e) plen 4 Delete Stored Link Key (0x03|0x0012) ncmd 1 status 0x11 deleted 0 Error: Unsupported Feature or Parameter Value Not correctly supporting this command causes the controller setup to fail and will make a device not work. However sending the command for controller that handle stored link keys is important. This quirk allows a driver to disable the command if it knows that this command handling is broken. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5dc3d9072650..66c1cd87bfe7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -83,7 +83,8 @@ enum { HCI_QUIRK_RESET_ON_CLOSE, HCI_QUIRK_RAW_DEVICE, - HCI_QUIRK_FIXUP_BUFFER_SIZE + HCI_QUIRK_FIXUP_BUFFER_SIZE, + HCI_QUIRK_BROKEN_STORED_LINK_KEY, }; /* HCI device flags */ -- cgit v1.2.2 From 4017b4d3212b7b9d979d7174cbe340c9acbb4666 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 5 Jan 2014 03:20:17 +0300 Subject: : coding style fixes Running 'checkpatch.pl' gives some errors and warnings: - no spaces around =; - * separated by space from the function name; - { in function definition not on a separate line; - line over 80 characters. While fixing these, also fix the following style issues: - file name in the heading comment; - alignment not matching open paren. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/phy.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 73384ff3b5e5..d4cb756a41c5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1,6 +1,4 @@ /* - * include/linux/phy.h - * * Framework and drivers for configuring and reading different PHYs * Based on code in sungem_phy.c and gianfar_phy.c * @@ -240,7 +238,7 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); * - phy_stop moves to HALTED */ enum phy_state { - PHY_DOWN=0, + PHY_DOWN = 0, PHY_STARTING, PHY_READY, PHY_PENDING, @@ -544,21 +542,22 @@ static inline bool phy_is_internal(struct phy_device *phydev) } struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, - bool is_c45, struct phy_c45_device_ids *c45_ids); + bool is_c45, + struct phy_c45_device_ids *c45_ids); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); -struct phy_device * phy_attach(struct net_device *dev, - const char *bus_id, phy_interface_t interface); +struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, + phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, - void (*handler)(struct net_device *), - phy_interface_t interface); -struct phy_device * phy_connect(struct net_device *dev, const char *bus_id, - void (*handler)(struct net_device *), - phy_interface_t interface); + void (*handler)(struct net_device *), + phy_interface_t interface); +struct phy_device *phy_connect(struct net_device *dev, const char *bus_id, + void (*handler)(struct net_device *), + phy_interface_t interface); void phy_disconnect(struct phy_device *phydev); void phy_detach(struct phy_device *phydev); void phy_start(struct phy_device *phydev); @@ -567,7 +566,8 @@ int phy_start_aneg(struct phy_device *phydev); int phy_stop_interrupts(struct phy_device *phydev); -static inline int phy_read_status(struct phy_device *phydev) { +static inline int phy_read_status(struct phy_device *phydev) +{ return phydev->drv->read_status(phydev); } @@ -586,22 +586,21 @@ void phy_state_machine(struct work_struct *work); void phy_change(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev, - void (*handler)(struct net_device *)); + void (*handler)(struct net_device *)); void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); -int phy_mii_ioctl(struct phy_device *phydev, - struct ifreq *ifr, int cmd); +int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); void phy_device_free(struct phy_device *phydev); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); + int (*run)(struct phy_device *)); int phy_register_fixup_for_id(const char *bus_id, - int (*run)(struct phy_device *)); + int (*run)(struct phy_device *)); int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); + int (*run)(struct phy_device *)); int phy_scan_fixups(struct phy_device *phydev); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); @@ -609,7 +608,8 @@ int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); -void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +void phy_ethtool_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol); int __init mdio_bus_init(void); void mdio_bus_exit(void); -- cgit v1.2.2 From 29935aebc7a8f2d3f9cc1743f24f0db8b4610ece Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 5 Jan 2014 03:27:17 +0300 Subject: phylib: remove unused adjust_state() callback Remove adjust_state() callback from 'struct phy_device' since it seems to have never been really used from the inception: phy_start_machine() has been always called with 2nd argument equal to NULL. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/phy.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index d4cb756a41c5..cf1bb480cfb1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -282,8 +282,6 @@ struct phy_c45_device_ids { * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. - * adjust_state: Callback for the enet driver to respond to - * changes in the state machine. * * speed, duplex, pause, supported, advertising, lp_advertising, * and autoneg are used like in mii_if_info @@ -364,8 +362,6 @@ struct phy_device { struct net_device *attached_dev; void (*adjust_link)(struct net_device *dev); - - void (*adjust_state)(struct net_device *dev); }; #define to_phy_device(d) container_of(d, struct phy_device, dev) @@ -585,8 +581,7 @@ int phy_drivers_register(struct phy_driver *new_driver, int n); void phy_state_machine(struct work_struct *work); void phy_change(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); -void phy_start_machine(struct phy_device *phydev, - void (*handler)(struct net_device *)); +void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); -- cgit v1.2.2 From fbfcec635dba38345a446f44b22352cd96ad9463 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 5 Jan 2014 03:28:27 +0300 Subject: phylib: make phy_scan_fixups() static phy_scan_fixups() isn't and shouldn't be called by the drivers directly, so unexport it. And since Florian Fainelli's recent patches, the function is only called locally, so we can make it static as well. Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index cf1bb480cfb1..7c81dd8870d4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -596,7 +596,6 @@ int phy_register_fixup_for_id(const char *bus_id, int (*run)(struct phy_device *)); int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -int phy_scan_fixups(struct phy_device *phydev); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -- cgit v1.2.2 From 8f84985fec10de64a6b4cdfea45f2b0ab8f07c78 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sat, 4 Jan 2014 13:57:59 +0800 Subject: net: unify the pcpu_tstats and br_cpu_netstats as one They are same, so unify them as one, pcpu_sw_netstats. Define pcpu_sw_netstat in netdevice.h, remove pcpu_tstats from if_tunnel and remove br_cpu_netstats from br_private.h Cc: Cong Wang Cc: Stephen Hemminger Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/if_tunnel.h | 9 --------- include/linux/netdevice.h | 11 ++++++++++- include/net/ip6_tunnel.h | 2 +- include/net/ip_tunnels.h | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index f4e56ecd0b1a..712710bc0580 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -13,13 +13,4 @@ #define for_each_ip_tunnel_rcu(pos, start) \ for (pos = rcu_dereference(start); pos; pos = rcu_dereference(pos->next)) -/* often modified stats are per cpu, other are shared (netdev->stats) */ -struct pcpu_tstats { - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; -}; - #endif /* _IF_TUNNEL_H_ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bec60c481966..51c0fe258163 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1409,7 +1409,7 @@ struct net_device { union { void *ml_priv; struct pcpu_lstats __percpu *lstats; /* loopback stats */ - struct pcpu_tstats __percpu *tstats; /* tunnel stats */ + struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; /* dummy stats */ struct pcpu_vstats __percpu *vstats; /* veth stats */ }; @@ -1685,6 +1685,15 @@ struct packet_offload { struct list_head list; }; +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_sw_netstats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; + #include /* netdevice notifier chain. Please remember to update the rtnetlink diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 6d1549c4893c..a5593dab6af7 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -79,7 +79,7 @@ static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) err = ip6_local_out(skb); if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9e25b1bc31da..cd729becbb07 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -162,10 +162,10 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, static inline void iptunnel_xmit_stats(int err, struct net_device_stats *err_stats, - struct pcpu_tstats __percpu *stats) + struct pcpu_sw_netstats __percpu *stats) { if (err > 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(stats); + struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += err; -- cgit v1.2.2 From cdf3e274cf1b36e9a2fef2d175cabc566af841b2 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Sat, 4 Jan 2014 14:22:34 +0800 Subject: macvlan: unify macvlan_pcpu_stats and vlan_pcpu_stats They are same, so unify them as one; since macvlan is a kind of vlan, vlan_pcpu_stats should be a proper name for vlan and macvlan. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 551bdd3722a2..7c8b20b120ea 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -2,6 +2,7 @@ #define _LINUX_IF_MACVLAN_H #include +#include #include #include #include @@ -24,28 +25,6 @@ static inline struct socket *macvtap_get_socket(struct file *f) struct macvlan_port; struct macvtap_queue; -/** - * struct macvlan_pcpu_stats - MACVLAN percpu stats - * @rx_packets: number of received packets - * @rx_bytes: number of received bytes - * @rx_multicast: number of received multicast packets - * @tx_packets: number of transmitted packets - * @tx_bytes: number of transmitted bytes - * @syncp: synchronization point for 64bit counters - * @rx_errors: number of rx errors - * @tx_dropped: number of tx dropped packets - */ -struct macvlan_pcpu_stats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_multicast; - u64 tx_packets; - u64 tx_bytes; - struct u64_stats_sync syncp; - u32 rx_errors; - u32 tx_dropped; -}; - /* * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. @@ -62,7 +41,7 @@ struct macvlan_dev { struct macvlan_port *port; struct net_device *lowerdev; void *fwd_priv; - struct macvlan_pcpu_stats __percpu *pcpu_stats; + struct vlan_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ); @@ -84,7 +63,7 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, bool multicast) { if (likely(success)) { - struct macvlan_pcpu_stats *pcpu_stats; + struct vlan_pcpu_stats *pcpu_stats; pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); -- cgit v1.2.2 From b2395b8aeaa2f181ee87f5840c6268992b30ed95 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 5 Jan 2014 01:10:43 +0100 Subject: bcma: export bcma_find_core_unit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is used to get a specific core when there is more than one core of that specific type. This is used in bgmac to reset all GMAC cores. Signed-off-by: Hauke Mehrtens Acked-by: Rafał Miłecki Signed-off-by: David S. Miller --- include/linux/bcma/bcma.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 4d043c30216f..0b3bb16c705a 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -418,7 +418,14 @@ static inline void bcma_maskset16(struct bcma_device *cc, bcma_write16(cc, offset, (bcma_read16(cc, offset) & mask) | set); } -extern struct bcma_device *bcma_find_core(struct bcma_bus *bus, u16 coreid); +extern struct bcma_device *bcma_find_core_unit(struct bcma_bus *bus, u16 coreid, + u8 unit); +static inline struct bcma_device *bcma_find_core(struct bcma_bus *bus, + u16 coreid) +{ + return bcma_find_core_unit(bus, coreid, 0); +} + extern bool bcma_core_is_enabled(struct bcma_device *core); extern void bcma_core_disable(struct bcma_device *core, u32 flags); extern int bcma_core_enable(struct bcma_device *core, u32 flags); -- cgit v1.2.2 From 1e85c9b66d86a776e78c332d5d32ae370ab84d3f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 6 Jan 2014 01:41:20 +0100 Subject: 8021q: make vlan_pcpu_stats visible without CONFIG_VLAN_8021Q macvlan needs vlan_pcpu_stats so make it visible even if compiling without VLAN_8021Q support. Otherwise a very long compiler error happens. Fixes: cdf3e274cf1b36 ("macvlan: unify macvlan_pcpu_stats and vlan_pcpu_stats") Cc: Li RongQing Signed-off-by: Hannes Frederic Sowa Acked-By: Li RongQing Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index f252deb99454..bbedfb56bd66 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -82,25 +82,6 @@ static inline int is_vlan_dev(struct net_device *dev) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - -extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, - __be16 vlan_proto, u16 vlan_id); -extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); -extern u16 vlan_dev_vlan_id(const struct net_device *dev); - -/** - * struct vlan_priority_tci_mapping - vlan egress priority mappings - * @priority: skb priority - * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 - * @next: pointer to next struct - */ -struct vlan_priority_tci_mapping { - u32 priority; - u16 vlan_qos; - struct vlan_priority_tci_mapping *next; -}; - /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats * @rx_packets: number of received packets @@ -123,6 +104,25 @@ struct vlan_pcpu_stats { u32 tx_dropped; }; +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + +extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, + __be16 vlan_proto, u16 vlan_id); +extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); +extern u16 vlan_dev_vlan_id(const struct net_device *dev); + +/** + * struct vlan_priority_tci_mapping - vlan egress priority mappings + * @priority: skb priority + * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000 + * @next: pointer to next struct + */ +struct vlan_priority_tci_mapping { + u32 priority; + u16 vlan_qos; + struct vlan_priority_tci_mapping *next; +}; + struct proc_dir_entry; struct netpoll; -- cgit v1.2.2 From d4b36210c2e6ecef0ce52fb6c18c51144f5c2d88 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Sat, 4 Jan 2014 17:33:55 -0800 Subject: net: pkt_sched: PIE AQM scheme Proportional Integral controller Enhanced (PIE) is a scheduler to address the bufferbloat problem. >From the IETF draft below: " Bufferbloat is a phenomenon where excess buffers in the network cause high latency and jitter. As more and more interactive applications (e.g. voice over IP, real time video streaming and financial transactions) run in the Internet, high latency and jitter degrade application performance. There is a pressing need to design intelligent queue management schemes that can control latency and jitter; and hence provide desirable quality of service to users. We present here a lightweight design, PIE(Proportional Integral controller Enhanced) that can effectively control the average queueing latency to a target value. Simulation results, theoretical analysis and Linux testbed results have shown that PIE can ensure low latency and achieve high link utilization under various congestion situations. The design does not require per-packet timestamp, so it incurs very small overhead and is simple enough to implement in both hardware and software. " Many thanks to Dave Taht for extensive feedback, reviews, testing and suggestions. Thanks also to Stephen Hemminger and Eric Dumazet for reviews and suggestions. Naeem Khademi and Dave Taht independently contributed to ECN support. For more information, please see technical paper about PIE in the IEEE Conference on High Performance Switching and Routing 2013. A copy of the paper can be found at ftp://ftpeng.cisco.com/pie/. Please also refer to the IETF draft submission at http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 All relevant code, documents and test scripts and results can be found at ftp://ftpeng.cisco.com/pie/. For problems with the iproute2/tc or Linux kernel code, please contact Vijay Subramanian (vijaynsu@cisco.com or subramanian.vijay@gmail.com) Mythili Prabhu (mysuryan@cisco.com) Signed-off-by: Vijay Subramanian Signed-off-by: Mythili Prabhu CC: Dave Taht Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 77eb331810b8..d62316baae94 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -818,4 +818,29 @@ struct tc_hhf_xstats { __u32 hh_tot_count; /* number of captured heavy-hitters so far */ __u32 hh_cur_count; /* number of current heavy-hitters */ }; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; #endif -- cgit v1.2.2 From 6e08d757b72f280c45cfec61e63216adb419e2dd Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Mon, 6 Jan 2014 12:40:50 +0100 Subject: mmc: add SDIO identifiers for Broadcom WLAN devices The SDIO identifier for Broadcom WLAN devices were defined in the brcmfmac SDIO driver. Moving the definitions in MMC header file seems common sense. Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- include/linux/mmc/sdio_ids.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 9f03feedc8e7..d8836623f36a 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -23,6 +23,15 @@ /* * Vendors and devices. Sort key: vendor first, device next. */ +#define SDIO_VENDOR_ID_BROADCOM 0x02d0 +#define SDIO_DEVICE_ID_BROADCOM_43143 43143 +#define SDIO_DEVICE_ID_BROADCOM_43241 0x4324 +#define SDIO_DEVICE_ID_BROADCOM_4329 0x4329 +#define SDIO_DEVICE_ID_BROADCOM_4330 0x4330 +#define SDIO_DEVICE_ID_BROADCOM_4334 0x4334 +#define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 +#define SDIO_DEVICE_ID_BROADCOM_43362 43362 + #define SDIO_VENDOR_ID_INTEL 0x0089 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 #define SDIO_DEVICE_ID_INTEL_IWMC3200WIFI 0x1403 -- cgit v1.2.2 From 996b175e39ed42ec2aa0c63b4a03cc500aa6269f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jan 2014 09:36:12 -0800 Subject: tcp: out_of_order_queue do not use its lock TCP out_of_order_queue lock is not used, as queue manipulation happens with socket lock held and we therefore use the lockless skb queue routines (as __skb_queue_head()) We can use __skb_queue_head_init() instead of skb_queue_head_init() to make this more consistent. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d68633452d9b..4ad0706d40eb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -248,7 +248,10 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + /* OOO segments go in this list. Note that socket lock must be held, + * as we do not use sk_buff_head lock. + */ + struct sk_buff_head out_of_order_queue; /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ -- cgit v1.2.2 From bb9b18fb55b03477fe5bdd3e97245d6d4d3dee4f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 30 Nov 2013 13:21:30 +0100 Subject: genl: Add genlmsg_new_unicast() for unicast message allocation Allocates a new sk_buff large enough to cover the specified payload plus required Netlink headers. Will check receiving socket for memory mapped i/o capability and use it if enabled. Will fall back to non-mapped skb if message size exceeds the frame size of the ring. Signed-of-by: Thomas Graf Reviewed-by: Daniel Borkmann Signed-off-by: Jesse Gross --- include/net/genetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b177ed803b7..93695f0e22a5 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -73,6 +73,7 @@ struct genl_family { * @attrs: netlink attributes * @_net: network namespace * @user_ptr: user pointers + * @dst_sk: destination socket */ struct genl_info { u32 snd_seq; @@ -85,6 +86,7 @@ struct genl_info { struct net * _net; #endif void * user_ptr[2]; + struct sock * dst_sk; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -177,6 +179,8 @@ void genl_notify(struct genl_family *family, struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); +struct sk_buff *genlmsg_new_unicast(size_t payload, struct genl_info *info, + gfp_t flags); void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); -- cgit v1.2.2 From af2806f8f90a150160be898cd85332459c83c5cb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:17 +0100 Subject: net: Export skb_zerocopy() to zerocopy from one skb to another Make the skb zerocopy logic written for nfnetlink queue available for use by other modules. Signed-off-by: Thomas Graf Reviewed-by: Daniel Borkmann Acked-by: David S. Miller Signed-off-by: Jesse Gross --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bec1cc7d5e3c..7c48e2d4c72b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2345,6 +2345,9 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -- cgit v1.2.2 From 43d4be9cb55f3bac5253e9289996fd9d735531db Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:18 +0100 Subject: openvswitch: Allow user space to announce ability to accept unaligned Netlink messages Signed-off-by: Thomas Graf Reviewed-by: Daniel Borkmann Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index d120f9fe0017..07ef2c3e21fa 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -75,6 +75,7 @@ enum ovs_datapath_attr { OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */ + OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ __OVS_DP_ATTR_MAX }; @@ -106,6 +107,9 @@ struct ovs_vport_stats { __u64 tx_dropped; /* no space available in linux */ }; +/* Allow last Netlink attribute to be unaligned */ +#define OVS_DP_F_UNALIGNED (1 << 0) + /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) -- cgit v1.2.2 From 44da5ae5fbea4686f667dc854e5ea16814e44c59 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 13 Dec 2013 15:22:19 +0100 Subject: openvswitch: Drop user features if old user space attempted to create datapath Drop user features if an outdated user space instance that does not understand the concept of user_features attempted to create a new datapath. Signed-off-by: Thomas Graf Signed-off-by: Jesse Gross --- include/uapi/linux/openvswitch.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 07ef2c3e21fa..970553cbbc8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -40,7 +40,15 @@ struct ovs_header { #define OVS_DATAPATH_FAMILY "ovs_datapath" #define OVS_DATAPATH_MCGROUP "ovs_datapath" -#define OVS_DATAPATH_VERSION 0x1 + +/* V2: + * - API users are expected to provide OVS_DP_ATTR_USER_FEATURES + * when creating the datapath. + */ +#define OVS_DATAPATH_VERSION 2 + +/* First OVS datapath version to support features */ +#define OVS_DP_VER_FEATURES 2 enum ovs_datapath_cmd { OVS_DP_CMD_UNSPEC, -- cgit v1.2.2 From 86e8586ed5beea15ce7c359f02a1084c2da93bc7 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 6 Jan 2014 12:58:17 -0800 Subject: NFC: NCI: Add setup handler Some drivers require special configuration while initializing. This patch adds setup handler for this custom configuration. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 6126f1f992b4..5c6fadd63483 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -68,6 +68,7 @@ struct nci_ops { int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); int (*send)(struct nci_dev *ndev, struct sk_buff *skb); + int (*setup)(struct nci_dev *ndev); }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 -- cgit v1.2.2 From 22c15bf30b70ab2eae300f093ffc64e182620aba Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Mon, 6 Jan 2014 12:58:18 -0800 Subject: NFC: NCI: Add set_config API This API can be used by drivers to send their custom configuration using SET_CONFIG NCI command to the device. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 5c6fadd63483..2b93b77b210c 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -155,6 +155,7 @@ void nci_free_device(struct nci_dev *ndev); int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); +int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, -- cgit v1.2.2 From 438e38fadca2f6e57eeecc08326c8a95758594d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jan 2014 14:03:07 -0800 Subject: gre_offload: statically build GRE offloading support GRO/GSO layers can be enabled on a node, even if said node is only forwarding packets. This patch permits GSO (and upcoming GRO) support for GRE encapsulated packets, even if the host has no GRE tunnel setup. Signed-off-by: Eric Dumazet Cc: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/net/gre.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/gre.h b/include/net/gre.h index dcd9ae3270d3..70046a0b0b89 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -33,9 +33,6 @@ struct gre_cisco_protocol { int gre_cisco_register(struct gre_cisco_protocol *proto); int gre_cisco_unregister(struct gre_cisco_protocol *proto); -int gre_offload_init(void); -void gre_offload_exit(void); - void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len); -- cgit v1.2.2 From 509aba3b0d366b7f16a9a2eebac1156b25f5f622 Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Tue, 7 Jan 2014 14:57:27 +0100 Subject: IPv6: add the option to use anycast addresses as source addresses in echo reply This change allows to follow a recommandation of RFC4942. - Add "anycast_src_echo_reply" sysctl to control the use of anycast addresses as source addresses for ICMPv6 echo reply. This sysctl is false by default to preserve existing behavior. - Add inline check ipv6_anycast_destination(). - Use them in icmpv6_echo_reply(). Reference: RFC4942 - IPv6 Transition/Coexistence Security Considerations (http://tools.ietf.org/html/rfc4942#section-2.1.6) 2.1.6. Anycast Traffic Identification and Security [...] To avoid exposing knowledge about the internal structure of the network, it is recommended that anycast servers now take advantage of the ability to return responses with the anycast address as the source address if possible. Signed-off-by: Francois-Xavier Le Bail Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 7 +++++++ include/net/netns/ipv6.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1fb6cddbd448..017badb1aec7 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -152,6 +152,13 @@ static inline bool ipv6_unicast_destination(const struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +static inline bool ipv6_anycast_destination(const struct sk_buff *skb) +{ + struct rt6_info *rt = (struct rt6_info *) skb_dst(skb); + + return rt->rt6i_flags & RTF_ANYCAST; +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0fb2401197c5..76fc7d1dbfd3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -73,6 +73,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t rt_genid; + int anycast_src_echo_reply; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit v1.2.2 From bf5a755f5e9186406bbf50f4087100af5bd68e40 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Tue, 7 Jan 2014 10:23:19 -0800 Subject: net-gre-gro: Add GRE support to the GRO stack This patch built on top of Commit 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 ("net-gro: Prepare GRO stack for the upcoming tunneling support") to add the support of the standard GRE (RFC1701/RFC2784/RFC2890) to the GRO stack. It also serves as an example for supporting other encapsulation protocols in the GRO stack in the future. The patch supports version 0 and all the flags (key, csum, seq#) but will flush any pkt with the S (seq#) flag. This is because the S flag is not support by GSO, and a GRO pkt may end up in the forwarding path, thus requiring GSO support to break it up correctly. Currently the "packet_offload" structure only contains L3 (ETH_P_IP/ ETH_P_IPV6) GRO offload support so the encapped pkts are limited to IP pkts (i.e., w/o L2 hdr). But support for other protocol type can be easily added, so is the support for GRE variations like NVGRE. The patch also support csum offload. Specifically if the csum flag is on and the h/w is capable of checksumming the payload (CHECKSUM_COMPLETE), the code will take advantage of the csum computed by the h/w when validating the GRE csum. Note that commit 60769a5dcd8755715c7143b4571d5c44f01796f1 "ipv4: gre: add GRO capability" already introduces GRO capability to IPv4 GRE tunnels, using the gro_cells infrastructure. But GRO is done after GRE hdr has been removed (i.e., decapped). The following patch applies GRO when pkts first come in (before hitting the GRE tunnel code). There is some performance advantage for applying GRO as early as possible. Also this approach is transparent to other subsystem like Open vSwitch where GRE decap is handled outside of the IP stack hence making it harder for the gro_cells stuff to apply. On the other hand, some NICs are still not capable of hashing on the inner hdr of a GRE pkt (RSS). In that case the GRO processing of pkts from the same remote host will all happen on the same CPU and the performance may be suboptimal. I'm including some rough preliminary performance numbers below. Note that the performance will be highly dependent on traffic load, mix as usual. Moreover it also depends on NIC offload features hence the following is by no means a comprehesive study. Local testing and tuning will be needed to decide the best setting. All tests spawned 50 copies of netperf TCP_STREAM and ran for 30 secs. (super_netperf 50 -H 192.168.1.18 -l 30) An IP GRE tunnel with only the key flag on (e.g., ip tunnel add gre1 mode gre local 10.246.17.18 remote 10.246.17.17 ttl 255 key 123) is configured. The GRO support for pkts AFTER decap are controlled through the device feature of the GRE device (e.g., ethtool -K gre1 gro on/off). 1.1 ethtool -K gre1 gro off; ethtool -K eth0 gro off thruput: 9.16Gbps CPU utilization: 19% 1.2 ethtool -K gre1 gro on; ethtool -K eth0 gro off thruput: 5.9Gbps CPU utilization: 15% 1.3 ethtool -K gre1 gro off; ethtool -K eth0 gro on thruput: 9.26Gbps CPU utilization: 12-13% 1.4 ethtool -K gre1 gro on; ethtool -K eth0 gro on thruput: 9.26Gbps CPU utilization: 10% The following tests were performed on a different NIC that is capable of csum offload. I.e., the h/w is capable of computing IP payload csum (CHECKSUM_COMPLETE). 2.1 ethtool -K gre1 gro on (hence will use gro_cells) 2.1.1 ethtool -K eth0 gro off; csum offload disabled thruput: 8.53Gbps CPU utilization: 9% 2.1.2 ethtool -K eth0 gro off; csum offload enabled thruput: 8.97Gbps CPU utilization: 7-8% 2.1.3 ethtool -K eth0 gro on; csum offload disabled thruput: 8.83Gbps CPU utilization: 5-6% 2.1.4 ethtool -K eth0 gro on; csum offload enabled thruput: 8.98Gbps CPU utilization: 5% 2.2 ethtool -K gre1 gro off 2.2.1 ethtool -K eth0 gro off; csum offload disabled thruput: 5.93Gbps CPU utilization: 9% 2.2.2 ethtool -K eth0 gro off; csum offload enabled thruput: 5.62Gbps CPU utilization: 8% 2.2.3 ethtool -K eth0 gro on; csum offload disabled thruput: 7.69Gbps CPU utilization: 8% 2.2.4 ethtool -K eth0 gro on; csum offload enabled thruput: 8.96Gbps CPU utilization: 5-6% Signed-off-by: H.K. Jerry Chu Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d9c961aa6a7f..a2a70cc70e7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1632,7 +1632,10 @@ struct napi_gro_cb { int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ - int flush; + u16 flush; + + /* Save the IP ID here and check when we get to the transport layer */ + u16 flush_id; /* Number of segments aggregated. */ u16 count; @@ -1651,6 +1654,9 @@ struct napi_gro_cb { /* Used in ipv6_gro_receive() */ int proto; + /* used to support CHECKSUM_COMPLETE for tunneling protocols */ + __wsum csum; + /* used in skb_gro_receive() slow path */ struct sk_buff *last; }; @@ -1900,6 +1906,14 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) skb_network_offset(skb); } +static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, + csum_partial(start, len, 0)); +} + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -2440,6 +2454,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); +struct packet_offload *gro_find_receive_by_type(__be16 type); +struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { -- cgit v1.2.2 From 685328b296acc810541d2532957912690273c64a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 Jan 2014 22:24:00 +0100 Subject: mac80211: remove channel_change_time This value is no longer used by mac80211, and practically no driver ever set it to a correct value anyway, so remove it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f838af816b56..25b18877747f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1616,8 +1616,6 @@ enum ieee80211_hw_flags { * @extra_beacon_tailroom: tailroom to reserve in each beacon tx skb. * Can be used by drivers to add extra IEs. * - * @channel_change_time: time (in microseconds) it takes to change channels. - * * @max_signal: Maximum value for signal (rssi) in RX information, used * only when @IEEE80211_HW_SIGNAL_UNSPEC or @IEEE80211_HW_SIGNAL_DB * @@ -1699,7 +1697,6 @@ struct ieee80211_hw { u32 flags; unsigned int extra_tx_headroom; unsigned int extra_beacon_tailroom; - int channel_change_time; int vif_data_size; int sta_data_size; int chanctx_data_size; -- cgit v1.2.2 From c9484874e7596d6c890e4130336f5379f6a59c5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 3 Jan 2014 12:16:14 +0000 Subject: netfilter: nf_tables: add hook ops to struct nft_pktinfo Multi-family tables need the AF from the hook ops. Add a pointer to the hook ops and replace usage of the hooknum member in struct nft_pktinfo. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5a91abfc0c30..c9e63167f9a2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -13,7 +13,7 @@ struct nft_pktinfo { struct sk_buff *skb; const struct net_device *in; const struct net_device *out; - u8 hooknum; + const struct nf_hook_ops *ops; u8 nhoff; u8 thoff; /* for x_tables compatibility */ @@ -29,7 +29,8 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->skb = skb; pkt->in = pkt->xt.in = in; pkt->out = pkt->xt.out = out; - pkt->hooknum = pkt->xt.hooknum = ops->hooknum; + pkt->ops = ops; + pkt->xt.hooknum = ops->hooknum; pkt->xt.family = ops->pf; } -- cgit v1.2.2 From 115a60b173af0170e0db26b9a3fd6a911fba70a3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 3 Jan 2014 12:16:15 +0000 Subject: netfilter: nf_tables: add support for multi family tables Add support to register chains to multiple hooks for different address families for mixed IPv4/IPv6 tables. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tables.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c9e63167f9a2..f066f252e5e5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -422,6 +422,8 @@ struct nft_stats { u64 pkts; }; +#define NFT_HOOK_OPS_MAX 2 + /** * struct nft_base_chain - nf_tables base chain * @@ -432,7 +434,7 @@ struct nft_stats { * @chain: the chain */ struct nft_base_chain { - struct nf_hook_ops ops; + struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; enum nft_chain_type type; u8 policy; struct nft_stats __percpu *stats; @@ -476,6 +478,8 @@ struct nft_table { * @nhooks: number of hooks in this family * @owner: module owner * @tables: used internally + * @nops: number of hook ops in this family + * @hook_ops_init: initialization function for chain hook ops * @hooks: hookfn overrides for packet validation */ struct nft_af_info { @@ -484,6 +488,9 @@ struct nft_af_info { unsigned int nhooks; struct module *owner; struct list_head tables; + unsigned int nops; + void (*hook_ops_init)(struct nf_hook_ops *, + unsigned int); nf_hookfn *hooks[NF_MAX_HOOKS]; }; -- cgit v1.2.2 From 1d49144c0aaa61be4e3ccbef9cc5c40b0ec5f2fe Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 3 Jan 2014 12:16:16 +0000 Subject: netfilter: nf_tables: add "inet" table for IPv4/IPv6 This patch adds a new table family and a new filter chain that you can use to attach IPv4 and IPv6 rules. This should help to simplify rule-set maintainance in dual-stack setups. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv4.h | 2 ++ include/net/netfilter/nf_tables_ipv6.h | 2 ++ include/net/netns/nftables.h | 1 + include/uapi/linux/netfilter.h | 1 + 4 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index 1be1c2c197ee..f7b3a669aad3 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -20,4 +20,6 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } +extern struct nft_af_info nft_af_ipv4; + #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 4a9b88a65963..3d8ae489be0d 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -27,4 +27,6 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, return 0; } +extern struct nft_af_info nft_af_ipv6; + #endif diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 15d056d534e3..26a394cb91a8 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -10,6 +10,7 @@ struct netns_nftables { struct list_head commit_list; struct nft_af_info *ipv4; struct nft_af_info *ipv6; + struct nft_af_info *inet; struct nft_af_info *arp; struct nft_af_info *bridge; u8 gencursor; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index f7dc0ebeeba5..ef1b1f88ca18 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -53,6 +53,7 @@ enum nf_inet_hooks { enum { NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, NFPROTO_BRIDGE = 7, -- cgit v1.2.2 From 124edfa9e0451e97d621cd2796a44ff499e21036 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 3 Jan 2014 12:16:17 +0000 Subject: netfilter: nf_tables: add nfproto support to meta expression Needed by multi-family tables to distinguish IPv4 and IPv6 packets. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa86a15293e1..10afbfc0e66a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -531,6 +531,7 @@ enum nft_exthdr_attributes { * @NFT_META_NFTRACE: packet nftrace bit * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) * @NFT_META_SECMARK: packet secmark (skb->secmark) + * @NFT_META_NFPROTO: netfilter protocol */ enum nft_meta_keys { NFT_META_LEN, @@ -548,6 +549,7 @@ enum nft_meta_keys { NFT_META_NFTRACE, NFT_META_RTCLASSID, NFT_META_SECMARK, + NFT_META_NFPROTO, }; /** -- cgit v1.2.2 From 4566bf27069b7780e453cffb24ea5f5323059885 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 3 Jan 2014 12:16:18 +0000 Subject: netfilter: nft_meta: add l4proto support For L3-proto independant rules we need to get at the L4 protocol value directly. Add it to the nft_pktinfo struct and use the meta expression to retrieve it. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 + include/net/netfilter/nf_tables_ipv4.h | 3 ++- include/net/netfilter/nf_tables_ipv6.h | 1 + include/uapi/linux/netfilter/nf_tables.h | 2 ++ 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f066f252e5e5..5d2b703efe1c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -16,6 +16,7 @@ struct nft_pktinfo { const struct nf_hook_ops *ops; u8 nhoff; u8 thoff; + u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; }; diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index f7b3a669aad3..cba143fbd2e4 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -15,8 +15,9 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, ops, skb, in, out); - pkt->xt.thoff = ip_hdrlen(pkt->skb); ip = ip_hdr(pkt->skb); + pkt->tprot = ip->protocol; + pkt->xt.thoff = ip_hdrlen(pkt->skb); pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 3d8ae489be0d..74d976137658 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -21,6 +21,7 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, if (protohdr < 0) return -1; + pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 10afbfc0e66a..448593c07120 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -532,6 +532,7 @@ enum nft_exthdr_attributes { * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid) * @NFT_META_SECMARK: packet secmark (skb->secmark) * @NFT_META_NFPROTO: netfilter protocol + * @NFT_META_L4PROTO: layer 4 protocol number */ enum nft_meta_keys { NFT_META_LEN, @@ -550,6 +551,7 @@ enum nft_meta_keys { NFT_META_RTCLASSID, NFT_META_SECMARK, NFT_META_NFPROTO, + NFT_META_L4PROTO, }; /** -- cgit v1.2.2 From fd44b93cb5eee218231f6ce5883df937b3b9c3eb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Jan 2014 23:23:44 +0100 Subject: net: skbuff: const-ify casts in skb_queue_* functions We should const-ify comparisons on skb_queue_* inline helper functions as their parameters are const as well, so lets not drop that. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 956e11a168d8..d97f2d07d02b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -827,7 +827,7 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) */ static inline int skb_queue_empty(const struct sk_buff_head *list) { - return list->next == (struct sk_buff *)list; + return list->next == (const struct sk_buff *) list; } /** @@ -840,7 +840,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)list; + return skb->next == (const struct sk_buff *) list; } /** @@ -853,7 +853,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->prev == (struct sk_buff *)list; + return skb->prev == (const struct sk_buff *) list; } /** -- cgit v1.2.2 From bdfbec2d2d240e9c528caae9c743801629b60166 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 9 Jan 2014 11:37:23 +0200 Subject: cfg80211: Add a function to get the number of supported channels Add a utility function to get the number of channels supported by the device, and update the places in the code that need this data. Signed-off-by: Ilan Peer [replace another occurrence in libertas, fix kernel-doc, fix bugs] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 56c597793d6d..b1f84b05c67e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4640,6 +4640,14 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, */ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp); +/** + * ieee80211_get_num_supported_channels - get number of channels device has + * @wiphy: the wiphy + * + * Return: the number of channels supported by the device. + */ +unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy); + /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ -- cgit v1.2.2 From c4ede3d3821a732120fd671846c2606a1eb4e8b3 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Tue, 7 Jan 2014 16:43:54 +0100 Subject: netfilter: nft_ct: Add support to set the connmark This patch adds kernel support for setting properties of tracked connections. Currently, only connmark is supported. One use-case for this feature is to provide the same functionality as -j CONNMARK --save-mark in iptables. Some restructuring was needed to implement the set op. The new structure follows that of nft_meta. Signed-off-by: Kristian Evensen Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 448593c07120..83c985a6170b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -609,12 +609,14 @@ enum nft_ct_keys { * @NFTA_CT_DREG: destination register (NLA_U32) * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys) * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8) + * @NFTA_CT_SREG: source register (NLA_U32) */ enum nft_ct_attributes { NFTA_CT_UNSPEC, NFTA_CT_DREG, NFTA_CT_KEY, NFTA_CT_DIRECTION, + NFTA_CT_SREG, __NFTA_CT_MAX }; #define NFTA_CT_MAX (__NFTA_CT_MAX - 1) -- cgit v1.2.2 From baae3e62f31618d90e08fb886b4481e5d7b7f27c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Jan 2014 18:42:34 +0000 Subject: netfilter: nf_tables: fix chain type module reference handling The chain type module reference handling makes no sense at all: we take a reference immediately when the module is registered, preventing the module from ever being unloaded. Fix by taking a reference when we're actually creating a chain of the chain type and release the reference when destroying the chain. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5d2b703efe1c..e9b97862bf52 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -436,7 +436,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - enum nft_chain_type type; + struct nf_chain_type *type; u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; -- cgit v1.2.2 From 2a37d755b885995443f11cdcaf1f9d4b5f246eab Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Jan 2014 18:42:37 +0000 Subject: netfilter: nf_tables: constify chain type definitions and pointers Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e9b97862bf52..d3f70530a59a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -436,7 +436,7 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - struct nf_chain_type *type; + const struct nf_chain_type *type; u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; @@ -507,8 +507,8 @@ struct nf_chain_type { int family; }; -int nft_register_chain_type(struct nf_chain_type *); -void nft_unregister_chain_type(struct nf_chain_type *); +int nft_register_chain_type(const struct nf_chain_type *); +void nft_unregister_chain_type(const struct nf_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); -- cgit v1.2.2 From fa2c1de0bbd98985f7f930205de97ae0d3e86c16 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Jan 2014 18:42:38 +0000 Subject: netfilter: nf_tables: minor nf_chain_type cleanups Minor nf_chain_type cleanups: - reorder struct to plug a hoe - rename struct module member to "owner" for consistency - rename nf_hookfn array to "hooks" for consistency - reorder initializers for better readability Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d3f70530a59a..342236550ef9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -498,13 +498,23 @@ struct nft_af_info { int nft_register_afinfo(struct net *, struct nft_af_info *); void nft_unregister_afinfo(struct nft_af_info *); +/** + * struct nf_chain_type - nf_tables chain type info + * + * @name: name of the type + * @type: numeric identifier + * @family: address family + * @owner: module owner + * @hook_mask: mask of valid hooks + * @hooks: hookfn overrides + */ struct nf_chain_type { - unsigned int hook_mask; - const char *name; - enum nft_chain_type type; - nf_hookfn *fn[NF_MAX_HOOKS]; - struct module *me; - int family; + const char *name; + enum nft_chain_type type; + int family; + struct module *owner; + unsigned int hook_mask; + nf_hookfn *hooks[NF_MAX_HOOKS]; }; int nft_register_chain_type(const struct nf_chain_type *); -- cgit v1.2.2 From 3876d22dba62ebf6582f33e1ef2160eeb95e1129 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 9 Jan 2014 18:42:43 +0000 Subject: netfilter: nf_tables: rename nft_do_chain_pktinfo() to nft_do_chain() We don't encode argument types into function names and since besides nft_do_chain() there are only AF-specific versions, there is no risk of confusion. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 342236550ef9..57c8ff7955df 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -447,8 +447,8 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } -unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops); +unsigned int nft_do_chain(struct nft_pktinfo *pkt, + const struct nf_hook_ops *ops); /** * struct nft_table - nf_tables table -- cgit v1.2.2 From 74f77a6b2b1c98d3f14364dccdd2353b99ecfeda Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 6 Jan 2014 10:17:08 +0000 Subject: netfilter: introduce l2tp match extension Introduce an xtables add-on for matching L2TP packets. Supports L2TPv2 and L2TPv3 over IPv4 and IPv6. As well as filtering on L2TP tunnel-id and session-id, the filtering decision can also include the L2TP packet type (control or data), protocol version (2 or 3) and encapsulation type (UDP or IP). The most common use for this will likely be to filter L2TP data packets of individual L2TP tunnels or sessions. While a u32 match can be used, the L2TP protocol headers are such that field offsets differ depending on bits set in the header, making rules for matching generic L2TP connections cumbersome. This match extension takes care of all that. Signed-off-by: James Chapman Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/xt_l2tp.h | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_l2tp.h (limited to 'include') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 2344f5a319fc..1d973d2ba417 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -58,6 +58,7 @@ header-y += xt_helper.h header-y += xt_ipcomp.h header-y += xt_iprange.h header-y += xt_ipvs.h +header-y += xt_l2tp.h header-y += xt_length.h header-y += xt_limit.h header-y += xt_mac.h diff --git a/include/uapi/linux/netfilter/xt_l2tp.h b/include/uapi/linux/netfilter/xt_l2tp.h new file mode 100644 index 000000000000..7dccfa0acbfa --- /dev/null +++ b/include/uapi/linux/netfilter/xt_l2tp.h @@ -0,0 +1,27 @@ +#ifndef _LINUX_NETFILTER_XT_L2TP_H +#define _LINUX_NETFILTER_XT_L2TP_H + +#include + +enum xt_l2tp_type { + XT_L2TP_TYPE_CONTROL, + XT_L2TP_TYPE_DATA, +}; + +/* L2TP matching stuff */ +struct xt_l2tp_info { + __u32 tid; /* tunnel id */ + __u32 sid; /* session id */ + __u8 version; /* L2TP protocol version */ + __u8 type; /* L2TP packet type */ + __u8 flags; /* which fields to match */ +}; + +enum { + XT_L2TP_TID = (1 << 0), /* match L2TP tunnel id */ + XT_L2TP_SID = (1 << 1), /* match L2TP session id */ + XT_L2TP_VERSION = (1 << 2), /* match L2TP protocol version */ + XT_L2TP_TYPE = (1 << 3), /* match L2TP packet type */ +}; + +#endif /* _LINUX_NETFILTER_XT_L2TP_H */ -- cgit v1.2.2 From 852c0153df5082311f50c062275813905f39f56e Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Thu, 9 Jan 2014 15:22:57 -0800 Subject: ieee80211: add definition for TDLS wide band extended capability Seventh bit of 8th byte of extended capabilities specifies wide bandwidth support for TDLS links. Add this definition to ieee80211. Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 776cbb80d098..b382c7c49d84 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1902,6 +1902,7 @@ enum ieee80211_tdls_actioncode { #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) +#define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(7) /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 -- cgit v1.2.2 From b77cf4f8e1892e192ec52df5dd8c158b300fc496 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jan 2014 00:00:38 +0100 Subject: mac80211: handle MMPDUs at EOSP correctly If a uAPSD service period ends with an MMPDU, we currently just send that MMPDU, but it obviously won't get the EOSP bit set as it doesn't have a QoS header. This contradicts the standard, so add a QoS-nulldata frame after the MMPDU to properly terminate the service period with a frame that has EOSP set. Also fix a bug wrt. the TID for the MMPDU, it shouldn't be set to 0 unconditionally but use the actual TID that was assigned. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 25b18877747f..f4ab2fb4d50c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2119,6 +2119,11 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * appropriately (only the last frame may have %IEEE80211_TX_STATUS_EOSP) * and also take care of the EOSP and MORE_DATA bits in the frame. * The driver may also use ieee80211_sta_eosp() in this case. + * + * Note that if the driver ever buffers frames other than QoS-data + * frames, it must take care to never send a non-QoS-data frame as + * the last frame in a service period, adding a QoS-nulldata frame + * after a non-QoS-data frame if needed. */ /** -- cgit v1.2.2 From 28cb1749e8dcf7f2b49174ae8531e3e5be741736 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Fri, 10 Jan 2014 11:08:55 -0800 Subject: ieee80211: add definition for SMS4 key len Add SMS4 key length definition to ieee80211_key_len enum. It's used by WAPI. Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b382c7c49d84..e526a8cecb70 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1857,6 +1857,7 @@ enum ieee80211_key_len { WLAN_KEY_LEN_CCMP = 16, WLAN_KEY_LEN_TKIP = 32, WLAN_KEY_LEN_AES_CMAC = 16, + WLAN_KEY_LEN_SMS4 = 32, }; #define IEEE80211_WEP_IV_LEN 4 -- cgit v1.2.2 From 8a59359cb80f448923a7bc9f555d477e74547d7a Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 8 Jan 2014 16:05:57 +0100 Subject: tcp: metrics: New netlink attribute for src IP and dumped in netlink reply This patch adds a new netlink attribute for the source-IP and appends it to the netlink reply. Now, iproute2 can have access to the source-IP. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index cb5157b55f32..54a37b13f2c4 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -35,6 +35,8 @@ enum { TCP_METRICS_ATTR_FOPEN_SYN_DROPS, /* u16, count of drops */ TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS, /* msecs age */ TCP_METRICS_ATTR_FOPEN_COOKIE, /* binary */ + TCP_METRICS_ATTR_SADDR_IPV4, /* u32 */ + TCP_METRICS_ATTR_SADDR_IPV6, /* binary */ __TCP_METRICS_ATTR_MAX, }; -- cgit v1.2.2 From f87c10a8aa1e82498c42d0335524d6ae7cf5a52b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 9 Jan 2014 10:01:15 +0100 Subject: ipv4: introduce ip_dst_mtu_maybe_forward and protect forwarding path against pmtu spoofing While forwarding we should not use the protocol path mtu to calculate the mtu for a forwarded packet but instead use the interface mtu. We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was introduced for multicast forwarding. But as it does not conflict with our usage in unicast code path it is perfect for reuse. I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular dependencies because of IPSKB_FORWARDED. Because someone might have written a software which does probe destinations manually and expects the kernel to honour those path mtus I introduced a new per-namespace "ip_forward_use_pmtu" knob so someone can disable this new behaviour. We also still use mtus which are locked on a route for forwarding. The reason for this change is, that path mtus information can be injected into the kernel via e.g. icmp_err protocol handler without verification of local sockets. As such, this could cause the IPv4 forwarding path to wrongfully emit fragmentation needed notifications or start to fragment packets along a path. Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED won't be set and further fragmentation logic will use the path mtu to determine the fragmentation size. They also recheck packet size with help of path mtu discovery and report appropriate errors. Cc: Eric Dumazet Cc: David Miller Cc: John Heffner Cc: Steffen Klassert Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 33 +++++++++++++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + include/net/route.h | 19 +++---------------- 3 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 535664477c4a..0dab95c2e4d5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -263,6 +263,39 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) !(dst_metric_locked(dst, RTAX_MTU))); } +static inline bool ip_sk_accept_pmtu(const struct sock *sk) +{ + return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; +} + +static inline bool ip_sk_use_pmtu(const struct sock *sk) +{ + return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; +} + +static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, + bool forwarding) +{ + struct net *net = dev_net(dst->dev); + + if (net->ipv4.sysctl_ip_fwd_use_pmtu || + dst_metric_locked(dst, RTAX_MTU) || + !forwarding) + return dst_mtu(dst); + + return min(dst->dev->mtu, IP_MAX_MTU); +} + +static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) +{ + if (!skb->sk || ip_sk_use_pmtu(skb->sk)) { + bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; + return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); + } else { + return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); + } +} + void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 929a668e91a9..80f500a29498 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -70,6 +70,7 @@ struct netns_ipv4 { int sysctl_tcp_ecn; int sysctl_ip_no_pmtu_disc; + int sysctl_ip_fwd_use_pmtu; kgid_t sysctl_ping_group_range[2]; diff --git a/include/net/route.h b/include/net/route.h index 638e3ebf76f3..9d1f423d5944 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -36,6 +36,9 @@ #include #include +/* IPv4 datagram length is stored into 16bit field (tot_len) */ +#define IP_MAX_MTU 0xFFFFU + #define RTO_ONLINK 0x01 #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) @@ -311,20 +314,4 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } -static inline bool ip_sk_accept_pmtu(const struct sock *sk) -{ - return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE; -} - -static inline bool ip_sk_use_pmtu(const struct sock *sk) -{ - return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE; -} - -static inline int ip_skb_dst_mtu(const struct sk_buff *skb) -{ - return (!skb->sk || ip_sk_use_pmtu(skb->sk)) ? - dst_mtu(skb_dst(skb)) : skb_dst(skb)->dev->mtu; -} - #endif /* _ROUTE_H */ -- cgit v1.2.2 From 8ed1dc44d3e9e8387a104b1ae8f92e9a3fbf1b1e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 9 Jan 2014 10:01:17 +0100 Subject: ipv4: introduce hardened ip_no_pmtu_disc mode This new ip_no_pmtu_disc mode only allowes fragmentation-needed errors to be honored by protocols which do more stringent validation on the ICMP's packet payload. This knob is useful for people who e.g. want to run an unmodified DNS server in a namespace where they need to use pmtu for TCP connections (as they are used for zone transfers or fallback for requests) but don't want to use possibly spoofed UDP pmtu information. Currently the whitelisted protocols are TCP, SCTP and DCCP as they check if the returned packet is in the window or if the association is valid. Cc: Eric Dumazet Cc: David Miller Cc: John Heffner Suggested-by: Florian Weimer Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/protocol.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index fbf7676c9a02..0e5f8665d7fb 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -43,7 +43,12 @@ struct net_protocol { int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); unsigned int no_policy:1, - netns_ok:1; + netns_ok:1, + /* does the protocol do more stringent + * icmp tag validation than simple + * socket lookup? + */ + icmp_strict_tag_validation:1; }; #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.2 From 4f7b91404cd5da3657a82b00394f4f5dfbff13d6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 14 Dec 2013 20:09:06 +0100 Subject: cfg80211: make regulatory_hint() remove REGULATORY_CUSTOM_REG The REGULATORY_CUSTOM_REG can be used during early init with the goal of overriding the wiphy's default regulatory settings in case the alpha2 of the device is not known. In the case that the alpha2 becomes known lets avoid having drivers having to clear the REGULATORY_CUSTOM_REG flag by doing it for them when regulatory_hint() is used. Cc: Sujith Manoharan Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/regulatory.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index c96a0b86f342..b07cdc9fa454 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -96,6 +96,10 @@ struct regulatory_request { * initiator is %REGDOM_SET_BY_CORE). Drivers that use * wiphy_apply_custom_regulatory() should have this flag set * or the regulatory core will set it for the wiphy. + * If you use regulatory_hint() *after* using + * wiphy_apply_custom_regulatory() the wireless core will + * clear the REGULATORY_CUSTOM_REG for your wiphy as it would be + * implied that the device somehow gained knowledge of its region. * @REGULATORY_STRICT_REG: tells us that the wiphy for this device * has regulatory domain that it wishes to be considered as the * superset for regulatory rules. After this device gets its regulatory -- cgit v1.2.2 From ddafd34f419546f1eb7c343178685f059c3cf127 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jan 2014 16:13:59 -0800 Subject: net_sched: act: move idx_gen into struct tcf_hashinfo There is no need to store the index separatedly since tcf_hashinfo is allocated statically too. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index d34e1f4d897b..268f9e6babd7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,6 +39,7 @@ struct tcf_hashinfo { struct hlist_head *htab; unsigned int hmask; spinlock_t lock; + u32 index; }; static inline unsigned int tcf_hash(u32 index, unsigned int hmask) @@ -51,6 +52,7 @@ static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) int i; spin_lock_init(&hf->lock); + hf->index = 0; hf->hmask = mask; hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), GFP_KERNEL); @@ -105,13 +107,12 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo); void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_hash_release(struct tcf_common *p, int bind, struct tcf_hashinfo *hinfo); -u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo); +u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, struct tcf_hashinfo *hinfo); struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, - int bind, u32 *idx_gen, - struct tcf_hashinfo *hinfo); + int bind, struct tcf_hashinfo *hinfo); void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_register_action(struct tc_action_ops *a); -- cgit v1.2.2 From 832d1d5bfaefafa5aa40282f6765c6d996fe384e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jan 2014 16:14:01 -0800 Subject: net_sched: add struct net pointer to tcf_proto_ops->dump It will be needed by the next patch. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 013d96dc6918..d062f81c692f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -204,7 +204,7 @@ struct tcf_proto_ops { void (*walk)(struct tcf_proto*, struct tcf_walker *arg); /* rtnetlink specific */ - int (*dump)(struct tcf_proto*, unsigned long, + int (*dump)(struct net*, struct tcf_proto*, unsigned long, struct sk_buff *skb, struct tcmsg*); struct module *owner; -- cgit v1.2.2 From 2519a602c273c5254781bc55b6e678a17e469a12 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jan 2014 16:14:02 -0800 Subject: net_sched: optimize tcf_match_indev() tcf_match_indev() is called in fast path, it is not wise to search for a netdev by ifindex and then compare by its name, just compare the ifindex. Also, dev->name could be changed by user-space, therefore the match would be always fail, but dev->ifindex could be consistent. BTW, this will also save some bytes from the core struct of u32. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 50ea07969c09..a2441fb1428f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -338,27 +338,27 @@ static inline int tcf_valid_offset(const struct sk_buff *skb, #include static inline int -tcf_change_indev(struct tcf_proto *tp, char *indev, struct nlattr *indev_tlv) +tcf_change_indev(struct net *net, struct nlattr *indev_tlv) { + char indev[IFNAMSIZ]; + struct net_device *dev; + if (nla_strlcpy(indev, indev_tlv, IFNAMSIZ) >= IFNAMSIZ) return -EINVAL; - return 0; + dev = __dev_get_by_name(net, indev); + if (!dev) + return -ENODEV; + return dev->ifindex; } -static inline int -tcf_match_indev(struct sk_buff *skb, char *indev) +static inline bool +tcf_match_indev(struct sk_buff *skb, int ifindex) { - struct net_device *dev; - - if (indev[0]) { - if (!skb->skb_iif) - return 0; - dev = __dev_get_by_index(dev_net(skb->dev), skb->skb_iif); - if (!dev || strcmp(indev, dev->name)) - return 0; - } - - return 1; + if (!ifindex) + return true; + if (!skb->skb_iif) + return false; + return ifindex == skb->skb_iif; } #endif /* CONFIG_NET_CLS_IND */ -- cgit v1.2.2 From 7eb8896df03d0f4a8b301cb177d7f31c0d70e112 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jan 2014 16:14:05 -0800 Subject: net_sched: act: remove struct tcf_act_hdr It is not necessary at all. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 268f9e6babd7..e171387512b3 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -73,10 +73,6 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf) #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 -struct tcf_act_hdr { - struct tcf_common common; -}; - struct tc_action { void *priv; const struct tc_action_ops *ops; -- cgit v1.2.2 From efabdfb95ab698b51009e7783ddc2714cef0aecf Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Fri, 10 Jan 2014 14:25:09 +0800 Subject: phylib: Add Clause 45 read/write functions Need an extra parameter to read or write Clause 45 PHYs, so need a different API with the extra parameter. Signed-off-by: Andy Fleming Signed-off-by: Shaohui Xie Signed-off-by: David S. Miller --- include/linux/phy.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 7c81dd8870d4..3eda43c90875 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -487,6 +487,24 @@ struct phy_fixup { int (*run)(struct phy_device *phydev); }; +/** + * phy_read_mmd - Convenience function for reading a register + * from an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * + * Same rules as for phy_read(); + */ +static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) +{ + if (!phydev->is_c45) + return -EOPNOTSUPP; + + return mdiobus_read(phydev->bus, phydev->addr, + MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); +} + /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -537,6 +555,27 @@ static inline bool phy_is_internal(struct phy_device *phydev) return phydev->is_internal; } +/** + * phy_write_mmd - Convenience function for writing a register + * on an MMD on a given PHY. + * @phydev: The phy_device struct + * @devad: The MMD to read from + * @regnum: The register on the MMD to read + * @val: value to write to @regnum + * + * Same rules as for phy_write(); + */ +static inline int phy_write_mmd(struct phy_device *phydev, int devad, + u32 regnum, u16 val) +{ + if (!phydev->is_c45) + return -EOPNOTSUPP; + + regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff); + + return mdiobus_write(phydev->bus, phydev->addr, regnum, val); +} + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -- cgit v1.2.2 From 898dd0bda366edd0b077db4389a26c00509c73c9 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Fri, 10 Jan 2014 14:26:46 +0800 Subject: phylib: introduce PHY_INTERFACE_MODE_XGMII for 10G PHY Signed-off-by: Andy Fleming Signed-off-by: Shaohui Xie Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3eda43c90875..a9685b109049 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -73,6 +73,7 @@ typedef enum { PHY_INTERFACE_MODE_RGMII_TXID, PHY_INTERFACE_MODE_RTBI, PHY_INTERFACE_MODE_SMII, + PHY_INTERFACE_MODE_XGMII, } phy_interface_t; -- cgit v1.2.2 From 257184d7cc03519e1ea37b421879767b4194ce8c Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Fri, 10 Jan 2014 14:27:54 +0800 Subject: phylib: Support attaching to generic 10g driver phy_attach_direct() may now attach to a generic 10G driver. It can also be used exactly as phy_connect_direct(), which will be useful when using of_mdio, as phy_connect (and therefore of_phy_connect) start the PHY state machine, which is currently irrelevant for 10G PHYs. Signed-off-by: Andy Fleming Signed-off-by: Shaohui Xie Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index a9685b109049..565188ca328f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -588,6 +588,8 @@ int phy_resume(struct phy_device *phydev); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); +int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, + u32 flags, phy_interface_t interface); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, void (*handler)(struct net_device *), phy_interface_t interface); -- cgit v1.2.2 From 7614aba6a330a6c4937edc24fd4c6a935c1ae703 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Fri, 10 Jan 2014 14:28:11 +0800 Subject: phylib: Add of_phy_attach 10G PHYs don't currently support running the state machine, which is implicitly setup via of_phy_connect(). Therefore, it is necessary to implement an OF version of phy_attach(), which does everything except start the state machine. Signed-off-by: Andy Fleming Signed-off-by: Shaohui Xie Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 8163107b94b4..6fe8464ed767 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -19,6 +19,9 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct device_node *phy_np, void (*hndlr)(struct net_device *), u32 flags, phy_interface_t iface); +struct phy_device *of_phy_attach(struct net_device *dev, + struct device_node *phy_np, u32 flags, + phy_interface_t iface); extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, void (*hndlr)(struct net_device *), phy_interface_t iface); @@ -44,6 +47,13 @@ static inline struct phy_device *of_phy_connect(struct net_device *dev, return NULL; } +static inline struct phy_device *of_phy_attach(struct net_device *dev, + struct device_node *phy_np, + u32 flags, phy_interface_t iface) +{ + return NULL; +} + static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, void (*hndlr)(struct net_device *), phy_interface_t iface) -- cgit v1.2.2 From 6daaf0de2f3170206f57e7881adfbd8682cdd7fb Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Jan 2014 22:31:11 -0800 Subject: sctp: make sctp_addto_chunk_fixed local Signed-off-by: Stephen Hemminger Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e9f732fda950..d992ca3145fe 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -649,7 +649,6 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len, struct iovec *data); void sctp_chunk_free(struct sctp_chunk *); void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data); -void *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data); struct sctp_chunk *sctp_chunkify(struct sk_buff *, const struct sctp_association *, struct sock *); -- cgit v1.2.2 From ed1f50c3a7c1ad1b1b4d584308eab77d57a330f8 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 9 Jan 2014 10:02:46 +0000 Subject: net: add skb_checksum_setup This patch adds a function to set up the partial checksum offset for IP packets (and optionally re-calculate the pseudo-header checksum) into the core network code. The implementation was previously private and duplicated between xen-netback and xen-netfront, however it is not xen-specific and is potentially useful to any network driver. Signed-off-by: Paul Durrant Cc: David Miller Cc: Eric Dumazet Cc: Veaceslav Falico Cc: Alexander Duyck Cc: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d97f2d07d02b..48b760505cb6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2893,6 +2893,8 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +int skb_checksum_setup(struct sk_buff *skb, bool recalculate); + u32 __skb_get_poff(const struct sk_buff *skb); /** -- cgit v1.2.2 From d87d04a7851f067a06bb10acca8aca62a8cad6f0 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Jan 2014 22:17:03 +0000 Subject: net: Add net_dev_start_xmit trace event, exposing more skb fields The existing net/net_dev_xmit trace event provides little information about the skb that has been passed to the driver, and it is not simple to add more since the skb may already have been freed at the point the event is emitted. Add a separate trace event before the skb is passed to the driver, including most fields that are likely to be interesting for debugging driver datapath behaviour. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/trace/events/net.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h index f99645d05a8f..7a72f44ae6ab 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -6,9 +6,67 @@ #include #include +#include #include #include +TRACE_EVENT(net_dev_start_xmit, + + TP_PROTO(const struct sk_buff *skb, const struct net_device *dev), + + TP_ARGS(skb, dev), + + TP_STRUCT__entry( + __string( name, dev->name ) + __field( u16, queue_mapping ) + __field( const void *, skbaddr ) + __field( bool, vlan_tagged ) + __field( u16, vlan_proto ) + __field( u16, vlan_tci ) + __field( u16, protocol ) + __field( u8, ip_summed ) + __field( unsigned int, len ) + __field( unsigned int, data_len ) + __field( int, network_offset ) + __field( bool, transport_offset_valid) + __field( int, transport_offset) + __field( u8, tx_flags ) + __field( u16, gso_size ) + __field( u16, gso_segs ) + __field( u16, gso_type ) + ), + + TP_fast_assign( + __assign_str(name, dev->name); + __entry->queue_mapping = skb->queue_mapping; + __entry->skbaddr = skb; + __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_proto = ntohs(skb->vlan_proto); + __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->protocol = ntohs(skb->protocol); + __entry->ip_summed = skb->ip_summed; + __entry->len = skb->len; + __entry->data_len = skb->data_len; + __entry->network_offset = skb_network_offset(skb); + __entry->transport_offset_valid = + skb_transport_header_was_set(skb); + __entry->transport_offset = skb_transport_offset(skb); + __entry->tx_flags = skb_shinfo(skb)->tx_flags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_segs = skb_shinfo(skb)->gso_segs; + __entry->gso_type = skb_shinfo(skb)->gso_type; + ), + + TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x", + __get_str(name), __entry->queue_mapping, __entry->skbaddr, + __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci, + __entry->protocol, __entry->ip_summed, __entry->len, + __entry->data_len, + __entry->network_offset, __entry->transport_offset_valid, + __entry->transport_offset, __entry->tx_flags, + __entry->gso_size, __entry->gso_segs, __entry->gso_type) +); + TRACE_EVENT(net_dev_xmit, TP_PROTO(struct sk_buff *skb, -- cgit v1.2.2 From ae78dbfa40c629f79c72ab93525508ef49e798b6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 10 Jan 2014 22:17:24 +0000 Subject: net: Add trace events for all receive entry points, exposing more skb fields The existing net/netif_rx and net/netif_receive_skb trace events provide little information about the skb, nor do they indicate how it entered the stack. Add trace events at entry of each of the exported functions, including most fields that are likely to be interesting for debugging driver datapath behaviour. Split netif_rx() and netif_receive_skb() so that internal calls are not traced. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/trace/events/net.h | 100 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) (limited to 'include') diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 7a72f44ae6ab..a34f27b2e394 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -136,6 +136,106 @@ DEFINE_EVENT(net_dev_template, netif_rx, TP_ARGS(skb) ); + +DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __string( name, skb->dev->name ) + __field( unsigned int, napi_id ) + __field( u16, queue_mapping ) + __field( const void *, skbaddr ) + __field( bool, vlan_tagged ) + __field( u16, vlan_proto ) + __field( u16, vlan_tci ) + __field( u16, protocol ) + __field( u8, ip_summed ) + __field( u32, rxhash ) + __field( bool, l4_rxhash ) + __field( unsigned int, len ) + __field( unsigned int, data_len ) + __field( unsigned int, truesize ) + __field( bool, mac_header_valid) + __field( int, mac_header ) + __field( unsigned char, nr_frags ) + __field( u16, gso_size ) + __field( u16, gso_type ) + ), + + TP_fast_assign( + __assign_str(name, skb->dev->name); +#ifdef CONFIG_NET_RX_BUSY_POLL + __entry->napi_id = skb->napi_id; +#else + __entry->napi_id = 0; +#endif + __entry->queue_mapping = skb->queue_mapping; + __entry->skbaddr = skb; + __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_proto = ntohs(skb->vlan_proto); + __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->protocol = ntohs(skb->protocol); + __entry->ip_summed = skb->ip_summed; + __entry->rxhash = skb->rxhash; + __entry->l4_rxhash = skb->l4_rxhash; + __entry->len = skb->len; + __entry->data_len = skb->data_len; + __entry->truesize = skb->truesize; + __entry->mac_header_valid = skb_mac_header_was_set(skb); + __entry->mac_header = skb_mac_header(skb) - skb->data; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_type = skb_shinfo(skb)->gso_type; + ), + + TP_printk("dev=%s napi_id=%#x queue_mapping=%u skbaddr=%p vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d rxhash=0x%08x l4_rxhash=%d len=%u data_len=%u truesize=%u mac_header_valid=%d mac_header=%d nr_frags=%d gso_size=%d gso_type=%#x", + __get_str(name), __entry->napi_id, __entry->queue_mapping, + __entry->skbaddr, __entry->vlan_tagged, __entry->vlan_proto, + __entry->vlan_tci, __entry->protocol, __entry->ip_summed, + __entry->rxhash, __entry->l4_rxhash, __entry->len, + __entry->data_len, __entry->truesize, + __entry->mac_header_valid, __entry->mac_header, + __entry->nr_frags, __entry->gso_size, __entry->gso_type) +); + +DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_frags_entry, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_rx_verbose_template, napi_gro_receive_entry, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry, + + TP_PROTO(const struct sk_buff *skb), + + TP_ARGS(skb) +); + #endif /* _TRACE_NET_H */ /* This part must be outside protection */ -- cgit v1.2.2 From 72c1d3bdd5bf10a789608336ba0d61f1e44e4350 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 10 Jan 2014 16:09:45 -0800 Subject: ipv4: register igmp_notifier even when !CONFIG_PROC_FS We still need this notifier even when we don't config PROC_FS. It should be rare to have a kernel without PROC_FS, so just for completeness. Cc: Stephen Hemminger Cc: David S. Miller Cc: Patrick McHardy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 0dab95c2e4d5..7876e3b96ce3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -90,7 +90,7 @@ struct packet_type; struct rtable; struct sockaddr; -int igmp_mc_proc_init(void); +int igmp_mc_init(void); /* * Functions provided by ip.c -- cgit v1.2.2 From 63862b5bef7349dd1137e4c70702c67d77565785 Mon Sep 17 00:00:00 2001 From: Aruna-Hewapathirane Date: Sat, 11 Jan 2014 07:15:59 -0500 Subject: net: replace macros net_random and net_srandom with direct calls to prandom This patch removes the net_random and net_srandom macros and replaces them with direct calls to the prandom ones. As new commits only seem to use prandom_u32 there is no use to keep them around. This change makes it easier to grep for users of prandom_u32. Signed-off-by: Aruna-Hewapathirane Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 3 --- include/net/red.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 69be3e6079c8..94734a6259a4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -245,9 +245,6 @@ do { \ #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) -#define net_random() prandom_u32() -#define net_srandom(seed) prandom_seed((__force u32)(seed)) - bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); diff --git a/include/net/red.h b/include/net/red.h index ef46058d35bf..168bb2f495f2 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -303,7 +303,7 @@ static inline unsigned long red_calc_qavg(const struct red_parms *p, static inline u32 red_random(const struct red_parms *p) { - return reciprocal_divide(net_random(), p->max_P_reciprocal); + return reciprocal_divide(prandom_u32(), p->max_P_reciprocal); } static inline int red_mark_probability(const struct red_parms *p, -- cgit v1.2.2 From ec35b61ea59aa6b1ecc8d8452b225e4677e32627 Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Mon, 13 Jan 2014 15:59:01 +0100 Subject: IPv6: move the anycast_src_echo_reply sysctl to netns_sysctl_ipv6 This change move anycast_src_echo_reply sysctl with other ipv6 sysctls. Suggested-by: Hannes Frederic Sowa Signed-off-by: Francois-Xavier Le Bail Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 76fc7d1dbfd3..592fecd2e59e 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int anycast_src_echo_reply; }; struct netns_ipv6 { @@ -73,7 +74,6 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t rt_genid; - int anycast_src_echo_reply; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -- cgit v1.2.2 From 5bb025fae53889cc99a21058c5dd369bf8cce820 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 14 Jan 2014 21:58:51 +0100 Subject: net: rename sysfs symlinks on device name change Currently, we don't rename the upper/lower_ifc symlinks in /sys/class/net/*/ , which might result stale/duplicate links/names. Fix this by adding netdev_adjacent_rename_links(dev, oldname) which renames all the upper/lower interface's links to dev from the upper/lower_oldname to the new name. We don't need a rollback because only we control these symlinks and if we fail to rename them - sysfs will anyway complain. Reported-by: Ding Tianhong CC: Ding Tianhong CC: "David S. Miller" CC: Eric Dumazet CC: Nicolas Dichtel CC: Cong Wang Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5c88ab19b3eb..30f6513f3b4d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2941,6 +2941,7 @@ int netdev_master_upper_dev_link_private(struct net_device *dev, void *private); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); +void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); int skb_checksum_help(struct sk_buff *skb); -- cgit v1.2.2 From 286ab723d4b83d37deb4017008ef1444a95cfb0d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 14 Jan 2014 15:18:47 -0800 Subject: etherdevice: Use ether_addr_copy to copy an Ethernet address Some systems can use the normally known u16 alignment of Ethernet addresses to save some code/text bytes and cycles. This does not change currently emitted code on x86 by gcc 4.8. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index f344ac04f858..9c5529dc6d07 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -217,6 +217,28 @@ static inline void eth_hw_addr_random(struct net_device *dev) eth_random_addr(dev->dev_addr); } +/** + * ether_addr_copy - Copy an Ethernet address + * @dst: Pointer to a six-byte array Ethernet address destination + * @src: Pointer to a six-byte array Ethernet address source + * + * Please note: dst & src must both be aligned to u16. + */ +static inline void ether_addr_copy(u8 *dst, const u8 *src) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + *(u32 *)dst = *(const u32 *)src; + *(u16 *)(dst + 4) = *(const u16 *)(src + 4); +#else + u16 *a = (u16 *)dst; + const u16 *b = (const u16 *)src; + + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; +#endif +} + /** * eth_hw_addr_inherit - Copy dev_addr from another net_device * @dst: pointer to net_device to copy dev_addr to @@ -229,7 +251,7 @@ static inline void eth_hw_addr_inherit(struct net_device *dst, struct net_device *src) { dst->addr_assign_type = src->addr_assign_type; - memcpy(dst->dev_addr, src->dev_addr, ETH_ALEN); + ether_addr_copy(dst->dev_addr, src->dev_addr); } /** -- cgit v1.2.2 From d76ed22b225c02228c643ae336f76e086fdc32f0 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 15 Jan 2014 17:03:30 +0800 Subject: ipv6: move IPV6_TCLASS_SHIFT into ipv6.h and define a helper Two places defined IPV6_TCLASS_SHIFT, so we should move it into ipv6.h, and use this macro as possible. And define ip6_tclass helper to return tclass Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- include/net/ipv6.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 12079c65ea3e..6d80f51897a5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -239,6 +239,7 @@ struct ip6_flowlabel { #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) #define IPV6_FLOWLABEL_MASK cpu_to_be32(0x000FFFFF) #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) +#define IPV6_TCLASS_SHIFT 20 struct ipv6_fl_socklist { struct ipv6_fl_socklist __rcu *next; @@ -681,6 +682,10 @@ static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; } +static inline u8 ip6_tclass(__be32 flowinfo) +{ + return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; +} /* * Prototypes exported by ipv6 */ -- cgit v1.2.2 From 761aac737eb11901c382a3f021dead59a26983fc Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Wed, 15 Jan 2014 15:36:58 +0100 Subject: ipv6 addrconf: add IFA_F_NOPREFIXROUTE flag to suppress creation of IP6 routes When adding/modifying an IPv6 address, the userspace application needs a way to suppress adding a prefix route. This is for example relevant together with IFA_F_MANAGERTEMPADDR, where userspace creates autoconf generated addresses, but depending on on-link, no route for the prefix should be added. Signed-off-by: Thomas Haller Signed-off-by: David S. Miller --- include/uapi/linux/if_addr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h index cfed10be7529..dea10a87dfd1 100644 --- a/include/uapi/linux/if_addr.h +++ b/include/uapi/linux/if_addr.h @@ -49,6 +49,7 @@ enum { #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 #define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 struct ifa_cacheinfo { __u32 ifa_prefered; -- cgit v1.2.2 From 89740ca74f5bb45d3e0dcb4aa9eab0ba9864bff5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 9 Jan 2014 14:13:47 +0100 Subject: neigh: use NEIGH_VAR_INIT in ndo_neigh_setup functions. When ndo_neigh_setup is called, the bitfield used by NEIGH_VAR_SET is not initialized yet. This might cause confusion for the people who use NEIGH_VAR_SET in ndo_neigh_setup. So rather introduce NEIGH_VAR_INIT for usage in ndo_neigh_setup. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/neighbour.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4c09bd23b832..7277caf3743d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -92,6 +92,11 @@ static inline void neigh_var_set(struct neigh_parms *p, int index, int val) } #define NEIGH_VAR(p, attr) ((p)->data[NEIGH_VAR_ ## attr]) + +/* In ndo_neigh_setup, NEIGH_VAR_INIT should be used. + * In other cases, NEIGH_VAR_SET should be used. + */ +#define NEIGH_VAR_INIT(p, attr, val) (NEIGH_VAR(p, attr) = val) #define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val) static inline void neigh_parms_data_state_setall(struct neigh_parms *p) -- cgit v1.2.2 From 9cbadf094d9d479413dc8cfa77dff9e732184337 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 16 Jan 2014 10:51:43 +0000 Subject: net: stmmac: support max-speed device tree property This patch adds support to "max-speed" property which is a standard Ethernet device tree property. max-speed specifies maximum speed (specified in megabits per second) supported the device. Depending on the clocking schemes some of the boards can only support few link speeds, so having a way to limit the link speed in the mac driver would allow such setups to work reliably. Without this patch there is no way to tell the driver to limit the link speed. Signed-off-by: Srinivas Kandagatla Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index bb5deb0feb6b..33ace712e7e8 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -110,6 +110,7 @@ struct plat_stmmacenet_data { int force_sf_dma_mode; int force_thresh_dma_mode; int riwt_off; + int max_speed; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev); -- cgit v1.2.2 From 57bdf7f42be05640f8080b06844c94367ad1884b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 15 Jan 2014 08:57:54 -0800 Subject: net: Add skb_get_hash_raw Function to just return skb->rxhash without checking to see if it needs to be recomputed. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 48b760505cb6..1f689e62e4cb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -771,6 +771,11 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->rxhash; } +static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) +{ + return skb->rxhash; +} + static inline void skb_clear_hash(struct sk_buff *skb) { skb->rxhash = 0; -- cgit v1.2.2 From 1d486bfb66971ebacc2a46a23431ace9af70dc66 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 16 Jan 2014 00:02:18 +0100 Subject: net: add NETDEV_PRECHANGEMTU to notify before mtu change happens Currently, if a device changes its mtu, first the change happens (invloving all the side effects), and after that the NETDEV_CHANGEMTU is sent so that other devices can catch up with the new mtu. However, if they return NOTIFY_BAD, then the change is reverted and error returned. This is a really long and costy operation (sometimes). To fix this, add NETDEV_PRECHANGEMTU notification which is called prior to any change actually happening, and if any callee returns NOTIFY_BAD - the change is aborted. This way we're skipping all the playing with apply/revert the mtu. CC: "David S. Miller" CC: Jiri Pirko CC: Eric Dumazet CC: Nicolas Dichtel CC: Cong Wang Signed-off-by: Veaceslav Falico Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30f6513f3b4d..d7668b881d08 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1718,7 +1718,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGE 0x0004 /* Notify device state change */ #define NETDEV_REGISTER 0x0005 #define NETDEV_UNREGISTER 0x0006 -#define NETDEV_CHANGEMTU 0x0007 +#define NETDEV_CHANGEMTU 0x0007 /* notify after mtu change happened */ #define NETDEV_CHANGEADDR 0x0008 #define NETDEV_GOING_DOWN 0x0009 #define NETDEV_CHANGENAME 0x000A @@ -1734,6 +1734,7 @@ struct pcpu_sw_netstats { #define NETDEV_JOIN 0x0014 #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 +#define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.2 From 6c80563c2fdd9b32e37e2570e5b1ba9befd591c0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 15 Jan 2014 15:38:43 -0800 Subject: net_sched: act: pick a different type for act_xt In tcf_register_action() we check either ->type or ->kind to see if there is an existing action registered, but ipt action registers two actions with same type but different kinds. They should have different types too. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_ipt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h index a2335563d21f..130aaadf6fac 100644 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ b/include/uapi/linux/tc_act/tc_ipt.h @@ -4,6 +4,7 @@ #include #define TCA_ACT_IPT 6 +#define TCA_ACT_XT 10 enum { TCA_IPT_UNSPEC, -- cgit v1.2.2 From a953be53ce40440acb4740edb48577b9468d4c3d Mon Sep 17 00:00:00 2001 From: Michael Dalton Date: Thu, 16 Jan 2014 22:23:28 -0800 Subject: net-sysfs: add support for device-specific rx queue sysfs attributes Extend existing support for netdevice receive queue sysfs attributes to permit a device-specific attribute group. Initial use case for this support will be to allow the virtio-net device to export per-receive queue mergeable receive buffer size. Signed-off-by: Michael Dalton Signed-off-by: David S. Miller --- include/linux/netdevice.h | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d7668b881d08..e985231fe04b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -668,15 +668,28 @@ extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif +#endif /* CONFIG_RPS */ /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { +#ifdef CONFIG_RPS struct rps_map __rcu *rps_map; struct rps_dev_flow_table __rcu *rps_flow_table; +#endif struct kobject kobj; struct net_device *dev; } ____cacheline_aligned_in_smp; -#endif /* CONFIG_RPS */ + +/* + * RX queue sysfs structures and functions. + */ +struct rx_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, char *buf); + ssize_t (*store)(struct netdev_rx_queue *queue, + struct rx_queue_attribute *attr, const char *buf, size_t len); +}; #ifdef CONFIG_XPS /* @@ -1313,7 +1326,7 @@ struct net_device { unicast) */ -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; /* Number of RX queues allocated at register_netdev() time */ @@ -1424,6 +1437,8 @@ struct net_device { struct device dev; /* space for optional device, statistics, and wireless sysfs groups */ const struct attribute_group *sysfs_groups[4]; + /* space for optional per-rx queue attributes */ + const struct attribute_group *sysfs_rx_queue_group; /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; @@ -2375,7 +2390,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); #else static inline int netif_set_real_num_rx_queues(struct net_device *dev, @@ -2394,7 +2409,7 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev, from_dev->real_num_tx_queues); if (err) return err; -#ifdef CONFIG_RPS +#ifdef CONFIG_SYSFS return netif_set_real_num_rx_queues(to_dev, from_dev->real_num_rx_queues); #else @@ -2402,6 +2417,18 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev, #endif } +#ifdef CONFIG_SYSFS +static inline unsigned int get_netdev_rx_queue_index( + struct netdev_rx_queue *queue) +{ + struct net_device *dev = queue->dev; + int index = queue - dev->_rx; + + BUG_ON(index >= dev->num_rx_queues); + return index; +} +#endif + #define DEFAULT_MAX_NUM_RSS_QUEUES (8) int netif_get_num_default_rss_queues(void); -- cgit v1.2.2 From 1d13a96c74fc4802a775189ddb58bc6469ffdaa3 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Thu, 16 Jan 2014 17:21:22 +0100 Subject: ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT This patch is following the commit b903d324bee262 (ipv6: tcp: fix TCLASS value in ACK messages sent from TIME_WAIT). For the same reason than tclass, we have to store the flow label in the inet_timewait_sock to provide consistency of flow label on the last ACK. Signed-off-by: Florent Fourcot Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 71c6e264e5b5..61474ea02152 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -133,9 +133,9 @@ struct inet_timewait_sock { /* And these are ours. */ unsigned int tw_ipv6only : 1, tw_transparent : 1, - tw_pad : 6, /* 6 bits hole */ - tw_tos : 8, - tw_pad2 : 16; /* 16 bits hole */ + tw_flowlabel : 20, + tw_pad : 2, /* 2 bits hole */ + tw_tos : 8; kmemcheck_bitfield_end(flags); u32 tw_ttd; struct inet_bind_bucket *tw_tb; -- cgit v1.2.2 From 6c7e7610ff6888ea15a901fbcb30c5d461816b34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Jan 2014 16:41:19 -0800 Subject: ipv4: fix a dst leak in tunnels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch : 1) Remove a dst leak if DST_NOCACHE was set on dst Fix this by holding a reference only if dst really cached. 2) Remove a lockdep warning in __tunnel_dst_set() This was reported by Cong Wang. 3) Remove usage of a spinlock where xchg() is enough 4) Remove some spurious inline keywords. Let compiler decide for us. Fixes: 7d442fab0a67 ("ipv4: Cache dst in tunnels") Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Tom Herbert Cc: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index cd729becbb07..48ed75c21260 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -40,7 +40,6 @@ struct ip_tunnel_prl_entry { struct ip_tunnel_dst { struct dst_entry __rcu *dst; - spinlock_t lock; }; struct ip_tunnel { -- cgit v1.2.2 From 1d3ee88ae0d605629bf369ab0b868dae8ca62a48 Mon Sep 17 00:00:00 2001 From: "sfeldma@cumulusnetworks.com" Date: Thu, 16 Jan 2014 22:57:56 -0800 Subject: bonding: add netlink attributes to slave link dev If link is IFF_SLAVE, extend link dev netlink attributes to include slave attributes with new IFLA_SLAVE nest. Add netlink notification (RTM_NEWLINK) when slave status changes from backup to active, or visa-versa. Adds new ndo_get_slave op to net_device_ops to fill skb with IFLA_SLAVE attributes. Currently only used by bonding driver, but could be used by other aggregating devices with slaves. Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ include/uapi/linux/if_link.h | 13 +++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e985231fe04b..83ce2aee65e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -921,6 +921,9 @@ struct netdev_phys_port_id { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * + * int (*ndo_get_slave)(struct net_device *slave_dev, struct sk_buff *skb); + * Called to fill netlink skb with slave info. + * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1093,6 +1096,8 @@ struct net_device_ops { struct net_device *slave_dev); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + int (*ndo_get_slave)(struct net_device *slave_dev, + struct sk_buff *skb); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 3e6bd3c7445d..ba2f3bf5fdf5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -144,6 +144,7 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, + IFLA_SLAVE, __IFLA_MAX }; @@ -368,6 +369,18 @@ enum { #define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) +enum { + IFLA_SLAVE_STATE, + IFLA_SLAVE_MII_STATUS, + IFLA_SLAVE_LINK_FAILURE_COUNT, + IFLA_SLAVE_PERM_HWADDR, + IFLA_SLAVE_QUEUE_ID, + IFLA_SLAVE_AD_AGGREGATOR_ID, + __IFLA_SLAVE_MAX, +}; + +#define IFLA_SLAVE_MAX (__IFLA_SLAVE_MAX - 1) + /* SR-IOV virtual function management section */ enum { -- cgit v1.2.2 From ea02f9411d9faa3553ed09ce0ec9f00ceae9885e Mon Sep 17 00:00:00 2001 From: Michal Sekletar Date: Fri, 17 Jan 2014 17:09:45 +0100 Subject: net: introduce SO_BPF_EXTENSIONS For user space packet capturing libraries such as libpcap, there's currently only one way to check which BPF extensions are supported by the kernel, that is, commit aa1113d9f85d ("net: filter: return -EINVAL if BPF_S_ANC* operation is not supported"). For querying all extensions at once this might be rather inconvenient. Therefore, this patch introduces a new option which can be used as an argument for getsockopt(), and allows one to obtain information about which BPF extensions are supported by the current kernel. As David Miller suggests, we do not need to define any bits right now and status quo can just return 0 in order to state that this versions supports SKF_AD_PROTOCOL up to SKF_AD_PAY_OFFSET. Later additions to BPF extensions need to add their bits to the bpf_tell_extensions() function, as documented in the comment. Signed-off-by: Michal Sekletar Cc: David Miller Reviewed-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 11 +++++++++++ include/uapi/asm-generic/socket.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index ff4e40cd45b1..1a95a2dcf113 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -83,6 +83,17 @@ static inline void bpf_jit_free(struct sk_filter *fp) #define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns) #endif +static inline int bpf_tell_extensions(void) +{ + /* When adding new BPF extension it is necessary to enumerate + * it here, so userspace software which wants to know what is + * supported can do so by inspecting return value of this + * function + */ + + return 0; +} + enum { BPF_S_RET_K = 1, BPF_S_RET_A, diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 38f14d0264c3..ea0796bdcf88 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -80,4 +80,6 @@ #define SO_MAX_PACING_RATE 47 +#define SO_BPF_EXTENSIONS 48 + #endif /* __ASM_GENERIC_SOCKET_H */ -- cgit v1.2.2 From df3687ffc6653e4d32168338b4dee20c164ed7c9 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 17 Jan 2014 17:15:03 +0100 Subject: ipv6: add the IPV6_FL_F_REFLECT flag to IPV6_FL_A_GET With this option, the socket will reply with the flow label value read on received packets. The goal is to have a connection with the same flow label in both direction of the communication. Changelog of V4: * Do not erase the flow label on the listening socket. Use pktopts to store the received value Signed-off-by: Florent Fourcot Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/in6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7e1ded0d8e45..1084304fd75a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -191,6 +191,7 @@ struct ipv6_pinfo { /* sockopt flags */ __u16 recverr:1, sndflow:1, + repflow:1, pmtudisc:3, ipv6only:1, srcprefs:3, /* 001: prefer temporary address diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index f94f1d013bf2..02c0cd685a27 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -85,6 +85,7 @@ struct in6_flowlabel_req { #define IPV6_FL_F_CREATE 1 #define IPV6_FL_F_EXCL 2 +#define IPV6_FL_F_REFLECT 4 #define IPV6_FL_S_NONE 0 #define IPV6_FL_S_EXCL 1 -- cgit v1.2.2 From 46e5f401762c639e38eea350d335c0f54ec2442f Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 17 Jan 2014 17:15:04 +0100 Subject: ipv6: add a flag to get the flow label used remotly This information is already available via IPV6_FLOWINFO of IPV6_2292PKTOPTIONS, and them a filtering to get the flow label information. But it is probably logical and easier for users to add this here, and to control both sent/received flow label values with the IPV6_FLOWLABEL_MGR option. Signed-off-by: Florent Fourcot Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ipv6.h | 3 ++- include/uapi/linux/in6.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6d80f51897a5..78d3d5124918 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -253,7 +253,8 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *fopt); void fl6_free_socklist(struct sock *sk); int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen); -int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq); +int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, + int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 02c0cd685a27..633b93cac1ed 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -86,6 +86,7 @@ struct in6_flowlabel_req { #define IPV6_FL_F_CREATE 1 #define IPV6_FL_F_EXCL 2 #define IPV6_FL_F_REFLECT 4 +#define IPV6_FL_F_REMOTE 8 #define IPV6_FL_S_NONE 0 #define IPV6_FL_S_EXCL 1 -- cgit v1.2.2 From 6444f72b4b74f627c51891101e93ba2b94078b0a Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 17 Jan 2014 17:15:05 +0100 Subject: ipv6: add flowlabel_consistency sysctl With the introduction of IPV6_FL_F_REFLECT, there is no guarantee of flow label unicity. This patch introduces a new sysctl to protect the old behaviour, enable by default. Changelog of V3: * rename ip6_flowlabel_consistency to flowlabel_consistency * use net_info_ratelimited() * checkpatch cleanups Signed-off-by: Florent Fourcot Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 592fecd2e59e..21edaf1f7916 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -27,6 +27,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; + int flowlabel_consistency; int icmpv6_time; int anycast_src_echo_reply; }; -- cgit v1.2.2 From 4b261c75a99f29c93a0b6babfc180cdf566bd654 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 20 Jan 2014 03:43:08 +0100 Subject: ipv6: make IPV6_RECVPKTINFO work for ipv4 datagrams We currently don't report IPV6_RECVPKTINFO in cmsg access ancillary data for IPv4 datagrams on IPv6 sockets. This patch splits the ip6_datagram_recv_ctl into two functions, one which handles both protocol families, AF_INET and AF_INET6, while the ip6_datagram_recv_specific_ctl only handles IPv6 cmsg data. ip6_datagram_recv_*_ctl never reported back any errors, so we can make them return void. Also provide a helper for protocols which don't offer dual personality to further use ip6_datagram_recv_ctl, which is exported to modules. I needed to shuffle the code for ping around a bit to make it easier to implement dual personality for ping ipv6 sockets in future. Reported-by: Gert Doering Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +++ include/net/ip.h | 1 + include/net/ping.h | 8 ++++++-- include/net/transp_v6.h | 9 +++++++-- 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1084304fd75a..2faef339d8f2 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -284,6 +284,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) +#define ipv6_sk_rxinfo(sk) ((sk)->sk_family == PF_INET6 && \ + inet6_sk(sk)->rxopt.bits.rxinfo) static inline const struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { @@ -300,6 +302,7 @@ static inline int inet_v6_ipv6only(const struct sock *sk) #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 +#define ipv6_sk_rxinfo(sk) 0 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { diff --git a/include/net/ip.h b/include/net/ip.h index 7876e3b96ce3..23be0fd37937 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -63,6 +63,7 @@ struct ipcm_cookie { }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) +#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb)) struct ip_ra_chain { struct ip_ra_chain __rcu *next; diff --git a/include/net/ping.h b/include/net/ping.h index 76013653e08c..026479b61a2d 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -33,8 +33,12 @@ struct pingv6_ops { int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, int *addr_len); - int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb); + void (*ip6_datagram_recv_common_ctl)(struct sock *sk, + struct msghdr *msg, + struct sk_buff *skb); + void (*ip6_datagram_recv_specific_ctl)(struct sock *sk, + struct msghdr *msg, + struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); void (*ipv6_icmp_error)(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 48660e50ae90..b927413dde86 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -32,8 +32,13 @@ void tcpv6_exit(void); int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); -int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb); +/* this does all the common and the specific ctl work */ +void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); +void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); +void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, -- cgit v1.2.2 From 31956660693f130ff1de1040725e0636adb9261f Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Sun, 19 Jan 2014 15:24:53 -0700 Subject: sctp: fix missing SCTP mailing list address update The commit 91705c61b5202 ("net: sctp: trivial: update mailing list address") updated almost all the SCTP mailing list address from "lksctp-developers@lists.sourceforge.net" to "linux-sctp@vger.kernel.org" except for the one in include/linux/sctp.h file. Fix this way trivial one so that all is updated. Signed-off-by: Jean Sacren Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index c4c5eba26d9f..a9414fd49dc6 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -28,7 +28,7 @@ * * Please send any bug reports or fixes you make to the * email address(es): - * lksctp developers + * lksctp developers * * Or submit a bug report through the following website: * http://www.sf.net/projects/lksctp -- cgit v1.2.2 From 671314a5abb7abb8346cd3f16f75c1e55ed7667b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 15 Jan 2014 15:49:30 -0800 Subject: net_sched: act: remove capab from struct tc_action_ops It is not actually implemented. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index e171387512b3..8ed974665640 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -81,13 +81,11 @@ struct tc_action { struct list_head list; }; -#define TCA_CAP_NONE 0 struct tc_action_ops { struct list_head head; struct tcf_hashinfo *hinfo; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ - __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; int (*act)(struct sk_buff *, const struct tc_action *, struct tcf_result *); int (*dump)(struct sk_buff *, struct tc_action *, int, int); -- cgit v1.2.2 From 938dfdaa3c0f92e9a490d324f3bce43bbaef7632 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 17 Jan 2014 21:24:42 +0800 Subject: net: stmmac: Allocate and pass soc/board specific data to callbacks The current .init and .exit callbacks requires access to driver private data structures. This is not a good seperation and abstraction. Instead, we add a new .setup callback for allocating private data, and pass the returned pointer to the other callbacks. Signed-off-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- include/linux/stmmac.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 33ace712e7e8..0a5a7aca562e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -113,8 +113,10 @@ struct plat_stmmacenet_data { int max_speed; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); - int (*init)(struct platform_device *pdev); - void (*exit)(struct platform_device *pdev); + void *(*setup)(struct platform_device *pdev); + void (*free)(struct platform_device *pdev, void *priv); + int (*init)(struct platform_device *pdev, void *priv); + void (*exit)(struct platform_device *pdev, void *priv); void *custom_cfg; void *custom_data; void *bsp_priv; -- cgit v1.2.2 From 022066f50f53000679d31eb407693085f37b3f14 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 17 Jan 2014 21:24:46 +0800 Subject: net: stmmac: Use driver data and callbacks tied with compatible strings The stmmac driver core allows passing feature flags and callbacks via platform data. Add a similar stmmac_of_data to pass flags and callbacks tied to compatible strings. This allows us to extend stmmac with glue layers for different SoCs. Signed-off-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- include/linux/stmmac.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 0a5a7aca562e..1367974b7e84 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -121,4 +121,22 @@ struct plat_stmmacenet_data { void *custom_data; void *bsp_priv; }; + +/* of_data for SoC glue layer device tree bindings */ + +struct stmmac_of_data { + int has_gmac; + int enh_desc; + int tx_coe; + int rx_coe; + int bugged_jumbo; + int pmt; + int riwt_off; + void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*bus_setup)(void __iomem *ioaddr); + void *(*setup)(struct platform_device *pdev); + void (*free)(struct platform_device *pdev, void *priv); + int (*init)(struct platform_device *pdev, void *priv); + void (*exit)(struct platform_device *pdev, void *priv); +}; #endif -- cgit v1.2.2 From c779f7af99f73abb7270dcaa4c29178ab5ef7472 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 17 Jan 2014 11:37:02 -0800 Subject: net_sched: act: fetch hinfo from a->ops->hinfo Every action ops has a pointer to hash info, so we don't need to hard-code it in each module. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8ed974665640..152316b3f59a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -103,10 +103,10 @@ int tcf_hash_release(struct tcf_common *p, int bind, struct tcf_hashinfo *hinfo); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, - int bind, struct tcf_hashinfo *hinfo); + int bind); struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, - int bind, struct tcf_hashinfo *hinfo); + int bind); void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_register_action(struct tc_action_ops *a); -- cgit v1.2.2 From 6e6a50c254220acb19bd338ce433b1770e4a7a3c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 17 Jan 2014 11:37:03 -0800 Subject: net_sched: act: export tcf_hash_search() instead of tcf_hash_lookup() So that we will not expose struct tcf_common to modules. Cc: Jamal Hadi Salim Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 152316b3f59a..788d8378e587 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -97,7 +97,7 @@ struct tc_action_ops { int (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *); }; -struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo); +int tcf_hash_search(struct tc_action *a, u32 index); void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); int tcf_hash_release(struct tcf_common *p, int bind, struct tcf_hashinfo *hinfo); -- cgit v1.2.2 From 82b276cd2b0bacd58e7c307bf8856925a68c4d14 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 20 Jan 2014 05:16:39 +0100 Subject: ipv6: protect protocols not handling ipv4 from v4 connection/bind attempts Some ipv6 protocols cannot handle ipv4 addresses, so we must not allow connecting and binding to them. sendmsg logic does already check msg->name for this but must trust already connected sockets which could be set up for connection to ipv4 address family. Per-socket flag ipv6only is of no use here, as it is under users control by setsockopt. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 78d3d5124918..4f541f11ce63 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -783,6 +783,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, + int addr_len); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); -- cgit v1.2.2 From 2618abb73c8953f0848511fc13f68da4d8337574 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Mon, 20 Jan 2014 05:39:01 -0600 Subject: stmmac: Fix kernel crashes for jumbo frames These changes correct the following issues with jumbo frames on the stmmac driver: 1) The Synopsys EMAC can be configured to support different FIFO sizes at core configuration time. There's no way to query the controller and know the FIFO size, so the driver needs to get this information from the device tree in order to know how to correctly handle MTU changes and setting up dma buffers. The default max-frame-size is as currently used, which is the size of a jumbo frame. 2) The driver was enabling Jumbo frames by default, but was not allocating dma buffers of sufficient size to handle the maximum possible packet size that could be received. This led to memory corruption since DMAs were occurring beyond the extent of the allocated receive buffers for certain types of network traffic. kernel BUG at net/core/skbuff.c:126! Internal error: Oops - BUG: 0 [#1] SMP ARM Modules linked in: CPU: 0 PID: 563 Comm: sockperf Not tainted 3.13.0-rc6-01523-gf7111b9 #31 task: ef35e580 ti: ef252000 task.ti: ef252000 PC is at skb_panic+0x60/0x64 LR is at skb_panic+0x60/0x64 pc : [] lr : [] psr: 60000113 sp : ef253c18 ip : 60000113 fp : 00000000 r10: ef3a5400 r9 : 00000ebc r8 : ef3a546c r7 : ee59f000 r6 : ee59f084 r5 : ee59ff40 r4 : ee59f140 r3 : 000003e2 r2 : 00000007 r1 : c0b9c420 r0 : 0000007d Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 10c5387d Table: 2e8ac04a DAC: 00000015 Process sockperf (pid: 563, stack limit = 0xef252248) Stack: (0xef253c18 to 0xef254000) 3c00: 00000ebc ee59f000 3c20: ee59f084 ee59ff40 ee59f140 c04a9cd8 ee8c50c0 00000ebc ee59ff40 00000000 3c40: ee59f140 c02d0ef0 00000056 ef1eda80 ee8c50c0 00000ebc 22bbef29 c0318f8c 3c60: 00000056 ef3a547c ffe2c716 c02c9c90 c0ba1298 ef3a5838 ef3a5838 ef3a5400 3c80: 000020c0 ee573840 000055cb ef3f2050 c053f0e0 c0319214 22b9b085 22d92813 3ca0: 00001c80 004b8e00 ef3a5400 ee573840 ef3f2064 22d92813 ef3f2064 000055cb 3cc0: ef3f2050 c031a19c ef252000 00000000 00000000 c0561bc0 00000000 ff00ffff 3ce0: c05621c0 ef3a5400 ef3f2064 ee573840 00000020 ef3f2064 000055cb ef3f2050 3d00: c053f0e0 c031cad0 c053e740 00000e60 00000000 00000000 ee573840 ef3a5400 3d20: ef0a6e00 00000000 ef3f2064 c032507c 00010000 00000020 c0561bc0 c0561bc0 3d40: ee599850 c032799c 00000000 ee573840 c055a380 ef3a5400 00000000 ef3f2064 3d60: ef3f2050 c032799c 0101c7c0 2b6755cb c059a280 c030e4d8 000055cb ffffffff 3d80: ee574fc0 c055a380 ee574000 ee573840 00002b67 ee573840 c03fe9c4 c053fa68 3da0: c055a380 00001f6f 00000000 ee573840 c053f0e0 c0304fdc ef0a6e01 ef3f2050 3dc0: ee573858 ef031000 ee573840 c03055d8 c0ba0c40 ef000f40 00100100 c053f0dc 3de0: c053ffdc c053f0f0 00000008 00000000 ef031000 c02da948 00001140 00000000 3e00: c0563c78 ef253e5f 00000020 ee573840 00000020 c053f0f0 ef313400 ee573840 3e20: c053f0e0 00000000 00000000 c05380c0 ef313400 00001000 00000015 c02df280 3e40: ee574000 ef001e00 00000000 00001080 00000042 005cd980 ef031500 ef031500 3e60: 00000000 c02df824 ef031500 c053e390 c0541084 f00b1e00 c05925e8 c02df864 3e80: 00001f5c ef031440 c053e390 c0278524 00000002 00000000 c0b9eb48 c02df280 3ea0: ee8c7180 00000100 c0542ca8 00000015 00000040 ef031500 ef031500 ef031500 3ec0: c027803c ef252000 00000040 000000ec c05380c0 c0b9eb40 c0b9eb48 c02df940 3ee0: ef060780 ffffa4dd c0564a9c c056343c 002e80a8 00000080 ef031500 00000001 3f00: c053808c ef252000 fffec100 00000003 00000004 002e80a8 0000000c c00258f0 3f20: 002e80a8 c005e704 00000005 00000100 c05634d0 c0538080 c05333e0 00000000 3f40: 0000000a c0565580 c05380c0 ffffa4dc c05434f4 00400100 00000004 c0534cd4 3f60: 00000098 00000000 fffec100 002e80a8 00000004 002e80a8 002a20e0 c0025da8 3f80: c0534cd4 c000f020 fffec10c c053ea60 ef253fb0 c0008530 0000ffe2 b6ef67f4 3fa0: 40000010 ffffffff 00000124 c0012f3c 0000ffe2 002e80f0 0000ffe2 00004000 3fc0: becb6338 becb6334 00000004 00000124 002e80a8 00000004 002e80a8 002a20e0 3fe0: becb6300 becb62f4 002773bb b6ef67f4 40000010 ffffffff 00000000 00000000 [] (skb_panic+0x60/0x64) from [] (skb_put+0x4c/0x50) [] (skb_put+0x4c/0x50) from [] (tcp_collapse+0x314/0x3ec) [] (tcp_collapse+0x314/0x3ec) from [] (tcp_try_rmem_schedule+0x1b0/0x3c4) [] (tcp_try_rmem_schedule+0x1b0/0x3c4) from [] (tcp_data_queue+0x480/0xe6c) [] (tcp_data_queue+0x480/0xe6c) from [] (tcp_rcv_established+0x180/0x62c) [] (tcp_rcv_established+0x180/0x62c) from [] (tcp_v4_do_rcv+0x13c/0x31c) [] (tcp_v4_do_rcv+0x13c/0x31c) from [] (tcp_v4_rcv+0x718/0x73c) [] (tcp_v4_rcv+0x718/0x73c) from [] (ip_local_deliver+0x98/0x274) [] (ip_local_deliver+0x98/0x274) from [] (ip_rcv+0x420/0x758) [] (ip_rcv+0x420/0x758) from [] (__netif_receive_skb_core+0x44c/0x5bc) [] (__netif_receive_skb_core+0x44c/0x5bc) from [] (netif_receive_skb+0x48/0xb4) [] (netif_receive_skb+0x48/0xb4) from [] (napi_gro_flush+0x70/0x94) [] (napi_gro_flush+0x70/0x94) from [] (napi_complete+0x1c/0x34) [] (napi_complete+0x1c/0x34) from [] (stmmac_poll+0x4e8/0x5c8) [] (stmmac_poll+0x4e8/0x5c8) from [] (net_rx_action+0xc4/0x1e4) [] (net_rx_action+0xc4/0x1e4) from [] (__do_softirq+0x12c/0x2e8) [] (__do_softirq+0x12c/0x2e8) from [] (irq_exit+0x78/0xac) [] (irq_exit+0x78/0xac) from [] (handle_IRQ+0x44/0x90) [] (handle_IRQ+0x44/0x90) from [] (gic_handle_irq+0x2c/0x5c) [] (gic_handle_irq+0x2c/0x5c) from [] (__irq_usr+0x3c/0x60) 3) The driver was setting the dma buffer size after allocating dma buffers, which caused a system panic when changing the MTU. BUG: Bad page state in process ifconfig pfn:2e850 page:c0b72a00 count:0 mapcount:0 mapping: (null) index:0x0 page flags: 0x200(arch_1) Modules linked in: CPU: 0 PID: 566 Comm: ifconfig Not tainted 3.13.0-rc6-01523-gf7111b9 #29 [] (unwind_backtrace+0x0/0xf8) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (dump_stack+0x70/0x88) [] (dump_stack+0x70/0x88) from [] (bad_page+0xc8/0x118) [] (bad_page+0xc8/0x118) from [] (get_page_from_freelist+0x744/0x870) [] (get_page_from_freelist+0x744/0x870) from [] (__alloc_pages_nodemask+0x118/0x86c) [] (__alloc_pages_nodemask+0x118/0x86c) from [] (__get_free_pages+0x10/0x54) [] (__get_free_pages+0x10/0x54) from [] (kmalloc_order_trace+0x24/0xa0) [] (kmalloc_order_trace+0x24/0xa0) from [] (__kmalloc_reserve.isra.21+0x24/0x70) [] (__kmalloc_reserve.isra.21+0x24/0x70) from [] (__alloc_skb+0x68/0x13c) [] (__alloc_skb+0x68/0x13c) from [] (__netdev_alloc_skb+0x3c/0xe8) [] (__netdev_alloc_skb+0x3c/0xe8) from [] (stmmac_open+0x63c/0x1024) [] (stmmac_open+0x63c/0x1024) from [] (__dev_open+0xa0/0xfc) [] (__dev_open+0xa0/0xfc) from [] (__dev_change_flags+0x94/0x158) [] (__dev_change_flags+0x94/0x158) from [] (dev_change_flags+0x18/0x48) [] (dev_change_flags+0x18/0x48) from [] (devinet_ioctl+0x638/0x700) [] (devinet_ioctl+0x638/0x700) from [] (sock_ioctl+0x64/0x290) [] (sock_ioctl+0x64/0x290) from [] (do_vfs_ioctl+0x78/0x5b8) [] (do_vfs_ioctl+0x78/0x5b8) from [] (SyS_ioctl+0x3c/0x5c) [] (SyS_ioctl+0x3c/0x5c) from [] The fixes have been verified using reproducible, automated testing. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 1367974b7e84..6f27d4f957bd 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -111,6 +111,7 @@ struct plat_stmmacenet_data { int force_thresh_dma_mode; int riwt_off; int max_speed; + int maxmtu; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); void *(*setup)(struct platform_device *pdev); -- cgit v1.2.2 From b582ef0990d457f7ce8ccf827af51a575ca0b4a6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 20 Jan 2014 13:59:19 +0200 Subject: net: Add GRO support for UDP encapsulating protocols Add GRO handlers for protocols that do UDP encapsulation, with the intent of being able to coalesce packets which encapsulate packets belonging to the same TCP session. For GRO purposes, the destination UDP port takes the role of the ether type field in the ethernet header or the next protocol in the IP header. The UDP GRO handler will only attempt to coalesce packets whose destination port is registered to have gro handler. Use a mark on the skb GRO CB data to disallow (flush) running the udp gro receive code twice on a packet. This solves the problem of udp encapsulated packets whose inner VM packet is udp and happen to carry a port which has registered offloads. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++++++- include/net/protocol.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 83ce2aee65e6..c31022980e18 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1675,7 +1675,10 @@ struct napi_gro_cb { unsigned long age; /* Used in ipv6_gro_receive() */ - int proto; + u16 proto; + + /* Used in udp_gro_receive */ + u16 udp_mark; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -1714,6 +1717,11 @@ struct packet_offload { struct list_head list; }; +struct udp_offload { + __be16 port; + struct offload_callbacks callbacks; +}; + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/net/protocol.h b/include/net/protocol.h index 0e5f8665d7fb..a7e986b08147 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -108,6 +108,9 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); +int udp_add_offload(struct udp_offload *prot); +void udp_del_offload(struct udp_offload *prot); + #if IS_ENABLED(CONFIG_IPV6) int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num); -- cgit v1.2.2 From dc01e7d3447793fd9e4090aa9d50c549848b5a18 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 20 Jan 2014 13:59:21 +0200 Subject: net: Add GRO support for vxlan traffic Add GRO handlers for vxlann, by using the UDP GRO infrastructure. For single TCP session that goes through vxlan tunneling I got nice improvement from 6.8Gbs to 11.5Gbs --> UDP/VXLAN GRO disabled $ netperf -H 192.168.52.147 -c -C $ netperf -t TCP_STREAM -H 192.168.52.147 -c -C MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 65536 65536 10.00 6799.75 12.54 24.79 0.604 1.195 --> UDP/VXLAN GRO enabled $ netperf -t TCP_STREAM -H 192.168.52.147 -c -C MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.52.147 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 65536 65536 10.00 11562.72 24.90 20.34 0.706 0.577 Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/net/vxlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 6b6d180fb91a..5deef1ae78c9 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -21,6 +21,7 @@ struct vxlan_sock { struct rcu_head rcu; struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; + struct udp_offload udp_offloads; }; struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, -- cgit v1.2.2 From 940287ee10e270b78fbda6c44eaa1def5fde897d Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:06 +0800 Subject: sctp: remove macros sctp_spin_[un]lock_irqrestore Redefined spin_[un]lock_irqstore to sctp_spin_[un]lock_irqrestore for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 610a8c8738fa..b180a8fb68c5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -171,9 +171,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; */ /* spin lock wrappers. */ -#define sctp_spin_lock_irqsave(lock, flags) spin_lock_irqsave(lock, flags) -#define sctp_spin_unlock_irqrestore(lock, flags) \ - spin_unlock_irqrestore(lock, flags) #define sctp_local_bh_disable() local_bh_disable() #define sctp_local_bh_enable() local_bh_enable() #define sctp_spin_lock(lock) spin_lock(lock) @@ -353,13 +350,13 @@ static inline void sctp_skb_list_tail(struct sk_buff_head *list, { unsigned long flags; - sctp_spin_lock_irqsave(&head->lock, flags); + spin_lock_irqsave(&head->lock, flags); sctp_spin_lock(&list->lock); skb_queue_splice_tail_init(list, head); sctp_spin_unlock(&list->lock); - sctp_spin_unlock_irqrestore(&head->lock, flags); + spin_unlock_irqrestore(&head->lock, flags); } /** -- cgit v1.2.2 From 79b91130a2679ea5063d49c021d97346a09eb0a5 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:07 +0800 Subject: sctp: remove macros sctp_local_bh_{disable|enable} Redefined local_bh_{disable|enable} to sctp_local_bh_{disable|enable} for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b180a8fb68c5..c17121afe182 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -171,8 +171,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; */ /* spin lock wrappers. */ -#define sctp_local_bh_disable() local_bh_disable() -#define sctp_local_bh_enable() local_bh_enable() #define sctp_spin_lock(lock) spin_lock(lock) #define sctp_spin_unlock(lock) spin_unlock(lock) #define sctp_write_lock(lock) write_lock(lock) -- cgit v1.2.2 From 3c8e43ba9fe93b2670decc119f82f072a8d2459f Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:08 +0800 Subject: sctp: remove macros sctp_spin_[un]lock Redefined spin_[un]lock to sctp_spin_[un]lock for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index c17121afe182..4f77e9811f9e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -171,8 +171,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; */ /* spin lock wrappers. */ -#define sctp_spin_lock(lock) spin_lock(lock) -#define sctp_spin_unlock(lock) spin_unlock(lock) #define sctp_write_lock(lock) write_lock(lock) #define sctp_write_unlock(lock) write_unlock(lock) #define sctp_read_lock(lock) read_lock(lock) @@ -349,11 +347,11 @@ static inline void sctp_skb_list_tail(struct sk_buff_head *list, unsigned long flags; spin_lock_irqsave(&head->lock, flags); - sctp_spin_lock(&list->lock); + spin_lock(&list->lock); skb_queue_splice_tail_init(list, head); - sctp_spin_unlock(&list->lock); + spin_unlock(&list->lock); spin_unlock_irqrestore(&head->lock, flags); } -- cgit v1.2.2 From 387602dfdc3332f786f6195c74d53f9e8f7af402 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:09 +0800 Subject: sctp: remove macros sctp_write_[un]_lock Redefined write_[un]lock to sctp_write_[un]lock for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4f77e9811f9e..73dcc0c82749 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -171,8 +171,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; */ /* spin lock wrappers. */ -#define sctp_write_lock(lock) write_lock(lock) -#define sctp_write_unlock(lock) write_unlock(lock) #define sctp_read_lock(lock) read_lock(lock) #define sctp_read_unlock(lock) read_unlock(lock) -- cgit v1.2.2 From 1b0de194f11450d1c1e164bf2826fe80a38e1f62 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:10 +0800 Subject: sctp: remove macros sctp_read_[un]lock Redefined read_[un]lock to sctp_read_[un]lock for user space friendly code which we haven't use in years, and the macros we never used, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 73dcc0c82749..572cd5a5292a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -170,10 +170,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; * Section: Macros, externs, and inlines */ -/* spin lock wrappers. */ -#define sctp_read_lock(lock) read_lock(lock) -#define sctp_read_unlock(lock) read_unlock(lock) - /* sock lock wrappers. */ #define sctp_lock_sock(sk) lock_sock(sk) #define sctp_release_sock(sk) release_sock(sk) -- cgit v1.2.2 From 048ed4b6266144fdee55089c9eef55b0c1d42ba1 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:11 +0800 Subject: sctp: remove macros sctp_{lock|release}_sock Redefined {lock|release}_sock to sctp_{lock|release}_sock for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 572cd5a5292a..ec18d306bad7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -171,8 +171,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; */ /* sock lock wrappers. */ -#define sctp_lock_sock(sk) lock_sock(sk) -#define sctp_release_sock(sk) release_sock(sk) #define sctp_bh_lock_sock(sk) bh_lock_sock(sk) #define sctp_bh_unlock_sock(sk) bh_unlock_sock(sk) -- cgit v1.2.2 From 5bc1d1b4a261a865cbde65b1561748df5b9c724b Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 21 Jan 2014 15:44:12 +0800 Subject: sctp: remove macros sctp_bh_[un]lock_sock Redefined bh_[un]lock_sock to sctp_bh[un]lock_sock for user space friendly code which we haven't use in years, so removing them. Signed-off-by: Wang Weidong Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ec18d306bad7..a3353f45ef94 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -170,10 +170,6 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; * Section: Macros, externs, and inlines */ -/* sock lock wrappers. */ -#define sctp_bh_lock_sock(sk) bh_lock_sock(sk) -#define sctp_bh_unlock_sock(sk) bh_unlock_sock(sk) - /* SCTP SNMP MIB stats handlers */ #define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) #define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) -- cgit v1.2.2 From 37692299319db31f33f04ce418604781fa5710fb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Jan 2014 00:19:37 +0100 Subject: net: filter: let bpf_tell_extensions return SKF_AD_MAX Michal Sekletar added in commit ea02f9411d9f ("net: introduce SO_BPF_EXTENSIONS") a facility where user space can enquire the BPF ancillary instruction set, which is imho a step into the right direction for letting user space high-level to BPF optimizers make an informed decision for possibly using these extensions. The original rationale was to return through a getsockopt(2) a bitfield of which instructions are supported and which are not, as of right now, we just return 0 to indicate a base support for SKF_AD_PROTOCOL up to SKF_AD_PAY_OFFSET. Limitations of this approach are that this API which we need to maintain for a long time can only support a maximum of 32 extensions, and needs to be additionally maintained/updated when each new extension that comes in. I thought about this a bit more and what we can do here to overcome this is to just return SKF_AD_MAX. Since we never remove any extension since we cannot break user space and always linearly increase SKF_AD_MAX on each newly added extension, user space can make a decision on what extensions are supported in the whole set of extensions and which aren't, by just checking which of them from the whole set have an offset < SKF_AD_MAX of the underlying kernel. Since SKF_AD_MAX must be updated each time we add new ones, we don't need to introduce an additional enum and got maintenance for free. At some point in time when SO_BPF_EXTENSIONS becomes ubiquitous for most kernels, then an application can simply make use of this and easily be run on newer or older underlying kernels without needing to be recompiled, of course. Since that is for 3.14, it's not too late to do this change. Cc: Michal Sekletar Cc: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Michal Sekletar Signed-off-by: David S. Miller --- include/linux/filter.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1a95a2dcf113..e568c8ef896b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -85,13 +85,7 @@ static inline void bpf_jit_free(struct sk_filter *fp) static inline int bpf_tell_extensions(void) { - /* When adding new BPF extension it is necessary to enumerate - * it here, so userspace software which wants to know what is - * supported can do so by inspecting return value of this - * function - */ - - return 0; + return SKF_AD_MAX; } enum { -- cgit v1.2.2 From f337db64af059c9a94278a8b0ab97d87259ff62f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Jan 2014 02:29:39 +0100 Subject: random32: add prandom_u32_max and convert open coded users Many functions have open coded a function that returns a random number in range [0,N-1]. Under the assumption that we have a PRNG such as taus113 with being well distributed in [0, ~0U] space, we can implement such a function as uword t = (n*m')>>32, where m' is a random number obtained from PRNG, n the right open interval border and t our resulting random number, with n,m',t in u32 universe. Lets go with Joe and simply call it prandom_u32_max(), although technically we have an right open interval endpoint, but that we have documented. Other users can further be migrated to the new prandom_u32_max() function later on; for now, we need to make sure to migrate reciprocal_divide() users for the reciprocal_divide() follow-up fixup since their function signatures are going to change. Joint work with Hannes Frederic Sowa. Cc: Jakub Zawadzki Cc: Eric Dumazet Cc: linux-kernel@vger.kernel.org Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/random.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 4002b3df4c85..1cfce0e24dbd 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -8,7 +8,6 @@ #include - extern void add_device_randomness(const void *, unsigned int); extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); @@ -38,6 +37,23 @@ struct rnd_state { u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); +/** + * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) + * @ep_ro: right open interval endpoint + * + * Returns a pseudo-random number that is in interval [0, ep_ro). Note + * that the result depends on PRNG being well distributed in [0, ~0U] + * u32 space. Here we use maximally equidistributed combined Tausworthe + * generator, that is, prandom_u32(). This is useful when requesting a + * random index of an array containing ep_ro elements, for example. + * + * Returns: pseudo-random number in interval [0, ep_ro) + */ +static inline u32 prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64) prandom_u32() * ep_ro) >> 32); +} + /* * Handle minimum values for seeds */ -- cgit v1.2.2 From 89770b0a69ee0e0e5e99c722192d535115f73778 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Jan 2014 02:29:40 +0100 Subject: net: introduce reciprocal_scale helper and convert users As David Laight suggests, we shouldn't necessarily call this reciprocal_divide() when users didn't requested a reciprocal_value(); lets keep the basic idea and call it reciprocal_scale(). More background information on this topic can be found in [1]. Joint work with Hannes Frederic Sowa. [1] http://homepage.cs.uiowa.edu/~jones/bcd/divide.html Suggested-by: David Laight Cc: Jakub Zawadzki Cc: Eric Dumazet Cc: linux-kernel@vger.kernel.org Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/kernel.h | 19 +++++++++++++++++++ include/net/codel.h | 4 +--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ecb87544cc5d..03d8a6b0e2e8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -193,6 +193,25 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +/** + * reciprocal_scale - "scale" a value into range [0, ep_ro) + * @val: value + * @ep_ro: right open interval endpoint + * + * Perform a "reciprocal multiplication" in order to "scale" a value into + * range [0, ep_ro), where the upper interval endpoint is right-open. + * This is useful, e.g. for accessing a index of an array containing + * ep_ro elements, for example. Think of it as sort of modulus, only that + * the result isn't that of modulo. ;) Note that if initial input is a + * small value, then result will return 0. + * + * Return: a result based on val in interval [0, ep_ro). + */ +static inline u32 reciprocal_scale(u32 val, u32 ep_ro) +{ + return (u32)(((u64) val * ep_ro) >> 32); +} + #if defined(CONFIG_MMU) && \ (defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)) void might_fault(void); diff --git a/include/net/codel.h b/include/net/codel.h index 3b04ff5f6f8d..fe0eab32ce76 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -46,7 +46,6 @@ #include #include #include -#include /* Controlling Queue Delay (CoDel) algorithm * ========================================= @@ -211,10 +210,9 @@ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { - return t + reciprocal_divide(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); + return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } - static bool codel_should_drop(const struct sk_buff *skb, struct Qdisc *sch, struct codel_vars *vars, -- cgit v1.2.2 From 809fa972fd90ff27225294b17a027e908b2d7b7a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 22 Jan 2014 02:29:41 +0100 Subject: reciprocal_divide: update/correction of the algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jakub Zawadzki noticed that some divisions by reciprocal_divide() were not correct [1][2], which he could also show with BPF code after divisions are transformed into reciprocal_value() for runtime invariance which can be passed to reciprocal_divide() later on; reverse in BPF dump ended up with a different, off-by-one K in some situations. This has been fixed by Eric Dumazet in commit aee636c4809fa5 ("bpf: do not use reciprocal divide"). This follow-up patch improves reciprocal_value() and reciprocal_divide() to work in all cases by using Granlund and Montgomery method, so that also future use is safe and without any non-obvious side-effects. Known problems with the old implementation were that division by 1 always returned 0 and some off-by-ones when the dividend and divisor where very large. This seemed to not be problematic with its current users, as far as we can tell. Eric Dumazet checked for the slab usage, we cannot surely say so in the case of flex_array. Still, in order to fix that, we propose an extension from the original implementation from commit 6a2d7a955d8d resp. [3][4], by using the algorithm proposed in "Division by Invariant Integers Using Multiplication" [5], Torbjörn Granlund and Peter L. Montgomery, that is, pseudocode for q = n/d where q, n, d is in u32 universe: 1) Initialization: int l = ceil(log_2 d) uword m' = floor((1<<32)*((1<>32 q = (t+((n-t)>>sh_1))>>sh_2 The assembler implementation from Agner Fog [6] also helped a lot while implementing. We have tested the implementation on x86_64, ppc64, i686, s390x; on x86_64/haswell we're still half the latency compared to normal divide. Joint work with Daniel Borkmann. [1] http://www.wireshark.org/~darkjames/reciprocal-buggy.c [2] http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c [3] https://gmplib.org/~tege/division-paper.pdf [4] http://homepage.cs.uiowa.edu/~jones/bcd/divide.html [5] http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.1.2556 [6] http://www.agner.org/optimize/asmlib.zip Reported-by: Jakub Zawadzki Cc: Eric Dumazet Cc: Austin S Hemmelgarn Cc: linux-kernel@vger.kernel.org Cc: Jesse Gross Cc: Jamal Hadi Salim Cc: Stephen Hemminger Cc: Matt Mackall Cc: Pekka Enberg Cc: Christoph Lameter Cc: Andy Gospodarek Cc: Veaceslav Falico Cc: Jay Vosburgh Cc: Jakub Zawadzki Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/flex_array.h | 3 ++- include/linux/reciprocal_div.h | 39 +++++++++++++++++++++------------------ include/linux/slab_def.h | 4 +++- include/net/red.h | 3 ++- 4 files changed, 28 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 6843cf193a44..b6efb0c64408 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -2,6 +2,7 @@ #define _FLEX_ARRAY_H #include +#include #include #define FLEX_ARRAY_PART_SIZE PAGE_SIZE @@ -22,7 +23,7 @@ struct flex_array { int element_size; int total_nr_elements; int elems_per_part; - u32 reciprocal_elems; + struct reciprocal_value reciprocal_elems; struct flex_array_part *parts[]; }; /* diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index f9c90b33285b..8c5a3fb6c6c5 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -4,29 +4,32 @@ #include /* - * This file describes reciprocical division. + * This algorithm is based on the paper "Division by Invariant + * Integers Using Multiplication" by Torbjörn Granlund and Peter + * L. Montgomery. * - * This optimizes the (A/B) problem, when A and B are two u32 - * and B is a known value (but not known at compile time) + * The assembler implementation from Agner Fog, which this code is + * based on, can be found here: + * http://www.agner.org/optimize/asmlib.zip * - * The math principle used is : - * Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B) - * Then A / B = (u32)(((u64)(A) * (R)) >> 32) - * - * This replaces a divide by a multiply (and a shift), and - * is generally less expensive in CPU cycles. + * This optimization for A/B is helpful if the divisor B is mostly + * runtime invariant. The reciprocal of B is calculated in the + * slow-path with reciprocal_value(). The fast-path can then just use + * a much faster multiplication operation with a variable dividend A + * to calculate the division A/B. */ -/* - * Computes the reciprocal value (R) for the value B of the divisor. - * Should not be called before each reciprocal_divide(), - * or else the performance is slower than a normal divide. - */ -extern u32 reciprocal_value(u32 B); +struct reciprocal_value { + u32 m; + u8 sh1, sh2; +}; +struct reciprocal_value reciprocal_value(u32 d); -static inline u32 reciprocal_divide(u32 A, u32 R) +static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) { - return (u32)(((u64)A * R) >> 32); + u32 t = (u32)(((u64)a * R.m) >> 32); + return (t + ((a - t) >> R.sh1)) >> R.sh2; } -#endif + +#endif /* _LINUX_RECIPROCAL_DIV_H */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 09bfffb08a56..96e8abae19a9 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SLAB_DEF_H #define _LINUX_SLAB_DEF_H +#include + /* * Definitions unique to the original Linux SLAB allocator. */ @@ -12,7 +14,7 @@ struct kmem_cache { unsigned int shared; unsigned int size; - u32 reciprocal_buffer_size; + struct reciprocal_value reciprocal_buffer_size; /* 2) touched by every alloc & free from the backend */ unsigned int flags; /* constant flags */ diff --git a/include/net/red.h b/include/net/red.h index 168bb2f495f2..76e0b5f922c6 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -130,7 +130,8 @@ struct red_parms { u32 qth_max; /* Max avg length threshold: Wlog scaled */ u32 Scell_max; u32 max_P; /* probability, [0 .. 1.0] 32 scaled */ - u32 max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */ + /* reciprocal_value(max_P / qth_delta) */ + struct reciprocal_value max_P_reciprocal; u32 qth_delta; /* max_th - min_th */ u32 target_min; /* min_th + 0.4*(max_th - min_th) */ u32 target_max; /* min_th + 0.6*(max_th - min_th) */ -- cgit v1.2.2 From 2d36097d26b5991d71a2cf4a20c1a158f0f1bfcd Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 22 Jan 2014 16:01:44 -0500 Subject: af_packet: Add Queue mapping mode to af_packet fanout operation This patch adds a queue mapping mode to the fanout operation of af_packet sockets. This allows user space af_packet users to better filter on flows ingressing and egressing via a specific hardware queue, and avoids the potential packet reordering that can occur when FANOUT_CPU is being used and irq affinity varies. Tested successfully by myself. applies to net-next Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S. Miller --- include/uapi/linux/if_packet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 1988a02842cc..bac27fa05f5b 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -60,6 +60,7 @@ struct sockaddr_ll { #define PACKET_FANOUT_CPU 2 #define PACKET_FANOUT_ROLLOVER 3 #define PACKET_FANOUT_RND 4 +#define PACKET_FANOUT_QM 5 #define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 #define PACKET_FANOUT_FLAG_DEFRAG 0x8000 -- cgit v1.2.2 From eb97768acb3f7749a3a03ee35fee9088244113ea Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Wed, 22 Jan 2014 05:39:46 +0100 Subject: net: update comments of "struct msghdr" with the more accurate RFC3542 ones Signed-off-by: Francois-Xavier Le Bail Signed-off-by: David S. Miller --- include/linux/socket.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 5d488a65ad20..8e98297f1388 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -45,13 +45,13 @@ struct linger { */ struct msghdr { - void * msg_name; /* Socket name */ - int msg_namelen; /* Length of name */ - struct iovec * msg_iov; /* Data blocks */ - __kernel_size_t msg_iovlen; /* Number of blocks */ - void * msg_control; /* Per protocol magic (eg BSD file descriptor passing) */ - __kernel_size_t msg_controllen; /* Length of cmsg list */ - unsigned int msg_flags; + void *msg_name; /* ptr to socket address structure */ + int msg_namelen; /* size of socket address structure */ + struct iovec *msg_iov; /* scatter/gather array */ + __kernel_size_t msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ + unsigned int msg_flags; /* flags on received message */ }; /* For recvmmsg/sendmmsg */ -- cgit v1.2.2 From 7c90cc2d40cab15adc78545edba8b5996bd4cade Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Wed, 22 Jan 2014 07:42:37 +0100 Subject: ipv6: enable anycast addresses as source addresses for datagrams This change allows to consider an anycast address valid as source address when given via an IPV6_PKTINFO or IPV6_2292PKTINFO ancillary data item. So, when sending a datagram with ancillary data, the unicast and anycast addresses are handled in the same way. - Adds ipv6_chk_acast_addr_src() to check if an anycast address is link-local on given interface or is global. - Uses it in ip6_datagram_send_ctl(). Signed-off-by: Francois-Xavier Le Bail Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/addrconf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 66c4a44d8f5c..50e39a8822b4 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -205,8 +205,9 @@ void ipv6_sock_ac_close(struct sock *sk); int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr); int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, - const struct in6_addr *addr); - + const struct in6_addr *addr); +bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, + const struct in6_addr *addr); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); -- cgit v1.2.2 From df7dbcbbafc0b8f3fb31a40c6f3c4a7e15cb0b40 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 22 Jan 2014 09:05:54 +0100 Subject: rtnetlink: put "BOND" into nl attribute names which are related to bonding Signed-off-by: Jiri Pirko Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ba2f3bf5fdf5..1f30b85ebf9d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -144,7 +144,7 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, - IFLA_SLAVE, + IFLA_BOND_SLAVE, __IFLA_MAX }; @@ -370,16 +370,17 @@ enum { #define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1) enum { - IFLA_SLAVE_STATE, - IFLA_SLAVE_MII_STATUS, - IFLA_SLAVE_LINK_FAILURE_COUNT, - IFLA_SLAVE_PERM_HWADDR, - IFLA_SLAVE_QUEUE_ID, - IFLA_SLAVE_AD_AGGREGATOR_ID, - __IFLA_SLAVE_MAX, + IFLA_BOND_SLAVE_UNSPEC, + IFLA_BOND_SLAVE_STATE, + IFLA_BOND_SLAVE_MII_STATUS, + IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, + IFLA_BOND_SLAVE_PERM_HWADDR, + IFLA_BOND_SLAVE_QUEUE_ID, + IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + __IFLA_BOND_SLAVE_MAX, }; -#define IFLA_SLAVE_MAX (__IFLA_SLAVE_MAX - 1) +#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1) /* SR-IOV virtual function management section */ -- cgit v1.2.2 From ba7d49b1f0f8e5f24294a880ed576964059af5ef Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 22 Jan 2014 09:05:55 +0100 Subject: rtnetlink: provide api for getting and setting slave info Recent patch bonding: add netlink attributes to slave link dev (1d3ee88ae0d6) Introduced yet another device specific way to access slave information over rtnetlink. There is one already there for bridge. This patch introduces generic way to do this, for getting and setting info as well by extending link_ops. Later on, this new interface will be used for bridge ports as well. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 14 ++++++++++++++ include/uapi/linux/if_link.h | 2 ++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 8fb42070a2c1..661e45d38051 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -79,6 +79,20 @@ struct rtnl_link_ops { const struct net_device *dev); unsigned int (*get_num_tx_queues)(void); unsigned int (*get_num_rx_queues)(void); + + int slave_maxtype; + const struct nla_policy *slave_policy; + int (*slave_validate)(struct nlattr *tb[], + struct nlattr *data[]); + int (*slave_changelink)(struct net_device *dev, + struct net_device *slave_dev, + struct nlattr *tb[], + struct nlattr *data[]); + size_t (*get_slave_size)(const struct net_device *dev, + const struct net_device *slave_dev); + int (*fill_slave_info)(struct sk_buff *skb, + const struct net_device *dev, + const struct net_device *slave_dev); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1f30b85ebf9d..b8fb352b58f6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -241,6 +241,8 @@ enum { IFLA_INFO_KIND, IFLA_INFO_DATA, IFLA_INFO_XSTATS, + IFLA_INFO_SLAVE_KIND, + IFLA_INFO_SLAVE_DATA, __IFLA_INFO_MAX, }; -- cgit v1.2.2 From a9517d0f43832d787a4a0348163d659641bfd83c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 22 Jan 2014 09:05:57 +0100 Subject: rtnetlink: remove ndo_get_slave No longer used API bond-specific can be removed now. This is now handled in a generic way in rtnl_link_ops. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c31022980e18..440a02ee6f92 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -921,9 +921,6 @@ struct netdev_phys_port_id { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * - * int (*ndo_get_slave)(struct net_device *slave_dev, struct sk_buff *skb); - * Called to fill netlink skb with slave info. - * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1096,8 +1093,6 @@ struct net_device_ops { struct net_device *slave_dev); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); - int (*ndo_get_slave)(struct net_device *slave_dev, - struct sk_buff *skb); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, -- cgit v1.2.2 From 237266f76d417b5fb1984ac3774e3a4fe070fe5d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 23 Jan 2014 16:51:27 +0100 Subject: rtnetlink: add missing IFLA_BOND_AD_INFO_UNSPEC Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b8fb352b58f6..206f6511449d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -361,6 +361,7 @@ enum { #define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1) enum { + IFLA_BOND_AD_INFO_UNSPEC, IFLA_BOND_AD_INFO_AGGREGATOR, IFLA_BOND_AD_INFO_NUM_PORTS, IFLA_BOND_AD_INFO_ACTOR_KEY, -- cgit v1.2.2 From f55aa836fb7a90b62cb2c533b899e331cdffcf0c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 24 Jan 2014 08:39:16 +0100 Subject: rtnetlink: remove IFLA_BOND_SLAVE definition This is in net-next only, for couple of days. Not used anymore, and never should have been. So just remove it and pretend it was never there. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 206f6511449d..16410b6e7819 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -144,7 +144,6 @@ enum { IFLA_NUM_RX_QUEUES, IFLA_CARRIER, IFLA_PHYS_PORT_ID, - IFLA_BOND_SLAVE, __IFLA_MAX }; -- cgit v1.2.2