From 8975ae88e137ea02a71b7a86af2f8eb790c2f1e7 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 14 Sep 2014 21:48:28 +0300 Subject: mac80211: fix warning on htmldocs for last_tdls_pkt_time Forgot to add an entry to the struct description of sta_info. Signed-off-by: Liad Kaufman Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 42f68cb8957e..bcda2ac7d844 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -336,6 +336,7 @@ struct ieee80211_tx_latency_stat { * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station + * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.2 From c12bc4885f4b3bab0ed779c69d5d7e3223fa5003 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 30 Sep 2014 07:08:02 +0300 Subject: mac80211: return the vif's chandef in ieee80211_cfg_get_channel() The chandef of the channel context a vif is using may be different than the chandef of the vif itself. For instance, the bandwidth used by the vif may be narrower than the one configured in the channel context. To avoid confusion, return the vif's chandef in ieee80211_cfg_get_channel() instead of the chandef of the channel context. Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb6a1502b6df..343da1e35025 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3458,7 +3458,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - *chandef = chanctx_conf->def; + *chandef = sdata->vif.bss_conf.chandef; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && -- cgit v1.2.2 From bc37b16870a382e8b71d881444c19a16de1c1a7f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 7 Oct 2014 22:20:23 +0200 Subject: net: rfkill: kernel-doc warning fixes Correct the kernel-doc, the parameter is called "blocked" not "state". Signed-off-by: Fabian Frederick Signed-off-by: Johannes Berg --- net/rfkill/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index b3b16c070a7f..fa7cd792791c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is claimed by userspace (in which case, @@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) /** * rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. -- cgit v1.2.2 From 252e07ca5f64dd31fdfca8027287e7d75fefdab1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 8 Oct 2014 09:48:34 +0300 Subject: nl80211: sanity check the channel switch counter value The nl80211 channel switch count attribute (NL80211_ATTR_CH_SWITCH_COUNT) is specified as u32, but the specification uses u8 for the counter. To make sure strange things don't happen without informing the user, sanity check the value and return -EINVAL if it doesn't fit in u8. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cb9f5a44ffad..5839c85075f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5927,6 +5927,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) int err; bool need_new_beacon = false; int len, i; + u32 cs_count; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5963,7 +5964,14 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (need_new_beacon && !info->attrs[NL80211_ATTR_CSA_IES]) return -EINVAL; - params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + /* Even though the attribute is u32, the specification says + * u8, so let's make sure we don't overflow. + */ + cs_count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + if (cs_count > 255) + return -EINVAL; + + params.count = cs_count; if (!need_new_beacon) goto skip_beacons; -- cgit v1.2.2 From ab2d7251d666995740da17b2a51ca545ac5dd037 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Oct 2014 11:25:20 +0200 Subject: netfilter: missing module license in the nf_reject_ipvX modules [ 23.545204] nf_reject_ipv4: module license 'unspecified' taints kernel. Fixes: c8d7b98 ("netfilter: move nf_send_resetX() code to nf_reject_ipvX modules") Reported-by: Dave Young Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 3 +++ net/ipv6/netfilter/nf_reject_ipv6.c | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index b023b4eb1a96..92b303dbd5fc 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -125,3 +126,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) kfree_skb(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 5f5f0438d74d..20d9defc6c59 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -5,6 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#include #include #include #include @@ -161,3 +163,5 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) ip6_local_out(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); + +MODULE_LICENSE("GPL"); -- cgit v1.2.2 From 7210e4e38f945dfa173c4a4e59ad827c9ecad541 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Oct 2014 19:50:22 +0200 Subject: netfilter: nf_tables: restrict nat/masq expressions to nat chain type This adds the missing validation code to avoid the use of nat/masq from non-nat chains. The validation assumes two possible configuration scenarios: 1) Use of nat from base chain that is not of nat type. Reject this configuration from the nft_*_init() path of the expression. 2) Use of nat from non-base chain. In this case, we have to wait until the non-base chain is referenced by at least one base chain via jump/goto. This is resolved from the nft_*_validate() path which is called from nf_tables_check_loops(). The user gets an -EOPNOTSUPP in both cases. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_masq_ipv4.c | 1 + net/ipv6/netfilter/nft_masq_ipv6.c | 1 + net/netfilter/nf_tables_api.c | 14 ++++++++++++++ net/netfilter/nft_masq.c | 12 ++++++++++++ net/netfilter/nft_nat.c | 12 ++++++++++++ 5 files changed, 40 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 1c636d6b5b50..c1023c445920 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .eval = nft_masq_ipv4_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 556262f40761..8a7ac685076d 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .eval = nft_masq_ipv6_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 556a0dfa4abc..65eb2a1160d5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3744,6 +3744,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .abort = nf_tables_abort, }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type) +{ + const struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EOPNOTSUPP; + } + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 6637bab00567..d1ffd5eb3a9b 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -26,6 +26,11 @@ int nft_masq_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_masq *priv = nft_expr_priv(expr); + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; if (tb[NFTA_MASQ_FLAGS] == NULL) return 0; @@ -55,5 +60,12 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 799550b476fb..0f0af6e86fb8 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -95,6 +95,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, u32 family; int err; + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + if (tb[NFTA_NAT_TYPE] == NULL) return -EINVAL; @@ -205,6 +209,13 @@ nla_put_failure: return -1; } +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, @@ -212,6 +223,7 @@ static const struct nft_expr_ops nft_nat_ops = { .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, + .validate = nft_nat_validate, }; static struct nft_expr_type nft_nat_type __read_mostly = { -- cgit v1.2.2 From c7abf25af0f41be4b50d44c5b185d52eea360cb8 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 13 Oct 2014 14:34:41 +0200 Subject: mac80211: fix typo in starting baserate for rts_cts_rate_idx It affects non-(V)HT rates and can lead to selecting an rts_cts rate that is not a basic rate or way superior to the reference rate (ATM rates[0] used for the 1st attempt of the protected frame data). E.g, assuming drivers register growing (bitrate) sorted tables of ieee80211_rate-s, having : - rates[0].idx == d'2 and basic_rates == b'10100 will select rts_cts idx b'10011 & ~d'(BIT(2)-1), i.e. 1, likewise - rates[0].idx == d'2 and basic_rates == b'10001 will select rts_cts idx b'10000 The first is not a basic rate and the second is > rates[0]. Also, wrt severity of the addressed misbehavior, ATM we only have one rts_cts_rate_idx rather than one per rate table entry, so this idx might still point to bitrates > rates[1..MAX_RATES]. Fixes: 5253ffb8c9e1 ("mac80211: always pick a basic rate to tx RTS/CTS for pre-HT rates") Cc: stable@vger.kernel.org Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8fdadfd94ba8..6081329784dd 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -448,7 +448,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, */ if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { u32 basic_rates = vif->bss_conf.basic_rates; - s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; + s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; -- cgit v1.2.2 From 493618a92c6afdd3f6224ab586f169d6a259bb06 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Oct 2014 12:43:50 +0200 Subject: netfilter: nft_compat: fix hook validation for non-base chains Set hook_mask to zero for non-base chains, otherwise people may hit bogus errors from the xt_check_target() and xt_check_match() when validating the uninitialized hook_mask. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7e2683c8a44a..44ae273b4391 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -95,6 +95,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } @@ -293,6 +295,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } -- cgit v1.2.2 From f3f5ddeddd6aeadcef523d55ea9288e3d5c1cbc3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 14 Oct 2014 10:13:48 +0200 Subject: netfilter: nft_compat: validate chain type in match/target We have to validate the real chain type to ensure that matches/targets are not used out from their scope (eg. MASQUERADE in nat chain type). The existing validation relies on the table name, but this is not sufficient since userspace can fool us by using the appropriate table name with a different chain type. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 75 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 44ae273b4391..0480f57a4eb6 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,9 +19,52 @@ #include #include #include -#include /* for set_fs */ #include +static const struct { + const char *name; + u8 type; +} table_to_chaintype[] = { + { "filter", NFT_CHAIN_T_DEFAULT }, + { "raw", NFT_CHAIN_T_DEFAULT }, + { "security", NFT_CHAIN_T_DEFAULT }, + { "mangle", NFT_CHAIN_T_ROUTE }, + { "nat", NFT_CHAIN_T_NAT }, + { }, +}; + +static int nft_compat_table_to_chaintype(const char *table) +{ + int i; + + for (i = 0; table_to_chaintype[i].name != NULL; i++) { + if (strcmp(table_to_chaintype[i].name, table) == 0) + return table_to_chaintype[i].type; + } + + return -1; +} + +static int nft_compat_chain_validate_dependency(const char *tablename, + const struct nft_chain *chain) +{ + enum nft_chain_type type; + const struct nft_base_chain *basechain; + + if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + return 0; + + type = nft_compat_table_to_chaintype(tablename); + if (type < 0) + return -EINVAL; + + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EINVAL; + + return 0; +} + union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; @@ -153,6 +196,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); + if (ret < 0) + goto err; + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -218,6 +265,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, { struct xt_target *target = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -225,11 +273,13 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & target->hooks) - return 0; + if (!(hook_mask & target->hooks)) + return -EINVAL; - /* This target is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(target->table, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -324,6 +374,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(match->name, ctx->chain); + if (ret < 0) + goto err; + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -383,6 +437,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, { struct xt_match *match = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -390,11 +445,13 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & match->hooks) - return 0; + if (!(hook_mask & match->hooks)) + return -EINVAL; - /* This match is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(match->name, + ctx->chain); + if (ret < 0) + return ret; } return 0; } -- cgit v1.2.2 From 5c819a39753d6a3ae9c0092236f59730a369b619 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:16:57 +0200 Subject: netfilter: nft_nat: insufficient attribute validation We have to validate that we at least get an NFTA_NAT_REG_ADDR_MIN or NFTA_NFT_REG_PROTO_MIN attribute. Reject the configuration if none of them are present. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0f0af6e86fb8..5078f1f1c569 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -99,7 +99,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - if (tb[NFTA_NAT_TYPE] == NULL) + if (tb[NFTA_NAT_TYPE] == NULL || + (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && + tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) return -EINVAL; switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { -- cgit v1.2.2 From 61cfac6b42af98ab46bcb3a47e150e7b20d5015e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:19:35 +0200 Subject: netfilter: nft_nat: NFTA_NAT_REG_ADDR_MAX depends on NFTA_NAT_REG_ADDR_MIN Interpret NFTA_NAT_REG_ADDR_MAX if NFTA_NAT_REG_ADDR_MIN is present, otherwise, skip it. Same thing with NFTA_NAT_REG_PROTO_MAX. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 50 +++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 5078f1f1c569..a95e0c1addd3 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -126,38 +126,44 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MIN])); + priv->sreg_addr_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else { + priv->sreg_addr_max = priv->sreg_addr_min; + } + } if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MIN])); + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } if (tb[NFTA_NAT_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); -- cgit v1.2.2 From 1e2d56a5d33a7e1fcd21ed3859f52596d02708b0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 16 Oct 2014 00:24:14 +0200 Subject: netfilter: nft_nat: dump attributes if they are set Dump NFTA_NAT_REG_ADDR_MIN if this is non-zero. Same thing with NFTA_NAT_REG_PROTO_MIN. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_nat.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a95e0c1addd3..afe2b0b45ec4 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -191,17 +191,19 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; + + if (priv->sreg_addr_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, + htonl(priv->sreg_addr_min)) || + nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, + htonl(priv->sreg_addr_max))) + goto nla_put_failure; + } + if (priv->sreg_proto_min) { if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_min)) || + nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) goto nla_put_failure; } -- cgit v1.2.2 From 11b2357d5dbce999803e9055f8c09829a8a87db4 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 20 Oct 2014 10:54:36 +0200 Subject: mac80211: minstrels: fix buffer overflow in HT debugfs rc_stats ATM an HT rc_stats line is 106 chars. Times 8(MCS_GROUP_RATES)*3(SS)*2(GI)*2(BW) + CCK(4), i.e. x100, this is well above the current 8192 - sizeof(*ms) currently allocated. Fix this by squeezing the output as follows (not that we're short on memory but this also improves readability and range, the new format adds one more digit to *ok/*cum and ok/cum): - Before (HT) (106 ch): type rate throughput ewma prob this prob retry this succ/attempt success attempts CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0 0 HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0 0 - After (75 ch): type rate tpt eprob *prob ret *ok(*cum) ok( cum) CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0( 0) HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0( 0) - Align non-HT format Before (non-HT) (83 ch): rate throughput ewma prob this prob this succ/attempt success attempts ABCDP 6 0.0 0.0 0.0 0( 0) 0 0 54 0.0 0.0 0.0 0( 0) 0 0 - After (61 ch): rate tpt eprob *prob *ok(*cum) ok( cum) ABCDP 1 0.0 0.0 0.0 0( 0) 0( 0) 54 0.0 0.0 0.0 0( 0) 0( 0) *This also adds dynamic checks for overflow, lowers the size of the non-HT request (allowing > 30 entries) and replaces the buddy-rounded allocations (s/sizeof(*ms) + 8192/8192). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_debugfs.c | 12 +++++++----- net/mac80211/rc80211_minstrel_ht_debugfs.c | 13 ++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index edde723f9f00..2acab1bcaa4b 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -62,14 +62,14 @@ minstrel_stats_open(struct inode *inode, struct file *file) unsigned int i, tp, prob, eprob; char *p; - ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL); + ms = kmalloc(2048, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); + p += sprintf(p, "rate tpt eprob *prob" + " *ok(*cum) ok( cum)\n"); for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; @@ -86,8 +86,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); eprob = MINSTREL_TRUNC(mrs->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - " %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u" + " %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -102,6 +102,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) mi->sample_packets); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 2048); + return 0; } diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index a72ad46f2a04..d537bec93754 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -63,8 +63,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) prob = MINSTREL_TRUNC(mr->cur_prob * 1000); eprob = MINSTREL_TRUNC(mr->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u " + "%3u %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -96,14 +96,15 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) return ret; } - ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + ms = kmalloc(8192, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob " - "this prob retry this succ/attempt success attempts\n"); + p += sprintf(p, "type rate tpt eprob *prob " + "ret *ok(*cum) ok( cum)\n"); + p = minstrel_ht_stats_dump(mi, max_mcs, p); for (i = 0; i < max_mcs; i++) @@ -118,6 +119,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 8192); + return nonseekable_open(inode, file); } -- cgit v1.2.2 From 1e16aa3ddf863c6b9f37eddf52503230a62dedb3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:16 +0200 Subject: net: gso: use feature flag argument in all protocol gso handlers skb_gso_segment() has a 'features' argument representing offload features available to the output path. A few handlers, e.g. GRE, instead re-fetch the features of skb->dev and use those instead of the provided ones when handing encapsulation/tunnels. Depending on dev->hw_enc_features of the output device skb_gso_segment() can then return NULL even when the caller has disabled all GSO feature bits, as segmentation of inner header thinks device will take care of segmentation. This e.g. affects the tbf scheduler, which will silently drop GRE-encap GSO skbs that did not fit the remaining token quota as the segmentation does not work when device supports corresponding hw offload capabilities. Cc: Pravin B Shelar Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- net/mpls/mpls_gso.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92db7a69f2b9..8b7fe5b03906 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ccda09628de7..f6e345c0bc23 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -68,7 +68,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = skb_mac_gso_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 507310ef4b56..6480cea7aa53 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -58,7 +58,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = gso_inner_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 91014d32488d..a071563a7e6e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -90,7 +90,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); ipv6h = ipv6_hdr(skb); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index e28ed2ef5b06..f0f5309a2d72 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -48,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __skb_push(skb, skb->mac_len); /* Segment inner packet. */ - mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); -- cgit v1.2.2 From 330966e501ffe282d7184fde4518d5e0c24bc7f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:17 +0200 Subject: net: make skb_gso_segment error handling more robust skb_gso_segment has three possible return values: 1. a pointer to the first segmented skb 2. an errno value (IS_ERR()) 3. NULL. This can happen when GSO is used for header verification. However, several callers currently test IS_ERR instead of IS_ERR_OR_NULL and would oops when NULL is returned. Note that these call sites should never actually see such a NULL return value; all callers mask out the GSO bits in the feature argument. However, there have been issues with some protocol handlers erronously not respecting the specified feature mask in some cases. It is preferable to get 'have to turn off hw offloading, else slow' reports rather than 'kernel crashes'. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/netfilter/nfnetlink_queue_core.c | 2 +- net/openvswitch/datapath.c | 2 ++ net/xfrm/xfrm_output.c | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 88e5ef2c7f51..bc6471d4abcd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -231,7 +231,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) */ features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { + if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index a82077d9f59b..7c60ccd61a3e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -665,7 +665,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) * returned by nf_queue. For instance, callers rely on -ECANCELED to * mean 'ignore this hook'. */ - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out_err; queued = 0; err = 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2e31d9e7f4dc..e6d7255183eb 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -324,6 +324,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; /* Queue all of the segments. */ skb = segs; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 499d6c18a8ce..7c532856b398 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -157,6 +157,8 @@ static int xfrm_output_gso(struct sk_buff *skb) kfree_skb(skb); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; do { struct sk_buff *nskb = segs->next; -- cgit v1.2.2 From f993bc25e5196e60514c216d0bca0f600de64af8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Oct 2014 13:49:18 +0200 Subject: net: core: handle encapsulation offloads when computing segment lengths if ->encapsulation is set we have to use inner_tcp_hdrlen and add the size of the inner network headers too. This is 'mostly harmless'; tbf might send skb that is slightly over quota or drop skb even if it would have fit. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/skbuff.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 61059a05ec95..c16615bfb61e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4070,15 +4070,22 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - return tcp_hdrlen(skb) + shinfo->gso_size; + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already * accounted for. */ - return shinfo->gso_size; + return thlen + shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); -- cgit v1.2.2 From 7b8613e0a1502b43b3b36c93c66f835c891f63b3 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 20 Oct 2014 14:44:25 +0800 Subject: tipc: fix a potential deadlock Locking dependency detected below possible unsafe locking scenario: CPU0 CPU1 T0: tipc_named_rcv() tipc_rcv() T1: [grab nametble write lock]* [grab node lock]* T2: tipc_update_nametbl() tipc_node_link_up() T3: tipc_nodesub_subscribe() tipc_nametbl_publish() T4: [grab node lock]* [grab nametble write lock]* The opposite order of holding nametbl write lock and node lock on above two different paths may result in a deadlock. If we move the the updating of the name table after link state named out of node lock, the reverse order of holding locks will be eliminated, and as a result, the deadlock risk. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 46 ++++++++++++++++++++++++++++------------------ net/tipc/node.h | 7 ++++++- net/tipc/socket.c | 2 +- 3 files changed, 35 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index 90cee4a6fce4..5781634e957d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -219,11 +219,11 @@ void tipc_node_abort_sock_conns(struct list_head *conns) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; - u32 addr = n_ptr->addr; n_ptr->working_links++; - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, - l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + pr_info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->net_plane); @@ -284,10 +284,10 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; - u32 addr = n_ptr->addr; n_ptr->working_links--; - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", @@ -552,28 +552,30 @@ void tipc_node_unlock(struct tipc_node *node) LIST_HEAD(conn_sks); struct sk_buff_head waiting_sks; u32 addr = 0; - unsigned int flags = node->action_flags; + int flags = node->action_flags; + u32 link_id = 0; - if (likely(!node->action_flags)) { + if (likely(!flags)) { spin_unlock_bh(&node->lock); return; } + addr = node->addr; + link_id = node->link_id; __skb_queue_head_init(&waiting_sks); - if (node->action_flags & TIPC_WAKEUP_USERS) { + + if (flags & TIPC_WAKEUP_USERS) skb_queue_splice_init(&node->waiting_sks, &waiting_sks); - node->action_flags &= ~TIPC_WAKEUP_USERS; - } - if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { + + if (flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->nsub, &nsub_list); list_replace_init(&node->conn_sks, &conn_sks); - node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } - if (node->action_flags & TIPC_NOTIFY_NODE_UP) { - node->action_flags &= ~TIPC_NOTIFY_NODE_UP; - addr = node->addr; - } - node->action_flags &= ~TIPC_WAKEUP_BCAST_USERS; + node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | + TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | + TIPC_NOTIFY_LINK_DOWN | + TIPC_WAKEUP_BCAST_USERS); + spin_unlock_bh(&node->lock); while (!skb_queue_empty(&waiting_sks)) @@ -588,6 +590,14 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_WAKEUP_BCAST_USERS) tipc_bclink_wakeup_users(); - if (addr) + if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + link_id, addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index 67513c3c852c..04e91458bb29 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,6 +53,7 @@ * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), @@ -60,7 +61,9 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6) + TIPC_WAKEUP_BCAST_USERS = (1 << 6), + TIPC_NOTIFY_LINK_UP = (1 << 7), + TIPC_NOTIFY_LINK_DOWN = (1 << 8) }; /** @@ -100,6 +103,7 @@ struct tipc_node_bclink { * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node */ @@ -116,6 +120,7 @@ struct tipc_node { int link_cnt; int working_links; u32 signature; + u32 link_id; struct list_head nsub; struct sk_buff_head waiting_sks; struct list_head conn_sks; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75275c5cf929..3043f10dc328 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2673,7 +2673,7 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; -- cgit v1.2.2 From 1a194c2d59c55c37cb4c0c459d5418071a141341 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 20 Oct 2014 14:46:35 +0800 Subject: tipc: fix lockdep warning when intra-node messages are delivered When running tipcTC&tipcTS test suite, below lockdep unsafe locking scenario is reported: [ 1109.997854] [ 1109.997988] ================================= [ 1109.998290] [ INFO: inconsistent lock state ] [ 1109.998575] 3.17.0-rc1+ #113 Not tainted [ 1109.998762] --------------------------------- [ 1109.998762] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 1109.998762] swapper/7/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 1109.998762] (slock-AF_TIPC){+.?...}, at: [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] {SOFTIRQ-ON-W} state was registered at: [ 1109.998762] [] __lock_acquire+0x6a0/0x1d80 [ 1109.998762] [] lock_acquire+0x95/0x1e0 [ 1109.998762] [] _raw_spin_lock+0x3e/0x80 [ 1109.998762] [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] tipc_link_xmit+0xa8/0xc0 [tipc] [ 1109.998762] [] tipc_sendmsg+0x15f/0x550 [tipc] [ 1109.998762] [] tipc_connect+0x105/0x140 [tipc] [ 1109.998762] [] SYSC_connect+0xae/0xc0 [ 1109.998762] [] SyS_connect+0xe/0x10 [ 1109.998762] [] compat_SyS_socketcall+0xb8/0x200 [ 1109.998762] [] sysenter_dispatch+0x7/0x1f [ 1109.998762] irq event stamp: 241060 [ 1109.998762] hardirqs last enabled at (241060): [] __local_bh_enable_ip+0x6d/0xd0 [ 1109.998762] hardirqs last disabled at (241059): [] __local_bh_enable_ip+0x2f/0xd0 [ 1109.998762] softirqs last enabled at (241020): [] _local_bh_enable+0x22/0x50 [ 1109.998762] softirqs last disabled at (241021): [] irq_exit+0x96/0xc0 [ 1109.998762] [ 1109.998762] other info that might help us debug this: [ 1109.998762] Possible unsafe locking scenario: [ 1109.998762] [ 1109.998762] CPU0 [ 1109.998762] ---- [ 1109.998762] lock(slock-AF_TIPC); [ 1109.998762] [ 1109.998762] lock(slock-AF_TIPC); [ 1109.998762] [ 1109.998762] *** DEADLOCK *** [ 1109.998762] [ 1109.998762] 2 locks held by swapper/7/0: [ 1109.998762] #0: (rcu_read_lock){......}, at: [] __netif_receive_skb_core+0x69/0xb70 [ 1109.998762] #1: (rcu_read_lock){......}, at: [] tipc_l2_rcv_msg+0x40/0x260 [tipc] [ 1109.998762] [ 1109.998762] stack backtrace: [ 1109.998762] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 3.17.0-rc1+ #113 [ 1109.998762] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 1109.998762] ffffffff82745830 ffff880016c03828 ffffffff81a209eb 0000000000000007 [ 1109.998762] ffff880017b3cac0 ffff880016c03888 ffffffff81a1c5ef 0000000000000001 [ 1109.998762] ffff880000000001 ffff880000000000 ffffffff81012d4f 0000000000000000 [ 1109.998762] Call Trace: [ 1109.998762] [] dump_stack+0x4e/0x68 [ 1109.998762] [] print_usage_bug+0x1f1/0x202 [ 1109.998762] [] ? save_stack_trace+0x2f/0x50 [ 1109.998762] [] mark_lock+0x28c/0x2f0 [ 1109.998762] [] ? print_irq_inversion_bug.part.46+0x1f0/0x1f0 [ 1109.998762] [] __lock_acquire+0x5ad/0x1d80 [ 1109.998762] [] ? trace_hardirqs_on+0xd/0x10 [ 1109.998762] [] ? sched_clock_cpu+0x98/0xc0 [ 1109.998762] [] ? local_clock+0x1b/0x30 [ 1109.998762] [] ? lock_release_holdtime.part.29+0x1c/0x1a0 [ 1109.998762] [] ? sched_clock_local+0x25/0x90 [ 1109.998762] [] ? tipc_sk_get+0x60/0x80 [tipc] [ 1109.998762] [] lock_acquire+0x95/0x1e0 [ 1109.998762] [] ? tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] ? trace_hardirqs_on_caller+0xa6/0x1c0 [ 1109.998762] [] _raw_spin_lock+0x3e/0x80 [ 1109.998762] [] ? tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] ? tipc_sk_get+0x60/0x80 [tipc] [ 1109.998762] [] tipc_sk_rcv+0x49/0x2b0 [tipc] [ 1109.998762] [] tipc_rcv+0x5ed/0x960 [tipc] [ 1109.998762] [] tipc_l2_rcv_msg+0xcc/0x260 [tipc] [ 1109.998762] [] ? tipc_l2_rcv_msg+0x40/0x260 [tipc] [ 1109.998762] [] __netif_receive_skb_core+0x5e5/0xb70 [ 1109.998762] [] ? __netif_receive_skb_core+0x69/0xb70 [ 1109.998762] [] ? dev_gro_receive+0x259/0x4e0 [ 1109.998762] [] __netif_receive_skb+0x26/0x70 [ 1109.998762] [] netif_receive_skb_internal+0x2d/0x1f0 [ 1109.998762] [] napi_gro_receive+0xd8/0x240 [ 1109.998762] [] e1000_clean_rx_irq+0x2c4/0x530 [ 1109.998762] [] e1000_clean+0x266/0x9c0 [ 1109.998762] [] ? local_clock+0x1b/0x30 [ 1109.998762] [] ? sched_clock_local+0x25/0x90 [ 1109.998762] [] net_rx_action+0x141/0x310 [ 1109.998762] [] ? handle_fasteoi_irq+0xe0/0x150 [ 1109.998762] [] __do_softirq+0x116/0x4d0 [ 1109.998762] [] irq_exit+0x96/0xc0 [ 1109.998762] [] do_IRQ+0x67/0x110 [ 1109.998762] [] common_interrupt+0x6f/0x6f [ 1109.998762] [] ? default_idle+0x37/0x250 [ 1109.998762] [] ? default_idle+0x35/0x250 [ 1109.998762] [] arch_cpu_idle+0xf/0x20 [ 1109.998762] [] cpu_startup_entry+0x27d/0x4d0 [ 1109.998762] [] start_secondary+0x188/0x1f0 When intra-node messages are delivered from one process to another process, tipc_link_xmit() doesn't disable BH before it directly calls tipc_sk_rcv() on process context to forward messages to destination socket. Meanwhile, if messages delivered by remote node arrive at the node and their destinations are also the same socket, tipc_sk_rcv() running on process context might be preempted by tipc_sk_rcv() running BH context. As a result, the latter cannot obtain the socket lock as the lock was obtained by the former, however, the former has no chance to be run as the latter is owning the CPU now, so headlock happens. To avoid it, BH should be always disabled in tipc_sk_rcv(). Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3043f10dc328..51bddc236a15 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1776,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *buf) sk = &tsk->sk; /* Queue message */ - bh_lock_sock(sk); + spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) { rc = filter_rcv(sk, buf); @@ -1787,7 +1787,7 @@ int tipc_sk_rcv(struct sk_buff *buf) if (sk_add_backlog(sk, buf, limit)) rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); + spin_unlock_bh(&sk->sk_lock.slock); tipc_sk_put(tsk); if (likely(!rc)) return 0; -- cgit v1.2.2 From 78fd1d0ab072d4d9b5f0b7c14a1516665170b565 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Oct 2014 22:05:38 +0200 Subject: netlink: Re-add locking to netlink_lookup() and seq walker The synchronize_rcu() in netlink_release() introduces unacceptable latency. Reintroduce minimal lookup so we can drop the synchronize_rcu() until socket destruction has been RCUfied. Cc: David S. Miller Cc: Eric Dumazet Reported-by: Steinar H. Gunderson Reported-and-tested-by: Heiko Carstens Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7a186e74b1b3..f1de72de273e 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); +/* nl_table locking explained: + * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock + * combined with an RCU read-side lock. Insertion and removal are protected + * with nl_sk_hash_lock while using RCU list modification primitives and may + * run in parallel to nl_table_lock protected lookups. Destruction of the + * Netlink socket may only occur *after* nl_table_lock has been acquired + * either during or after the socket has been removed from the list. + */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -109,10 +117,10 @@ EXPORT_SYMBOL_GPL(nl_sk_hash_lock); static int lockdep_nl_sk_hash_is_held(void) { #ifdef CONFIG_LOCKDEP - return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; -#else - return 1; + if (debug_locks) + return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); #endif + return 1; } static ATOMIC_NOTIFIER_HEAD(netlink_chain); @@ -1028,11 +1036,13 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct netlink_table *table = &nl_table[protocol]; struct sock *sk; + read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); + read_unlock(&nl_table_lock); return sk; } @@ -1257,9 +1267,6 @@ static int netlink_release(struct socket *sock) } netlink_table_ungrab(); - /* Wait for readers to complete */ - synchronize_net(); - kfree(nlk->groups); nlk->groups = NULL; @@ -1281,6 +1288,7 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); + netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1288,9 +1296,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); + netlink_table_ungrab(); goto retry; } rcu_read_unlock(); + netlink_table_ungrab(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -2921,14 +2931,16 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(nl_table_lock) __acquires(RCU) { + read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct rhashtable *ht; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2943,19 +2955,19 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter = seq->private; nlk = v; - rht_for_each_entry_rcu(nlk, nlk->node.next, node) + i = iter->link; + ht = &nl_table[i].hash; + rht_for_each_entry(nlk, nlk->node.next, ht, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; - i = iter->link; j = iter->hash_idx + 1; do { - struct rhashtable *ht = &nl_table[i].hash; const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; @@ -2971,9 +2983,10 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(RCU) __releases(nl_table_lock) { rcu_read_unlock(); + read_unlock(&nl_table_lock); } -- cgit v1.2.2 From 7c1c97d54f9bfc810908d3903cb8bcacf734df18 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 21 Oct 2014 11:23:30 +0200 Subject: net: sched: initialize bstats syncp Use netdev_alloc_pcpu_stats to allocate percpu stats and initialize syncp. Fixes: 22e0f8b9322c "net: sched: make bstats per cpu and estimator RCU safe" Signed-off-by: Sabrina Dubroca Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cf61b3e633c..76f402e05bd6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -947,7 +947,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = - alloc_percpu(struct gnet_stats_basic_cpu); + netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!sch->cpu_bstats) goto err_out4; -- cgit v1.2.2 From e37ad9fd636071e45368d1d9cc3b7b421281ce7f Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Mon, 13 Oct 2014 13:09:28 -0300 Subject: netfilter: nf_conntrack: allow server to become a client in TW handling When a port that was used to listen for inbound connections gets closed and reused for outgoing connections (like rsh ends up doing for stderr flow), current we may reject the SYN/ACK packet for the new connection because tcp_conntracks states forbirds a port to become a client while there is still a TIME_WAIT entry in there for it. As TCP may expire the TIME_WAIT socket in 60s and conntrack's timeout for it is 120s, there is a ~60s window that the application can end up opening a port that conntrack will end up blocking. This patch fixes this by simply allowing such state transition: if we see a SYN, in TIME_WAIT state, on REPLY direction, move it to sSS. Note that the rest of the code already handles this situation, more specificly in tcp_packet(), first switch clause. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44d1ea32570a..d87b6423ffb2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open @@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sFW -> sIV * sCW -> sIV * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. + * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -- cgit v1.2.2 From 0f9f5e1b83abd2b37c67658e02a6fc9001831fa5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Oct 2014 11:28:12 +0300 Subject: netfilter: ipset: off by one in ip_set_nfnl_get_byindex() The ->ip_set_list[] array is initialized in ip_set_net_init() and it has ->ip_set_max elements so this check should be >= instead of > otherwise we are off by one. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 912e5a05b79d..86f9d76b1464 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - if (index > inst->ip_set_max) + if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); -- cgit v1.2.2 From c123bb7163043bb8f33858cf8e45b01c17dbd171 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 21 Oct 2014 11:08:21 +0200 Subject: netfilter: nf_tables: check for NULL in nf_tables_newchain pcpu stats allocation alloc_percpu returns NULL on failure, not a negative error code. Fixes: ff3cd7b3c922 ("netfilter: nf_tables: refactor chain statistic routines") Signed-off-by: Sabrina Dubroca Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65eb2a1160d5..11ab4b078f3b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (IS_ERR(stats)) { + if (stats == NULL) { module_put(type->owner); kfree(basechain); - return PTR_ERR(stats); + return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } -- cgit v1.2.2 From a63ba13eec092b70d4e5522d692eaeb2f9747387 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Tue, 21 Oct 2014 16:06:05 +0200 Subject: net: tso: fix unaligned access to crafted TCP header in helper API The crafted header start address is from a driver supplied buffer, which one can reasonably expect to be aligned on a 4-bytes boundary. However ATM the TSO helper API is only used by ethernet drivers and the tcp header will then be aligned to a 2-bytes only boundary from the header start address. Signed-off-by: Karl Beldan Cc: Ezequiel Garcia Signed-off-by: David S. Miller --- net/core/tso.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/tso.c b/net/core/tso.c index 8c3203c585b0..630b30b4fb53 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,6 +1,7 @@ #include #include #include +#include /* Calculate expected number of TX descriptors */ int tso_count_descs(struct sk_buff *skb) @@ -23,7 +24,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, iph->id = htons(tso->ip_id); iph->tot_len = htons(size + hdr_len - mac_hdr_len); tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - tcph->seq = htonl(tso->tcp_seq); + put_unaligned_be32(tso->tcp_seq, &tcph->seq); tso->ip_id++; if (!is_last) { -- cgit v1.2.2 From 789f202326640814c52f82e80cef3584d8254623 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 22 Oct 2014 17:09:53 +0800 Subject: xfrm6: fix a potential use after free in xfrm6_policy.c pskb_may_pull() maybe change skb->data and make nh and exthdr pointer oboslete, so recompute the nd and exthdr Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/xfrm6_policy.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ac49f84fe2c3..5f983644373a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (!onlyproto && (nh + offset + 4 < skb->data || pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports = (__be16 *)exthdr; + __be16 *ports; + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); fl6->fl6_sport = ports[!!reverse]; fl6->fl6_dport = ports[!reverse]; } @@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { - u8 *icmp = (u8 *)exthdr; + u8 *icmp; + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); fl6->fl6_icmp_type = icmp[0]; fl6->fl6_icmp_code = icmp[1]; } @@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; - mh = (struct ip6_mh *)exthdr; + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); fl6->fl6_mh_type = mh->ip6mh_type; } fl6->flowi6_proto = nexthdr; -- cgit v1.2.2 From 9e7ceb060754f134231f68cb29d5db31419fe1ed Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Wed, 22 Oct 2014 21:42:01 +0530 Subject: net: fix saving TX flow hash in sock for outgoing connections The commit "net: Save TX flow hash in sock and set in skbuf on xmit" introduced the inet_set_txhash() and ip6_set_txhash() routines to calculate and record flow hash(sk_txhash) in the socket structure. sk_txhash is used to set skb->hash which is used to spread flows across multiple TXQs. But, the above routines are invoked before the source port of the connection is created. Because of this all outgoing connections that just differ in the source port get hashed into the same TXQ. This patch fixes this problem for IPv4/6 by invoking the the above routines after the source port is available for the socket. Fixes: b73c3d0e4("net: Save TX flow hash in sock and set in skbuf on xmit") Signed-off-by: Sathya Perla Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94d1a7757ff7..9c7d7621466b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; - inet_set_txhash(sk); - inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; + inet_set_txhash(sk); + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 831495529b82..ace29b60813c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -200,8 +200,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; - ip6_set_txhash(sk); - /* * TCP over IPv4 */ @@ -297,6 +295,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; + ip6_set_txhash(sk); + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, -- cgit v1.2.2 From 7677e86843e2136a9b05549a9ca47d4f744565b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 Oct 2014 22:18:02 +0800 Subject: bridge: Do not compile options in br_parse_ip_options Commit 462fb2af9788a82a534f8184abfde31574e1cfa0 bridge : Sanitize skb before it enters the IP stack broke when IP options are actually used because it mangles the skb as if it entered the IP stack which is wrong because the bridge is supposed to operate below the IP stack. Since nobody has actually requested for parsing of IP options this patch fixes it by simply reverting to the previous approach of ignoring all IP options, i.e., zeroing the IPCB. If and when somebody who uses IP options and actually needs them to be parsed by the bridge complains then we can revisit this. Reported-by: David Newall Signed-off-by: Herbert Xu Tested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1bada53bb195..1a4f32c09ad5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) static int br_parse_ip_options(struct sk_buff *skb) { - struct ip_options *opt; const struct iphdr *iph; struct net_device *dev = skb->dev; u32 len; @@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb) goto inhdr_error; iph = ip_hdr(skb); - opt = &(IPCB(skb)->opt); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) @@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb) } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (iph->ihl == 5) - return 0; - - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev_net(dev), opt, skb)) - goto inhdr_error; - - /* Check correct handling of SRR option */ - if (unlikely(opt->srr)) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) - goto drop; - - if (ip_options_rcv_srr(skb)) - goto drop; - } - + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ return 0; inhdr_error: -- cgit v1.2.2 From 9dfa1dfe4d5e5e66a991321ab08afe69759d797a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:06 +0200 Subject: netfilter: nf_log: account for size of NLMSG_DONE attribute We currently neither account for the nlattr size, nor do we consider the size of the trailing NLMSG_DONE when allocating nlmsg skb. This can result in nflog to stop working, as __nfulnl_send() re-tries sending forever if it failed to append NLMSG_DONE (which will never work if buffer is not large enough). Reported-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1e3a0579416..8117fba8e661 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -649,7 +649,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -692,8 +693,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); -- cgit v1.2.2 From c1e7dc91eed0ed1a51c9b814d648db18bf8fc6e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:07 +0200 Subject: netfilter: nfnetlink_log: fix maximum packet length logged to userspace don't try to queue payloads > 0xffff - NLA_HDRLEN, it does not work. The nla length includes the size of the nla struct, so anything larger results in u16 integer overflow. This patch is similar to 9cefbbc9c8f9abe (netfilter: nfnetlink_queue: cleanup copy_range usage). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8117fba8e661..2d02eac35415 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -43,7 +43,8 @@ #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -679,8 +682,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; -- cgit v1.2.2 From b51d3fa364885a2c1e1668f88776c67c95291820 Mon Sep 17 00:00:00 2001 From: Houcheng Lin Date: Thu, 23 Oct 2014 10:36:08 +0200 Subject: netfilter: nf_log: release skbuff on nlmsg put failure The kernel should reserve enough room in the skb so that the DONE message can always be appended. However, in case of e.g. new attribute erronously not being size-accounted for, __nfulnl_send() will still try to put next nlmsg into this full skbuf, causing the skb to be stuck forever and blocking delivery of further messages. Fix issue by releasing skb immediately after nlmsg_put error and WARN() so we can track down the cause of such size mismatch. [ fw@strlen.de: add tailroom/len info to WARN ] Signed-off-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2d02eac35415..5f1be5ba3559 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -346,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void -- cgit v1.2.2 From 349ce993ac706869d553a1816426d3a4bfda02b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 12:58:58 -0700 Subject: tcp: md5: do not use alloc_percpu() percpu tcp_md5sig_pool contains memory blobs that ultimately go through sg_set_buf(). -> sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); This requires that whole area is in a physically contiguous portion of memory. And that @buf is not backed by vmalloc(). Given that alloc_percpu() can use vmalloc() areas, this does not fit the requirements. Replace alloc_percpu() by a static DEFINE_PER_CPU() as tcp_md5sig_pool is small anyway, there is no gain to dynamically allocate it. Signed-off-by: Eric Dumazet Fixes: 765cf9976e93 ("tcp: md5: remove one indirection level in tcp_md5sig_pool") Reported-by: Crestez Dan Leonard Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 59 ++++++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1bec4e76d88c..39ec0c379545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return raw_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } -- cgit v1.2.2 From 93a35f59f1b13a02674877e3efdf07ae47e34052 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 06:30:30 -0700 Subject: net: napi_reuse_skb() should check pfmemalloc Do not reuse skb if it was pfmemalloc tainted, otherwise future frame might be dropped anyway. Signed-off-by: Eric Dumazet Signed-off-by: Roman Gushchin Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b793e3521a36..945bbd001359 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); -- cgit v1.2.2 From 7965ee93719921ea5978f331da653dfa2d7b99f5 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Sun, 26 Oct 2014 12:22:40 +0100 Subject: netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops() The code looks for an already loaded target, and the correct list to search is nft_target_list, not nft_match_list. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 0480f57a4eb6..9d6d6f60a80f 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -672,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, family = ctx->afi->family; /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_match_list, head) { + list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && -- cgit v1.2.2 From f89b7755f517cdbb755d7543eef986ee9d54e654 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 23 Oct 2014 18:41:08 -0700 Subject: bpf: split eBPF out of NET introduce two configs: - hidden CONFIG_BPF to select eBPF interpreter that classic socket filters depend on - visible CONFIG_BPF_SYSCALL (default off) that tracing and sockets can use that solves several problems: - tracing and others that wish to use eBPF don't need to depend on NET. They can use BPF_SYSCALL to allow loading from userspace or select BPF to use it directly from kernel in NET-less configs. - in 3.18 programs cannot be attached to events yet, so don't force it on - when the rest of eBPF infra is there in 3.19+, it's still useful to switch it off to minimize kernel size bloat-o-meter on x64 shows: add/remove: 0/60 grow/shrink: 0/2 up/down: 0/-15601 (-15601) tested with many different config combinations. Hopefully didn't miss anything. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 6272420a721b..99815b5454bf 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,7 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS - select ANON_INODES + select BPF ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even -- cgit v1.2.2 From 3d53666b40007b55204ee8890618da79a20c9940 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Mon, 6 Oct 2014 08:46:19 -0700 Subject: ipvs: Avoid null-pointer deref in debug code Use daddr instead of reaching into dest. Reported-by: Dan Carpenter Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 91f17c1eb8a2..437a3663ad03 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -316,7 +316,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI4\n", &dest->addr.ip); + " daddr=%pI4\n", &daddr); goto err_put; } @@ -458,7 +458,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI6\n", &dest->addr.in6); + " daddr=%pI6\n", daddr); goto err_put; } -- cgit v1.2.2 From ae439286a0dec99cc8029868243689b5b5f3ff75 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 24 Oct 2014 23:44:04 +0200 Subject: net: dsa: Error out on tagging protocol mismatches If there is a mismatch between enabled tagging protocols and the protocol the switch supports, error out, rather than continue with a situation which is unlikely to work. Signed-off-by: Andrew Lunn cc: alexander.h.duyck@intel.com Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22f34cf4cb27..6317b41c99b0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -174,8 +174,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, dst->rcv = brcm_netdev_ops.rcv; break; #endif - default: + case DSA_TAG_PROTO_NONE: break; + default: + ret = -ENOPROTOOPT; + goto out; } dst->tag_protocol = drv->tag_protocol; -- cgit v1.2.2 From d56109020d93337545dd257a790cb429a70acfad Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 24 Oct 2014 16:55:58 -0700 Subject: sch_pie: schedule the timer after all init succeed Cc: Vijay Subramanian Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Eric Dumazet --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 33d7a98a7a97..b783a446d884 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -445,7 +445,6 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = q->params.limit; setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); - mod_timer(&q->adapt_timer, jiffies + HZ / 2); if (opt) { int err = pie_change(sch, opt); @@ -454,6 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) return err; } + mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; } -- cgit v1.2.2 From b2ed64a97430a26a63c6ea91c9b50e639a98dfbc Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 27 Oct 2014 17:39:16 +0100 Subject: ipv6: notify userspace when we added or changed an ipv6 token NetworkManager might want to know that it changed when the router advertisement arrives. Signed-off-by: Lubomir Rintel Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 725c763270a0..0169ccf5aa4f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4531,6 +4531,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + inet6_ifinfo_notify(RTM_NEWLINK, idev); addrconf_verify_rtnl(); return 0; } -- cgit v1.2.2 From 65ba1f1ec0eff1c25933468e1d238201c0c2cb29 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:30:34 +0100 Subject: inet: frags: fix a race between inet_evict_bucket and inet_frag_kill When the evictor is running it adds some chosen frags to a local list to be evicted once the chain lock has been released but at the same time the *frag_queue can be running for some of the same queues and it may call inet_frag_kill which will wait on the chain lock and will then delete the queue from the wrong list since it was added in the eviction one. The fix is simple - check if the queue has the evict flag set under the chain lock before deleting it, this is safe because the evict flag is set only under that lock and having the flag set also means that the queue has been detached from the chain list, so no need to delete it again. An important note to make is that we're safe w.r.t refcnt because inet_frag_kill and inet_evict_bucket will sync on the del_timer operation where only one of the two can succeed (or if the timer is executing - none of them), the cases are: 1. inet_frag_kill succeeds in del_timer - then the timer ref is removed, but inet_evict_bucket will not add this queue to its expire list but will restart eviction in that chain 2. inet_evict_bucket succeeds in del_timer - then the timer ref is kept until the evictor "expires" the queue, but inet_frag_kill will remove the initial ref and will set INET_FRAG_COMPLETE which will make the frag_expire fn just to remove its ref. In the end all of the queue users will do an inet_frag_put and the one that reaches 0 will free it. The refcount balance should be okay. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Suggested-by: Eric Dumazet Reported-by: Patrick McLean Tested-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9eb89f3f0ee4..894ec30c5896 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,7 +285,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - hlist_del(&fq->list); + if (!(fq->flags & INET_FRAG_EVICTED)) + hlist_del(&fq->list); spin_unlock(&hb->chain_lock); } -- cgit v1.2.2 From d70127e8a942364de8dd140fe73893efda363293 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:44:01 +0100 Subject: inet: frags: remove the WARN_ON from inet_evict_bucket The WARN_ON in inet_evict_bucket can be triggered by a valid case: inet_frag_kill and inet_evict_bucket can be running in parallel on the same queue which means that there has been at least one more ref added by a previous inet_frag_find call, but inet_frag_kill can delete the timer before inet_evict_bucket which will cause the WARN_ON() there to trigger since we'll have refcnt!=1. Now, this case is valid because the queue is being "killed" for some reason (removed from the chain list and its timer deleted) so it will get destroyed in the end by one of the inet_frag_put() calls which reaches 0 i.e. refcnt is still valid. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Reported-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 894ec30c5896..19419b60cb37 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -146,7 +146,6 @@ evict_again: atomic_inc(&fq->refcnt); spin_unlock(&hb->chain_lock); del_timer_sync(&fq->timer); - WARN_ON(atomic_read(&fq->refcnt) != 1); inet_frag_put(fq, f); goto evict_again; } -- cgit v1.2.2 From fa19c2b050ab5254326f5fc07096dd3c6a8d5d58 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 30 Oct 2014 10:09:53 +0100 Subject: ipv4: Do not cache routing failures due to disabled forwarding. If we cache them, the kernel will reuse them, independently of whether forwarding is enabled or not. Which means that if forwarding is disabled on the input interface where the first routing request comes from, then that unreachable result will be cached and reused for other interfaces, even if forwarding is enabled on them. The opposite is also true. This can be verified with two interfaces A and B and an output interface C, where B has forwarding enabled, but not A and trying ip route get $dst iif A from $src && ip route get $dst iif B from $src Signed-off-by: Nicolas Cavallari Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2d4ae469b471..6a2155b02602 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1798,6 +1798,7 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; + res.fi = NULL; goto local_input; /* -- cgit v1.2.2 From 14051f0452a2c26a3f4791e6ad6a435e8f1945ff Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 30 Oct 2014 08:40:56 -0700 Subject: gre: Use inner mac length when computing tunnel length Currently, skb_inner_network_header is used but this does not account for Ethernet header for ETH_P_TEB. Use skb_inner_mac_header which handles TEB and also should work with IP encapsulation in which case inner mac and inner network headers are the same. Tested: Ran TCP_STREAM over GRE, worked as expected. Signed-off-by: Tom Herbert Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f6e345c0bc23..bb5947b0ce2d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -47,7 +47,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); if (unlikely(ghl < sizeof(*greh))) goto out; -- cgit v1.2.2 From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2014 10:32:34 -0700 Subject: net: skb_fclone_busy() needs to detect orphaned skb Some drivers are unable to perform TX completions in a bound time. They instead call skb_orphan() Problem is skb_fclone_busy() has to detect this case, otherwise we block TCP retransmits and can freeze unlucky tcp sessions on mostly idle hosts. Signed-off-by: Eric Dumazet Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3af21296d967..a3d453b94747 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4c4e457e7888..88bf289abdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1962,7 +1962,7 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } -- cgit v1.2.2 From 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- net/ipv6/output_core.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'net') diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index fc24c390af05..97f41a3e68d9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,11 +3,45 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include +#include #include #include #include #include +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); -- cgit v1.2.2 From 4d87716cd057bde3f90e304289c1fec88d45a1cc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 12:25:06 +0200 Subject: netfilter: nf_tables_bridge: update hook_mask to allow {pre,post}routing Fixes: 36d2af5 ("netfilter: nf_tables: allow to filter from prerouting and postrouting") Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index da17a5eab8b4..074c557ab505 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -75,9 +75,11 @@ static const struct nf_chain_type filter_bridge = { .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_LOCAL_IN) | + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT), + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), }; static int __init nf_tables_bridge_init(void) -- cgit v1.2.2 From 052b9498eea532deb5de75277a53f6e0623215dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:24:57 +0200 Subject: netfilter: nf_reject_ipv4: split nf_send_reset() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_iphdr_put(): to build the IPv4 header. * nf_reject_ip_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 88 ++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 92b303dbd5fc..1baaa83dfe5c 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,43 +12,39 @@ #include #include #include +#include -/* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; + return NULL; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); + sizeof(struct tcphdr), _oth); if (oth == NULL) - return; + return NULL; /* No RST for RST. */ if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; + return NULL; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); + return NULL; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); - skb_reserve(nskb, LL_MAX_HEADER); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); @@ -57,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; + niph->protocol = protocol; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); @@ -69,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; - if (oth->ack) + if (oth->ack) { tcph->seq = oth->ack_seq; - else { + } else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); @@ -84,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); - nskb->protocol = htons(ETH_P_IP); + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; -- cgit v1.2.2 From 8bfcdf6671b1c8006c52c3eaf9fd1b5dfcf41c3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Oct 2014 12:35:54 +0100 Subject: netfilter: nf_reject_ipv6: split nf_send_reset6() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip6_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_ip6hdr_put(): to build the IPv6 header. * nf_reject_ip6_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_reject_ipv6.c | 175 ++++++++++++++++++++++-------------- 1 file changed, 109 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9defc6c59..015eb8a80766 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,116 +12,102 @@ #include #include #include +#include -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; u8 proto; __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } + int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); - return; + return NULL; } - otcplen = oldskb->len - tcphoff; + *otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; + if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n", + proto, *otcplen); + return NULL; } - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); + otcph = skb_header_pointer(oldskb, tcphoff, sizeof(struct tcphdr), + otcph); + if (otcph == NULL) + return NULL; /* No RST for RST. */ - if (otcph.rst) { + if (otcph->rst) { pr_debug("RST is set\n"); - return; + return NULL; } /* Check checksum. */ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; + return NULL; } - skb_dst_set(nskb, dst); + return otcph; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); - skb_reserve(nskb, hh_len + dst->header_len); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit) +{ + struct ipv6hdr *ip6h; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = hoplimit; + ip6h->nexthdr = protocol; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + nskb->protocol = htons(ETH_P_IPV6); + + return ip6h; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); + +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) +{ + struct tcphdr *tcph; + int needs_ack; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; + tcph->source = oth->dest; + tcph->dest = oth->source; - if (otcph.ack) { + if (oth->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + otcplen - (oth->doff<<2)); tcph->seq = 0; } @@ -139,6 +125,63 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), IPPROTO_TCP, csum_partial(tcph, sizeof(struct tcphdr), 0)); +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr _otcph; + const struct tcphdr *otcph; + unsigned int otcplen, hh_len; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; + struct dst_entry *dst = NULL; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + otcph = nf_reject_ip6_tcphdr_get(oldskb, &_otcph, &otcplen, hook); + if (!otcph) + return; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph->dest; + fl6.fl6_dport = otcph->source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + ip6_dst_hoplimit(dst)); + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); -- cgit v1.2.2 From 523b929d5446c023e1219aa81455a8c766cac883 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:40:26 +0200 Subject: netfilter: nft_reject_bridge: don't use IP stack to reject traffic If the packet is received via the bridge stack, this cannot reject packets from the IP stack. This adds functions to build the reject packet and send it from the bridge stack. Comments and assumptions on this patch: 1) Validate the IPv4 and IPv6 headers before further processing, given that the packet comes from the bridge stack, we cannot assume they are clean. Truncated packets are dropped, we follow similar approach in the existing iptables match/target extensions that need to inspect layer 4 headers that is not available. This also includes packets that are directed to multicast and broadcast ethernet addresses. 2) br_deliver() is exported to inject the reject packet via bridge localout -> postrouting. So the approach is similar to what we already do in the iptables reject target. The reject packet is sent to the bridge port from which we have received the original packet. 3) The reject packet is forged based on the original packet. The TTL is set based on sysctl_ip_default_ttl for IPv4 and per-net ipv6.devconf_all hoplimit for IPv6. Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_forward.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 263 +++++++++++++++++++++++++++++-- 2 files changed, 254 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 992ec49a96aa..44cb786b925a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -112,6 +112,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index a76479535df2..31b27e1bab9f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -16,6 +16,237 @@ #include #include #include +#include +#include +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); +} + +static int nft_reject_iphdr_validate(struct sk_buff *oldskb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(oldskb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(oldskb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (oldskb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(oldskb, iph->ihl*4)) + return 0; + + return 1; +} + +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->ttl = sysctl_ip_default_ttl; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + void *payload; + __wsum csum; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); + memset(icmph, 0, sizeof(*icmph)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static int nft_reject_ip6hdr_validate(struct sk_buff *oldskb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(oldskb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(oldskb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > oldskb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + void *payload; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); + memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], @@ -23,35 +254,46 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v4_tcp_reset(pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + nft_reject_icmp_code(priv->icmp_code)); break; } break; case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, - nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + nft_reject_icmpv6_code(priv->icmp_code)); break; } break; @@ -59,6 +301,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, /* No explicit way to reject this protocol, drop it. */ break; } +out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -- cgit v1.2.2 From 127917c29a432c3b798e014a1714e9c1af0f87fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Oct 2014 14:08:17 +0100 Subject: netfilter: nft_reject_bridge: restrict reject to prerouting and input Restrict the reject expression to the prerouting and input bridge hooks. If we allow this to be used from forward or any other later bridge hook, if the frame is flooded to several ports, we'll end up sending several reject packets, one per cloned packet. Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 33 +++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 31b27e1bab9f..654c9018e3e7 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -305,12 +306,34 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } +static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + switch (basechain->ops[0].hooknum) { + case NF_BR_PRE_ROUTING: + case NF_BR_LOCAL_IN: + break; + default: + return -EOPNOTSUPP; + } + } + return 0; +} + static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; + int icmp_code, err; + + err = nft_reject_bridge_validate_hooks(ctx->chain); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -359,6 +382,13 @@ nla_put_failure: return -1; } +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_reject_bridge_validate_hooks(ctx->chain); +} + static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, @@ -366,6 +396,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .eval = nft_reject_bridge_eval, .init = nft_reject_bridge_init, .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { -- cgit v1.2.2 From f7065f4bd3fe4ad6bf7e49ba7c68baa2c7046146 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:49:57 -0700 Subject: mpls: Fix mpls_gso handler. mpls gso handler needs to pull skb after segmenting skb. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/mpls_gso.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index f0f5309a2d72..e3545f21a099 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -59,8 +59,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * above pulled. It will be re-pushed after returning * skb_mac_gso_segment(), an indirect caller of this function. */ - __skb_push(skb, skb->data - skb_mac_header(skb)); - + __skb_pull(skb, skb->data - skb_mac_header(skb)); out: return segs; } -- cgit v1.2.2 From de05c400f7dfa566f598140f8604a5de8067cd5f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:50:04 -0700 Subject: mpls: Allow mpls_gso to be built as module Kconfig already allows mpls to be built as module. Following patch fixes Makefile to do same. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 0a3c171be537..6dec088c2d0f 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -1,4 +1,4 @@ # # Makefile for MPLS. # -obj-y += mpls_gso.o +obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o -- cgit v1.2.2 From e0fb6fb6d52686134b2ece144060219591d4f8d3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 30 Oct 2014 20:50:15 -0700 Subject: net: ethtool: Return -EOPNOTSUPP if user space tries to read EEPROM with lengh 0 If a driver supports reading EEPROM but no EEPROM is installed in the system, the driver's get_eeprom_len function returns 0. ethtool will subsequently try to read that zero-length EEPROM anyway. If the driver does not support EEPROM access at all, this operation will return -EOPNOTSUPP. If the driver does support EEPROM access but no EEPROM is installed, the operation will return -EINVAL. Return -EOPNOTSUPP in both cases for consistency. Signed-off-by: Guenter Roeck Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- net/core/ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1600aa24d36b..06dfb293e5aa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1036,7 +1036,8 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - if (!ops->get_eeprom || !ops->get_eeprom_len) + if (!ops->get_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, @@ -1052,7 +1053,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) u8 *data; int ret = 0; - if (!ops->set_eeprom || !ops->get_eeprom_len) + if (!ops->set_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) -- cgit v1.2.2 From 4cb8c3593bbb884c5c282b1d8502a0930235fe88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Nov 2014 10:20:26 -0800 Subject: irda: stop calling sk_prot->disconnect() on connection failure The sk_prot is irda's own set of protocol handlers, so irda should statically know what that function is anyway, without using an indirect pointer. And as it happens, we know *exactly* what that pointer is statically: it's NULL, because irda doesn't define a disconnect operation. So calling that function is doubly wrong, and will just cause an oops. Reported-by: Martin Lang Cc: Samuel Ortiz Cc: David Miller Signed-off-by: Linus Torvalds --- net/irda/af_irda.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92fafd485deb..3f3a6cbdceb7 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1064,8 +1064,6 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state != TCP_ESTABLISHED) { sock->state = SS_UNCONNECTED; - if (sk->sk_prot->disconnect(sk, flags)) - sock->state = SS_DISCONNECTING; err = sock_error(sk); if (!err) err = -ECONNRESET; -- cgit v1.2.2