diff options
| author | David S. Miller <davem@davemloft.net> | 2015-08-31 15:34:00 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2015-08-31 15:34:00 -0400 |
| commit | 9dc30648f0708cf063e29470d83f63f8dc8fc430 (patch) | |
| tree | f12130b61955f4471ebd61474244ecf9ebdc0858 /net | |
| parent | 87583ebb9f6ea6dc7f8ef167b815656787e429fc (diff) | |
| parent | c3a8d9474684d391b0afc3970d9b249add15ec07 (diff) | |
Merge branch 'per-route-dctcp-receive-side'
Daniel Borkmann says:
====================
tcp: receive-side per route dctcp handling
Original cover letter:
Currently, the following case doesn't use DCTCP, even if it should:
- responder has f.e. cubic as system wide default
- 'ip route congctl dctcp $src' was set
Then, DCTCP is NOT used if a DCTCP sender attempts to connect from a
host in the $src range: ECT(0) is set, but listen_sk is not dctcp, so
we fail the INET_ECN_is_not_ect sanity check.
We also have to examine the dst used for the SYN/ACK reply to make
this case work.
In order to minimize additional cost, store the 'ecn is must have'
information in the dst_features field.
The set targets -next instead of -net since this doesn't seem to be a
serious bug and to give the change more soak time until it hits Linus'
tree.
v1 -> v2:
- Addressed Dave's feedback, not exposing any bits to user space
- Added patch 3 to reject incorrect configurations
- Rest as is, rebased and retested
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
| -rw-r--r-- | net/core/rtnetlink.c | 6 | ||||
| -rw-r--r-- | net/ipv4/fib_semantics.c | 77 | ||||
| -rw-r--r-- | net/ipv4/tcp_cong.c | 9 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
| -rw-r--r-- | net/ipv6/route.c | 39 |
5 files changed, 87 insertions, 51 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 788ceed39463..a466821d1441 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
| @@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) | |||
| 678 | continue; | 678 | continue; |
| 679 | if (nla_put_string(skb, i + 1, name)) | 679 | if (nla_put_string(skb, i + 1, name)) |
| 680 | goto nla_put_failure; | 680 | goto nla_put_failure; |
| 681 | } else if (i == RTAX_FEATURES - 1) { | ||
| 682 | u32 user_features = metrics[i] & RTAX_FEATURE_MASK; | ||
| 683 | |||
| 684 | BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); | ||
| 685 | if (nla_put_u32(skb, i + 1, user_features)) | ||
| 686 | goto nla_put_failure; | ||
| 681 | } else { | 687 | } else { |
| 682 | if (nla_put_u32(skb, i + 1, metrics[i])) | 688 | if (nla_put_u32(skb, i + 1, metrics[i])) |
| 683 | goto nla_put_failure; | 689 | goto nla_put_failure; |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1b2d01170a4d..992a9597daf8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
| @@ -876,6 +876,50 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) | |||
| 876 | return true; | 876 | return true; |
| 877 | } | 877 | } |
| 878 | 878 | ||
| 879 | static int | ||
| 880 | fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) | ||
| 881 | { | ||
| 882 | bool ecn_ca = false; | ||
| 883 | struct nlattr *nla; | ||
| 884 | int remaining; | ||
| 885 | |||
| 886 | if (!cfg->fc_mx) | ||
| 887 | return 0; | ||
| 888 | |||
| 889 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { | ||
| 890 | int type = nla_type(nla); | ||
| 891 | u32 val; | ||
| 892 | |||
| 893 | if (!type) | ||
| 894 | continue; | ||
| 895 | if (type > RTAX_MAX) | ||
| 896 | return -EINVAL; | ||
| 897 | |||
| 898 | if (type == RTAX_CC_ALGO) { | ||
| 899 | char tmp[TCP_CA_NAME_MAX]; | ||
| 900 | |||
| 901 | nla_strlcpy(tmp, nla, sizeof(tmp)); | ||
| 902 | val = tcp_ca_get_key_by_name(tmp, &ecn_ca); | ||
| 903 | if (val == TCP_CA_UNSPEC) | ||
| 904 | return -EINVAL; | ||
| 905 | } else { | ||
| 906 | val = nla_get_u32(nla); | ||
| 907 | } | ||
| 908 | if (type == RTAX_ADVMSS && val > 65535 - 40) | ||
| 909 | val = 65535 - 40; | ||
| 910 | if (type == RTAX_MTU && val > 65535 - 15) | ||
| 911 | val = 65535 - 15; | ||
| 912 | if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) | ||
| 913 | return -EINVAL; | ||
| 914 | fi->fib_metrics[type - 1] = val; | ||
| 915 | } | ||
| 916 | |||
| 917 | if (ecn_ca) | ||
| 918 | fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; | ||
| 919 | |||
| 920 | return 0; | ||
| 921 | } | ||
| 922 | |||
| 879 | struct fib_info *fib_create_info(struct fib_config *cfg) | 923 | struct fib_info *fib_create_info(struct fib_config *cfg) |
| 880 | { | 924 | { |
| 881 | int err; | 925 | int err; |
| @@ -948,36 +992,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 948 | goto failure; | 992 | goto failure; |
| 949 | } endfor_nexthops(fi) | 993 | } endfor_nexthops(fi) |
| 950 | 994 | ||
| 951 | if (cfg->fc_mx) { | 995 | err = fib_convert_metrics(fi, cfg); |
| 952 | struct nlattr *nla; | 996 | if (err) |
| 953 | int remaining; | 997 | goto failure; |
| 954 | |||
| 955 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { | ||
| 956 | int type = nla_type(nla); | ||
| 957 | |||
| 958 | if (type) { | ||
| 959 | u32 val; | ||
| 960 | |||
| 961 | if (type > RTAX_MAX) | ||
| 962 | goto err_inval; | ||
| 963 | if (type == RTAX_CC_ALGO) { | ||
| 964 | char tmp[TCP_CA_NAME_MAX]; | ||
| 965 | |||
| 966 | nla_strlcpy(tmp, nla, sizeof(tmp)); | ||
| 967 | val = tcp_ca_get_key_by_name(tmp); | ||
| 968 | if (val == TCP_CA_UNSPEC) | ||
| 969 | goto err_inval; | ||
| 970 | } else { | ||
| 971 | val = nla_get_u32(nla); | ||
| 972 | } | ||
| 973 | if (type == RTAX_ADVMSS && val > 65535 - 40) | ||
| 974 | val = 65535 - 40; | ||
| 975 | if (type == RTAX_MTU && val > 65535 - 15) | ||
| 976 | val = 65535 - 15; | ||
| 977 | fi->fib_metrics[type - 1] = val; | ||
| 978 | } | ||
| 979 | } | ||
| 980 | } | ||
| 981 | 998 | ||
| 982 | if (cfg->fc_mp) { | 999 | if (cfg->fc_mp) { |
| 983 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1000 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index a2ed23c595cf..93c4dc3ab23f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) | |||
| 114 | } | 114 | } |
| 115 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); | 115 | EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); |
| 116 | 116 | ||
| 117 | u32 tcp_ca_get_key_by_name(const char *name) | 117 | u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) |
| 118 | { | 118 | { |
| 119 | const struct tcp_congestion_ops *ca; | 119 | const struct tcp_congestion_ops *ca; |
| 120 | u32 key; | 120 | u32 key = TCP_CA_UNSPEC; |
| 121 | 121 | ||
| 122 | might_sleep(); | 122 | might_sleep(); |
| 123 | 123 | ||
| 124 | rcu_read_lock(); | 124 | rcu_read_lock(); |
| 125 | ca = __tcp_ca_find_autoload(name); | 125 | ca = __tcp_ca_find_autoload(name); |
| 126 | key = ca ? ca->key : TCP_CA_UNSPEC; | 126 | if (ca) { |
| 127 | key = ca->key; | ||
| 128 | *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; | ||
| 129 | } | ||
| 127 | rcu_read_unlock(); | 130 | rcu_read_unlock(); |
| 128 | 131 | ||
| 129 | return key; | 132 | return key; |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc08e2352665..a8f515bb19c4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -6003,14 +6003,17 @@ static void tcp_ecn_create_request(struct request_sock *req, | |||
| 6003 | const struct net *net = sock_net(listen_sk); | 6003 | const struct net *net = sock_net(listen_sk); |
| 6004 | bool th_ecn = th->ece && th->cwr; | 6004 | bool th_ecn = th->ece && th->cwr; |
| 6005 | bool ect, ecn_ok; | 6005 | bool ect, ecn_ok; |
| 6006 | u32 ecn_ok_dst; | ||
| 6006 | 6007 | ||
| 6007 | if (!th_ecn) | 6008 | if (!th_ecn) |
| 6008 | return; | 6009 | return; |
| 6009 | 6010 | ||
| 6010 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); | 6011 | ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); |
| 6011 | ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); | 6012 | ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); |
| 6013 | ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; | ||
| 6012 | 6014 | ||
| 6013 | if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) | 6015 | if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || |
| 6016 | (ecn_ok_dst & DST_FEATURE_ECN_CA)) | ||
| 6014 | inet_rsk(req)->ecn_ok = 1; | 6017 | inet_rsk(req)->ecn_ok = 1; |
| 6015 | } | 6018 | } |
| 6016 | 6019 | ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 308dd5f9158f..f45cac6f8356 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
| @@ -1698,6 +1698,7 @@ out: | |||
| 1698 | static int ip6_convert_metrics(struct mx6_config *mxc, | 1698 | static int ip6_convert_metrics(struct mx6_config *mxc, |
| 1699 | const struct fib6_config *cfg) | 1699 | const struct fib6_config *cfg) |
| 1700 | { | 1700 | { |
| 1701 | bool ecn_ca = false; | ||
| 1701 | struct nlattr *nla; | 1702 | struct nlattr *nla; |
| 1702 | int remaining; | 1703 | int remaining; |
| 1703 | u32 *mp; | 1704 | u32 *mp; |
| @@ -1711,30 +1712,36 @@ static int ip6_convert_metrics(struct mx6_config *mxc, | |||
| 1711 | 1712 | ||
| 1712 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { | 1713 | nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { |
| 1713 | int type = nla_type(nla); | 1714 | int type = nla_type(nla); |
| 1715 | u32 val; | ||
| 1714 | 1716 | ||
| 1715 | if (type) { | 1717 | if (!type) |
| 1716 | u32 val; | 1718 | continue; |
| 1719 | if (unlikely(type > RTAX_MAX)) | ||
| 1720 | goto err; | ||
| 1721 | |||
| 1722 | if (type == RTAX_CC_ALGO) { | ||
| 1723 | char tmp[TCP_CA_NAME_MAX]; | ||
| 1717 | 1724 | ||
| 1718 | if (unlikely(type > RTAX_MAX)) | 1725 | nla_strlcpy(tmp, nla, sizeof(tmp)); |
| 1726 | val = tcp_ca_get_key_by_name(tmp, &ecn_ca); | ||
| 1727 | if (val == TCP_CA_UNSPEC) | ||
| 1719 | goto err; | 1728 | goto err; |
| 1720 | if (type == RTAX_CC_ALGO) { | 1729 | } else { |
| 1721 | char tmp[TCP_CA_NAME_MAX]; | 1730 | val = nla_get_u32(nla); |
| 1731 | } | ||
| 1732 | if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) | ||
| 1733 | goto err; | ||
| 1722 | 1734 | ||
| 1723 | nla_strlcpy(tmp, nla, sizeof(tmp)); | 1735 | mp[type - 1] = val; |
| 1724 | val = tcp_ca_get_key_by_name(tmp); | 1736 | __set_bit(type - 1, mxc->mx_valid); |
| 1725 | if (val == TCP_CA_UNSPEC) | 1737 | } |
| 1726 | goto err; | ||
| 1727 | } else { | ||
| 1728 | val = nla_get_u32(nla); | ||
| 1729 | } | ||
| 1730 | 1738 | ||
| 1731 | mp[type - 1] = val; | 1739 | if (ecn_ca) { |
| 1732 | __set_bit(type - 1, mxc->mx_valid); | 1740 | __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); |
| 1733 | } | 1741 | mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; |
| 1734 | } | 1742 | } |
| 1735 | 1743 | ||
| 1736 | mxc->mx = mp; | 1744 | mxc->mx = mp; |
| 1737 | |||
| 1738 | return 0; | 1745 | return 0; |
| 1739 | err: | 1746 | err: |
| 1740 | kfree(mp); | 1747 | kfree(mp); |
