aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2015-08-31 15:34:00 -0400
committer: David S. Miller <davem@davemloft.net> 2015-08-31 15:34:00 -0400
commit: 9dc30648f0708cf063e29470d83f63f8dc8fc430 (patch)
tree: f12130b61955f4471ebd61474244ecf9ebdc0858 /net
parent: 87583ebb9f6ea6dc7f8ef167b815656787e429fc (diff)
parent: c3a8d9474684d391b0afc3970d9b249add15ec07 (diff)
Merge branch 'per-route-dctcp-receive-side'
Daniel Borkmann says: ==================== tcp: receive-side per route dctcp handling Original cover letter: Currently, the following case doesn't use DCTCP, even if it should: - responder has e.g. cubic as system wide default - 'ip route congctl dctcp $src' was set Then, DCTCP is NOT used if a DCTCP sender attempts to connect from a host in the $src range: ECT(0) is set, but listen_sk is not dctcp, so we fail the INET_ECN_is_not_ect sanity check. We also have to examine the dst used for the SYN/ACK reply to make this case work. In order to minimize additional cost, store the 'ecn is must have' information in the dst_features field. The set targets -next instead of -net since this doesn't seem to be a serious bug and to give the change more soak time until it hits Linus' tree. v1 -> v2: - Addressed Dave's feedback, not exposing any bits to user space - Added patch 3 to reject incorrect configurations - Rest as is, rebased and retested ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/rtnetlink.c 6
-rw-r--r-- net/ipv4/fib_semantics.c 77
-rw-r--r-- net/ipv4/tcp_cong.c 9
-rw-r--r-- net/ipv4/tcp_input.c 7
-rw-r--r-- net/ipv6/route.c 39
5 files changed, 87 insertions, 51 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 788ceed39463..a466821d1441 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -678,6 +678,12 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
678 continue; 678 continue;
679 if (nla_put_string(skb, i + 1, name)) 679 if (nla_put_string(skb, i + 1, name))
680 goto nla_put_failure; 680 goto nla_put_failure;
681 } else if (i == RTAX_FEATURES - 1) {
682 u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
683
684 BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
685 if (nla_put_u32(skb, i + 1, user_features))
686 goto nla_put_failure;
681 } else { 687 } else {
682 if (nla_put_u32(skb, i + 1, metrics[i])) 688 if (nla_put_u32(skb, i + 1, metrics[i]))
683 goto nla_put_failure; 689 goto nla_put_failure;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1b2d01170a4d..992a9597daf8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -876,6 +876,50 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
876 return true; 876 return true;
877} 877}
878 878
879static int
880fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
881{
882 bool ecn_ca = false;
883 struct nlattr *nla;
884 int remaining;
885
886 if (!cfg->fc_mx)
887 return 0;
888
889 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
890 int type = nla_type(nla);
891 u32 val;
892
893 if (!type)
894 continue;
895 if (type > RTAX_MAX)
896 return -EINVAL;
897
898 if (type == RTAX_CC_ALGO) {
899 char tmp[TCP_CA_NAME_MAX];
900
901 nla_strlcpy(tmp, nla, sizeof(tmp));
902 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
903 if (val == TCP_CA_UNSPEC)
904 return -EINVAL;
905 } else {
906 val = nla_get_u32(nla);
907 }
908 if (type == RTAX_ADVMSS && val > 65535 - 40)
909 val = 65535 - 40;
910 if (type == RTAX_MTU && val > 65535 - 15)
911 val = 65535 - 15;
912 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
913 return -EINVAL;
914 fi->fib_metrics[type - 1] = val;
915 }
916
917 if (ecn_ca)
918 fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
919
920 return 0;
921}
922
879struct fib_info *fib_create_info(struct fib_config *cfg) 923struct fib_info *fib_create_info(struct fib_config *cfg)
880{ 924{
881 int err; 925 int err;
@@ -948,36 +992,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
948 goto failure; 992 goto failure;
949 } endfor_nexthops(fi) 993 } endfor_nexthops(fi)
950 994
951 if (cfg->fc_mx) { 995 err = fib_convert_metrics(fi, cfg);
952 struct nlattr *nla; 996 if (err)
953 int remaining; 997 goto failure;
954
955 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
956 int type = nla_type(nla);
957
958 if (type) {
959 u32 val;
960
961 if (type > RTAX_MAX)
962 goto err_inval;
963 if (type == RTAX_CC_ALGO) {
964 char tmp[TCP_CA_NAME_MAX];
965
966 nla_strlcpy(tmp, nla, sizeof(tmp));
967 val = tcp_ca_get_key_by_name(tmp);
968 if (val == TCP_CA_UNSPEC)
969 goto err_inval;
970 } else {
971 val = nla_get_u32(nla);
972 }
973 if (type == RTAX_ADVMSS && val > 65535 - 40)
974 val = 65535 - 40;
975 if (type == RTAX_MTU && val > 65535 - 15)
976 val = 65535 - 15;
977 fi->fib_metrics[type - 1] = val;
978 }
979 }
980 }
981 998
982 if (cfg->fc_mp) { 999 if (cfg->fc_mp) {
983#ifdef CONFIG_IP_ROUTE_MULTIPATH 1000#ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index a2ed23c595cf..93c4dc3ab23f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -114,16 +114,19 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
114} 114}
115EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 115EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
116 116
117u32 tcp_ca_get_key_by_name(const char *name) 117u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
118{ 118{
119 const struct tcp_congestion_ops *ca; 119 const struct tcp_congestion_ops *ca;
120 u32 key; 120 u32 key = TCP_CA_UNSPEC;
121 121
122 might_sleep(); 122 might_sleep();
123 123
124 rcu_read_lock(); 124 rcu_read_lock();
125 ca = __tcp_ca_find_autoload(name); 125 ca = __tcp_ca_find_autoload(name);
126 key = ca ? ca->key : TCP_CA_UNSPEC; 126 if (ca) {
127 key = ca->key;
128 *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
129 }
127 rcu_read_unlock(); 130 rcu_read_unlock();
128 131
129 return key; 132 return key;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dc08e2352665..a8f515bb19c4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6003,14 +6003,17 @@ static void tcp_ecn_create_request(struct request_sock *req,
6003 const struct net *net = sock_net(listen_sk); 6003 const struct net *net = sock_net(listen_sk);
6004 bool th_ecn = th->ece && th->cwr; 6004 bool th_ecn = th->ece && th->cwr;
6005 bool ect, ecn_ok; 6005 bool ect, ecn_ok;
6006 u32 ecn_ok_dst;
6006 6007
6007 if (!th_ecn) 6008 if (!th_ecn)
6008 return; 6009 return;
6009 6010
6010 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); 6011 ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
6011 ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN); 6012 ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
6013 ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
6012 6014
6013 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk)) 6015 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
6016 (ecn_ok_dst & DST_FEATURE_ECN_CA))
6014 inet_rsk(req)->ecn_ok = 1; 6017 inet_rsk(req)->ecn_ok = 1;
6015} 6018}
6016 6019
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 308dd5f9158f..f45cac6f8356 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1698,6 +1698,7 @@ out:
1698static int ip6_convert_metrics(struct mx6_config *mxc, 1698static int ip6_convert_metrics(struct mx6_config *mxc,
1699 const struct fib6_config *cfg) 1699 const struct fib6_config *cfg)
1700{ 1700{
1701 bool ecn_ca = false;
1701 struct nlattr *nla; 1702 struct nlattr *nla;
1702 int remaining; 1703 int remaining;
1703 u32 *mp; 1704 u32 *mp;
@@ -1711,30 +1712,36 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
1711 1712
1712 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1713 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1713 int type = nla_type(nla); 1714 int type = nla_type(nla);
1715 u32 val;
1714 1716
1715 if (type) { 1717 if (!type)
1716 u32 val; 1718 continue;
1719 if (unlikely(type > RTAX_MAX))
1720 goto err;
1721
1722 if (type == RTAX_CC_ALGO) {
1723 char tmp[TCP_CA_NAME_MAX];
1717 1724
1718 if (unlikely(type > RTAX_MAX)) 1725 nla_strlcpy(tmp, nla, sizeof(tmp));
1726 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1727 if (val == TCP_CA_UNSPEC)
1719 goto err; 1728 goto err;
1720 if (type == RTAX_CC_ALGO) { 1729 } else {
1721 char tmp[TCP_CA_NAME_MAX]; 1730 val = nla_get_u32(nla);
1731 }
1732 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1733 goto err;
1722 1734
1723 nla_strlcpy(tmp, nla, sizeof(tmp)); 1735 mp[type - 1] = val;
1724 val = tcp_ca_get_key_by_name(tmp); 1736 __set_bit(type - 1, mxc->mx_valid);
1725 if (val == TCP_CA_UNSPEC) 1737 }
1726 goto err;
1727 } else {
1728 val = nla_get_u32(nla);
1729 }
1730 1738
1731 mp[type - 1] = val; 1739 if (ecn_ca) {
1732 __set_bit(type - 1, mxc->mx_valid); 1740 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1733 } 1741 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1734 } 1742 }
1735 1743
1736 mxc->mx = mp; 1744 mxc->mx = mp;
1737
1738 return 0; 1745 return 0;
1739 err: 1746 err:
1740 kfree(mp); 1747 kfree(mp);