summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: David Ahern <dsahern@gmail.com> 2018-03-02 11:32:18 -0500
committer: David S. Miller <davem@davemloft.net> 2018-03-04 13:04:23 -0500
commit: b4bac172e90ce4a93df8adf44eb70d91b9d611eb (patch)
tree: 875d572b15ee7f2ea7a06dcf5be4faa6c0865967
parent: b75cc8f90f07342467b3bd51dbc0054f185032c9 (diff)
net/ipv6: Add support for path selection using hash of 5-tuple
Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow label. To that end, add support to IPv6 for a multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3, which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern <dsahern@gmail.com> Reviewed-by: Ido Schimmel <idosch@mellanox.com> Tested-by: Ido Schimmel <idosch@mellanox.com> Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- Documentation/networking/ip-sysctl.txt 7
-rw-r--r-- include/net/ip6_route.h 4
-rw-r--r-- include/net/netevent.h 1
-rw-r--r-- include/net/netns/ipv6.h 1
-rw-r--r-- net/ipv6/icmp.c 2
-rw-r--r-- net/ipv6/route.c 68
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c 27
7 files changed, 91 insertions, 19 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a553d4e4a0fb..783675a730e5 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN
1363 FALSE: disabled 1363 FALSE: disabled
1364 Default: FALSE 1364 Default: FALSE
1365 1365
1366fib_multipath_hash_policy - INTEGER
1367 Controls which hash policy to use for multipath routes.
1368 Default: 0 (Layer 3)
1369 Possible values:
1370 0 - Layer 3 (source and destination addresses plus flow label)
1371 1 - Layer 4 (standard 5-tuple)
1372
1366anycast_src_echo_reply - BOOLEAN 1373anycast_src_echo_reply - BOOLEAN
1367 Controls the use of anycast addresses as source addresses for ICMPv6 1374 Controls the use of anycast addresses as source addresses for ICMPv6
1368 echo reply 1375 echo reply
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9594f9317952..ce2abc0ff102 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
130struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 130struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
131 const struct in6_addr *saddr, int oif, 131 const struct in6_addr *saddr, int oif,
132 const struct sk_buff *skb, int flags); 132 const struct sk_buff *skb, int flags);
133u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, 133u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
134 struct flow_keys *hkeys); 134 const struct sk_buff *skb, struct flow_keys *hkeys);
135 135
136struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); 136struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
137 137
diff --git a/include/net/netevent.h b/include/net/netevent.h
index baee605a94ab..d9918261701c 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -27,6 +27,7 @@ enum netevent_notif_type {
27 NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ 27 NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */
28 NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ 28 NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
29 NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ 29 NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
30 NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
30}; 31};
31 32
32int register_netevent_notifier(struct notifier_block *nb); 33int register_netevent_notifier(struct notifier_block *nb);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index e286fda09fcf..5b51110435fc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
28 int ip6_rt_gc_elasticity; 28 int ip6_rt_gc_elasticity;
29 int ip6_rt_mtu_expires; 29 int ip6_rt_mtu_expires;
30 int ip6_rt_min_advmss; 30 int ip6_rt_min_advmss;
31 int multipath_hash_policy;
31 int flowlabel_consistency; 32 int flowlabel_consistency;
32 int auto_flowlabels; 33 int auto_flowlabels;
33 int icmpv6_time; 34 int icmpv6_time;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a5d929223820..6f84668be6ea 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
522 fl6.fl6_icmp_type = type; 522 fl6.fl6_icmp_type = type;
523 fl6.fl6_icmp_code = code; 523 fl6.fl6_icmp_code = code;
524 fl6.flowi6_uid = sock_net_uid(net, NULL); 524 fl6.flowi6_uid = sock_net_uid(net, NULL);
525 fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); 525 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
526 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 526 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
527 527
528 sk = icmpv6_xmit_lock(net); 528 sk = icmpv6_xmit_lock(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d2b8368663cb..f0ae58424c45 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
450 return false; 450 return false;
451} 451}
452 452
453static struct rt6_info *rt6_multipath_select(struct rt6_info *match, 453static struct rt6_info *rt6_multipath_select(const struct net *net,
454 struct rt6_info *match,
454 struct flowi6 *fl6, int oif, 455 struct flowi6 *fl6, int oif,
455 const struct sk_buff *skb, 456 const struct sk_buff *skb,
456 int strict) 457 int strict)
@@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
461 * case it will always be non-zero. Otherwise now is the time to do it. 462 * case it will always be non-zero. Otherwise now is the time to do it.
462 */ 463 */
463 if (!fl6->mp_hash) 464 if (!fl6->mp_hash)
464 fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); 465 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
465 466
466 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) 467 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
467 return match; 468 return match;
@@ -932,7 +933,7 @@ restart:
932 rt = rt6_device_match(net, rt, &fl6->saddr, 933 rt = rt6_device_match(net, rt, &fl6->saddr,
933 fl6->flowi6_oif, flags); 934 fl6->flowi6_oif, flags);
934 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 935 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
935 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, 936 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
936 skb, flags); 937 skb, flags);
937 } 938 }
938 if (rt == net->ipv6.ip6_null_entry) { 939 if (rt == net->ipv6.ip6_null_entry) {
@@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1674redo_rt6_select: 1675redo_rt6_select:
1675 rt = rt6_select(net, fn, oif, strict); 1676 rt = rt6_select(net, fn, oif, strict);
1676 if (rt->rt6i_nsiblings) 1677 if (rt->rt6i_nsiblings)
1677 rt = rt6_multipath_select(rt, fl6, oif, skb, strict); 1678 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
1678 if (rt == net->ipv6.ip6_null_entry) { 1679 if (rt == net->ipv6.ip6_null_entry) {
1679 fn = fib6_backtrack(fn, &fl6->saddr); 1680 fn = fib6_backtrack(fn, &fl6->saddr);
1680 if (fn) 1681 if (fn)
@@ -1839,21 +1840,56 @@ out:
1839} 1840}
1840 1841
1841/* if skb is set it will be used and fl6 can be NULL */ 1842/* if skb is set it will be used and fl6 can be NULL */
1842u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, 1843u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1843 struct flow_keys *flkeys) 1844 const struct sk_buff *skb, struct flow_keys *flkeys)
1844{ 1845{
1845 struct flow_keys hash_keys; 1846 struct flow_keys hash_keys;
1846 u32 mhash; 1847 u32 mhash;
1847 1848
1848 memset(&hash_keys, 0, sizeof(hash_keys)); 1849 switch (net->ipv6.sysctl.multipath_hash_policy) {
1849 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1850 case 0:
1850 if (skb) { 1851 memset(&hash_keys, 0, sizeof(hash_keys));
1851 ip6_multipath_l3_keys(skb, &hash_keys, flkeys); 1852 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1852 } else { 1853 if (skb) {
1853 hash_keys.addrs.v6addrs.src = fl6->saddr; 1854 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1854 hash_keys.addrs.v6addrs.dst = fl6->daddr; 1855 } else {
1855 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; 1856 hash_keys.addrs.v6addrs.src = fl6->saddr;
1856 hash_keys.basic.ip_proto = fl6->flowi6_proto; 1857 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1858 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1859 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1860 }
1861 break;
1862 case 1:
1863 if (skb) {
1864 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1865 struct flow_keys keys;
1866
1867 /* short-circuit if we already have L4 hash present */
1868 if (skb->l4_hash)
1869 return skb_get_hash_raw(skb) >> 1;
1870
1871 memset(&hash_keys, 0, sizeof(hash_keys));
1872
1873 if (!flkeys) {
1874 skb_flow_dissect_flow_keys(skb, &keys, flag);
1875 flkeys = &keys;
1876 }
1877 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1878 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1879 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1880 hash_keys.ports.src = flkeys->ports.src;
1881 hash_keys.ports.dst = flkeys->ports.dst;
1882 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1883 } else {
1884 memset(&hash_keys, 0, sizeof(hash_keys));
1885 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1886 hash_keys.addrs.v6addrs.src = fl6->saddr;
1887 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1888 hash_keys.ports.src = fl6->fl6_sport;
1889 hash_keys.ports.dst = fl6->fl6_dport;
1890 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1891 }
1892 break;
1857 } 1893 }
1858 mhash = flow_hash_from_keys(&hash_keys); 1894 mhash = flow_hash_from_keys(&hash_keys);
1859 1895
@@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb)
1884 flkeys = &_flkeys; 1920 flkeys = &_flkeys;
1885 1921
1886 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 1922 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1887 fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); 1923 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
1888 skb_dst_drop(skb); 1924 skb_dst_drop(skb);
1889 skb_dst_set(skb, 1925 skb_dst_set(skb,
1890 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); 1926 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 262f791f1b9b..966c42af92f4 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
16#include <net/ipv6.h> 16#include <net/ipv6.h>
17#include <net/addrconf.h> 17#include <net/addrconf.h>
18#include <net/inet_frag.h> 18#include <net/inet_frag.h>
19#include <net/netevent.h>
19#ifdef CONFIG_NETLABEL 20#ifdef CONFIG_NETLABEL
20#include <net/calipso.h> 21#include <net/calipso.h>
21#endif 22#endif
22 23
24static int zero;
23static int one = 1; 25static int one = 1;
24static int auto_flowlabels_min; 26static int auto_flowlabels_min;
25static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; 27static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
26 28
29static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
30 void __user *buffer, size_t *lenp,
31 loff_t *ppos)
32{
33 struct net *net;
34 int ret;
35
36 net = container_of(table->data, struct net,
37 ipv6.sysctl.multipath_hash_policy);
38 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
39 if (write && ret == 0)
40 call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
41
42 return ret;
43}
27 44
28static struct ctl_table ipv6_table_template[] = { 45static struct ctl_table ipv6_table_template[] = {
29 { 46 {
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
126 .mode = 0644, 143 .mode = 0644,
127 .proc_handler = proc_dointvec 144 .proc_handler = proc_dointvec
128 }, 145 },
146 {
147 .procname = "fib_multipath_hash_policy",
148 .data = &init_net.ipv6.sysctl.multipath_hash_policy,
149 .maxlen = sizeof(int),
150 .mode = 0644,
151 .proc_handler = proc_rt6_multipath_hash_policy,
152 .extra1 = &zero,
153 .extra2 = &one,
154 },
129 { } 155 { }
130}; 156};
131 157
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
190 ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; 216 ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
191 ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; 217 ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
192 ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; 218 ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
219 ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
193 220
194 ipv6_route_table = ipv6_route_sysctl_init(net); 221 ipv6_route_table = ipv6_route_sysctl_init(net);
195 if (!ipv6_route_table) 222 if (!ipv6_route_table)