diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 8 | ||||
-rw-r--r-- | include/net/ip_fib.h | 14 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 1 | ||||
-rw-r--r-- | include/net/route.h | 6 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 11 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 19 | ||||
-rw-r--r-- | net/ipv4/route.c | 92 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 9 |
8 files changed, 100 insertions, 60 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ed3d0791eb27..b57308e76b1d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -73,6 +73,14 @@ fib_multipath_use_neigh - BOOLEAN | |||
73 | 0 - disabled | 73 | 0 - disabled |
74 | 1 - enabled | 74 | 1 - enabled |
75 | 75 | ||
76 | fib_multipath_hash_policy - INTEGER | ||
77 | Controls which hash policy to use for multipath routes. Only valid | ||
78 | for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled. | ||
79 | Default: 0 (Layer 3) | ||
80 | Possible values: | ||
81 | 0 - Layer 3 (source and destination addresses) | |
82 | 1 - Layer 4 (addresses plus source/destination ports and IP protocol) | |
83 | |||
76 | route/max_size - INTEGER | 84 | route/max_size - INTEGER |
77 | Maximum number of routes allowed in the kernel. Increase | 85 | Maximum number of routes allowed in the kernel. Increase |
78 | this when using large numbers of interfaces and/or routes. | 86 | this when using large numbers of interfaces and/or routes. |
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 272e62e139e0..6692c5758b33 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h | |||
@@ -395,17 +395,13 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); | |||
395 | int fib_sync_down_addr(struct net_device *dev, __be32 local); | 395 | int fib_sync_down_addr(struct net_device *dev, __be32 local); |
396 | int fib_sync_up(struct net_device *dev, unsigned int nh_flags); | 396 | int fib_sync_up(struct net_device *dev, unsigned int nh_flags); |
397 | 397 | ||
398 | extern u32 fib_multipath_secret __read_mostly; | 398 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
399 | 399 | int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, | |
400 | static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) | 400 | const struct sk_buff *skb); |
401 | { | 401 | #endif |
402 | return jhash_2words((__force u32)saddr, (__force u32)daddr, | ||
403 | fib_multipath_secret) >> 1; | ||
404 | } | ||
405 | |||
406 | void fib_select_multipath(struct fib_result *res, int hash); | 402 | void fib_select_multipath(struct fib_result *res, int hash); |
407 | void fib_select_path(struct net *net, struct fib_result *res, | 403 | void fib_select_path(struct net *net, struct fib_result *res, |
408 | struct flowi4 *fl4, int mp_hash); | 404 | struct flowi4 *fl4, const struct sk_buff *skb); |
409 | 405 | ||
410 | /* Exported by fib_trie.c */ | 406 | /* Exported by fib_trie.c */ |
411 | void fib_trie_init(void); | 407 | void fib_trie_init(void); |
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2e9d649ba169..a0e89190a3e9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -151,6 +151,7 @@ struct netns_ipv4 { | |||
151 | #endif | 151 | #endif |
152 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 152 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
153 | int sysctl_fib_multipath_use_neigh; | 153 | int sysctl_fib_multipath_use_neigh; |
154 | int sysctl_fib_multipath_hash_policy; | ||
154 | #endif | 155 | #endif |
155 | 156 | ||
156 | unsigned int fib_seq; /* protected by rtnl_mutex */ | 157 | unsigned int fib_seq; /* protected by rtnl_mutex */ |
diff --git a/include/net/route.h b/include/net/route.h index c0874c87c173..2cc0e14c6359 100644 --- a/include/net/route.h +++ b/include/net/route.h | |||
@@ -113,13 +113,13 @@ struct in_device; | |||
113 | int ip_rt_init(void); | 113 | int ip_rt_init(void); |
114 | void rt_cache_flush(struct net *net); | 114 | void rt_cache_flush(struct net *net); |
115 | void rt_flush_dev(struct net_device *dev); | 115 | void rt_flush_dev(struct net_device *dev); |
116 | struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, | 116 | struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp, |
117 | int mp_hash); | 117 | const struct sk_buff *skb); |
118 | 118 | ||
119 | static inline struct rtable *__ip_route_output_key(struct net *net, | 119 | static inline struct rtable *__ip_route_output_key(struct net *net, |
120 | struct flowi4 *flp) | 120 | struct flowi4 *flp) |
121 | { | 121 | { |
122 | return __ip_route_output_key_hash(net, flp, -1); | 122 | return __ip_route_output_key_hash(net, flp, NULL); |
123 | } | 123 | } |
124 | 124 | ||
125 | struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, | 125 | struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 317026a39cfa..da449ddb8cc1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt; | |||
57 | static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; | 57 | static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; |
58 | 58 | ||
59 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 59 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
60 | u32 fib_multipath_secret __read_mostly; | ||
61 | 60 | ||
62 | #define for_nexthops(fi) { \ | 61 | #define for_nexthops(fi) { \ |
63 | int nhsel; const struct fib_nh *nh; \ | 62 | int nhsel; const struct fib_nh *nh; \ |
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi) | |||
576 | 575 | ||
577 | atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); | 576 | atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); |
578 | } endfor_nexthops(fi); | 577 | } endfor_nexthops(fi); |
579 | |||
580 | net_get_random_once(&fib_multipath_secret, | ||
581 | sizeof(fib_multipath_secret)); | ||
582 | } | 578 | } |
583 | 579 | ||
584 | static inline void fib_add_weight(struct fib_info *fi, | 580 | static inline void fib_add_weight(struct fib_info *fi, |
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash) | |||
1641 | #endif | 1637 | #endif |
1642 | 1638 | ||
1643 | void fib_select_path(struct net *net, struct fib_result *res, | 1639 | void fib_select_path(struct net *net, struct fib_result *res, |
1644 | struct flowi4 *fl4, int mp_hash) | 1640 | struct flowi4 *fl4, const struct sk_buff *skb) |
1645 | { | 1641 | { |
1646 | bool oif_check; | 1642 | bool oif_check; |
1647 | 1643 | ||
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res, | |||
1650 | 1646 | ||
1651 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1647 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1652 | if (res->fi->fib_nhs > 1 && oif_check) { | 1648 | if (res->fi->fib_nhs > 1 && oif_check) { |
1653 | if (mp_hash < 0) | 1649 | int h = fib_multipath_hash(res->fi, fl4, skb); |
1654 | mp_hash = get_hash_from_flowi4(fl4) >> 1; | ||
1655 | 1650 | ||
1656 | fib_select_multipath(res, mp_hash); | 1651 | fib_select_multipath(res, h); |
1657 | } | 1652 | } |
1658 | else | 1653 | else |
1659 | #endif | 1654 | #endif |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fc310db2708b..43318b5f5647 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -464,22 +464,6 @@ out_bh_enable: | |||
464 | local_bh_enable(); | 464 | local_bh_enable(); |
465 | } | 465 | } |
466 | 466 | ||
467 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
468 | |||
469 | /* Source and destination is swapped. See ip_multipath_icmp_hash */ | ||
470 | static int icmp_multipath_hash_skb(const struct sk_buff *skb) | ||
471 | { | ||
472 | const struct iphdr *iph = ip_hdr(skb); | ||
473 | |||
474 | return fib_multipath_hash(iph->daddr, iph->saddr); | ||
475 | } | ||
476 | |||
477 | #else | ||
478 | |||
479 | #define icmp_multipath_hash_skb(skb) (-1) | ||
480 | |||
481 | #endif | ||
482 | |||
483 | static struct rtable *icmp_route_lookup(struct net *net, | 467 | static struct rtable *icmp_route_lookup(struct net *net, |
484 | struct flowi4 *fl4, | 468 | struct flowi4 *fl4, |
485 | struct sk_buff *skb_in, | 469 | struct sk_buff *skb_in, |
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
505 | fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); | 489 | fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); |
506 | 490 | ||
507 | security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); | 491 | security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); |
508 | rt = __ip_route_output_key_hash(net, fl4, | 492 | rt = __ip_route_output_key_hash(net, fl4, skb_in); |
509 | icmp_multipath_hash_skb(skb_in)); | ||
510 | if (IS_ERR(rt)) | 493 | if (IS_ERR(rt)) |
511 | return rt; | 494 | return rt; |
512 | 495 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8471dd116771..5dda1ef81c7e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1734,45 +1734,97 @@ out: | |||
1734 | } | 1734 | } |
1735 | 1735 | ||
1736 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1736 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1737 | |||
1738 | /* To make ICMP packets follow the right flow, the multipath hash is | 1737 | /* To make ICMP packets follow the right flow, the multipath hash is |
1739 | * calculated from the inner IP addresses in reverse order. | 1738 | * calculated from the inner IP addresses. |
1740 | */ | 1739 | */ |
1741 | static int ip_multipath_icmp_hash(struct sk_buff *skb) | 1740 | static void ip_multipath_l3_keys(const struct sk_buff *skb, |
1741 | struct flow_keys *hash_keys) | ||
1742 | { | 1742 | { |
1743 | const struct iphdr *outer_iph = ip_hdr(skb); | 1743 | const struct iphdr *outer_iph = ip_hdr(skb); |
1744 | struct icmphdr _icmph; | 1744 | const struct iphdr *inner_iph; |
1745 | const struct icmphdr *icmph; | 1745 | const struct icmphdr *icmph; |
1746 | struct iphdr _inner_iph; | 1746 | struct iphdr _inner_iph; |
1747 | const struct iphdr *inner_iph; | 1747 | struct icmphdr _icmph; |
1748 | |||
1749 | hash_keys->addrs.v4addrs.src = outer_iph->saddr; | ||
1750 | hash_keys->addrs.v4addrs.dst = outer_iph->daddr; | ||
1751 | if (likely(outer_iph->protocol != IPPROTO_ICMP)) | ||
1752 | return; | ||
1748 | 1753 | ||
1749 | if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) | 1754 | if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) |
1750 | goto standard_hash; | 1755 | return; |
1751 | 1756 | ||
1752 | icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), | 1757 | icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), |
1753 | &_icmph); | 1758 | &_icmph); |
1754 | if (!icmph) | 1759 | if (!icmph) |
1755 | goto standard_hash; | 1760 | return; |
1756 | 1761 | ||
1757 | if (icmph->type != ICMP_DEST_UNREACH && | 1762 | if (icmph->type != ICMP_DEST_UNREACH && |
1758 | icmph->type != ICMP_REDIRECT && | 1763 | icmph->type != ICMP_REDIRECT && |
1759 | icmph->type != ICMP_TIME_EXCEEDED && | 1764 | icmph->type != ICMP_TIME_EXCEEDED && |
1760 | icmph->type != ICMP_PARAMETERPROB) { | 1765 | icmph->type != ICMP_PARAMETERPROB) |
1761 | goto standard_hash; | 1766 | return; |
1762 | } | ||
1763 | 1767 | ||
1764 | inner_iph = skb_header_pointer(skb, | 1768 | inner_iph = skb_header_pointer(skb, |
1765 | outer_iph->ihl * 4 + sizeof(_icmph), | 1769 | outer_iph->ihl * 4 + sizeof(_icmph), |
1766 | sizeof(_inner_iph), &_inner_iph); | 1770 | sizeof(_inner_iph), &_inner_iph); |
1767 | if (!inner_iph) | 1771 | if (!inner_iph) |
1768 | goto standard_hash; | 1772 | return; |
1773 | hash_keys->addrs.v4addrs.src = inner_iph->saddr; | ||
1774 | hash_keys->addrs.v4addrs.dst = inner_iph->daddr; | ||
1775 | } | ||
1769 | 1776 | ||
1770 | return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); | 1777 | /* if skb is set it will be used and fl4 can be NULL */ |
1778 | int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, | ||
1779 | const struct sk_buff *skb) | ||
1780 | { | ||
1781 | struct net *net = fi->fib_net; | ||
1782 | struct flow_keys hash_keys; | ||
1783 | u32 mhash; | ||
1771 | 1784 | ||
1772 | standard_hash: | 1785 | switch (net->ipv4.sysctl_fib_multipath_hash_policy) { |
1773 | return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); | 1786 | case 0: |
1774 | } | 1787 | memset(&hash_keys, 0, sizeof(hash_keys)); |
1788 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | ||
1789 | if (skb) { | ||
1790 | ip_multipath_l3_keys(skb, &hash_keys); | ||
1791 | } else { | ||
1792 | hash_keys.addrs.v4addrs.src = fl4->saddr; | ||
1793 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | ||
1794 | } | ||
1795 | break; | ||
1796 | case 1: | ||
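1797 | /* skb is supplied by the forwarding path and by icmp_route_lookup(). NOTE(review): unlike the fl4 branch, this branch never sets hash_keys.control.addr_type - confirm flow_hash_from_keys() still mixes in the addresses. */ | |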
1798 | if (skb) { | ||
1799 | unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; | ||
1800 | struct flow_keys keys; | ||
1801 | |||
1802 | /* short-circuit if we already have L4 hash present */ | ||
1803 | if (skb->l4_hash) | ||
1804 | return skb_get_hash_raw(skb) >> 1; | ||
1805 | memset(&hash_keys, 0, sizeof(hash_keys)); | ||
1806 | skb_flow_dissect_flow_keys(skb, &keys, flag); | ||
1807 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; | ||
1808 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; | ||
1809 | hash_keys.ports.src = keys.ports.src; | ||
1810 | hash_keys.ports.dst = keys.ports.dst; | ||
1811 | hash_keys.basic.ip_proto = keys.basic.ip_proto; | ||
1812 | } else { | ||
1813 | memset(&hash_keys, 0, sizeof(hash_keys)); | ||
1814 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | ||
1815 | hash_keys.addrs.v4addrs.src = fl4->saddr; | ||
1816 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | ||
1817 | hash_keys.ports.src = fl4->fl4_sport; | ||
1818 | hash_keys.ports.dst = fl4->fl4_dport; | ||
1819 | hash_keys.basic.ip_proto = fl4->flowi4_proto; | ||
1820 | } | ||
1821 | break; | ||
1822 | } | ||
1823 | mhash = flow_hash_from_keys(&hash_keys); | ||
1775 | 1824 | ||
1825 | return mhash >> 1; | ||
1826 | } | ||
1827 | EXPORT_SYMBOL_GPL(fib_multipath_hash); | ||
1776 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | 1828 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ |
1777 | 1829 | ||
1778 | static int ip_mkroute_input(struct sk_buff *skb, | 1830 | static int ip_mkroute_input(struct sk_buff *skb, |
@@ -1782,12 +1834,8 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
1782 | { | 1834 | { |
1783 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1835 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1784 | if (res->fi && res->fi->fib_nhs > 1) { | 1836 | if (res->fi && res->fi->fib_nhs > 1) { |
1785 | int h; | 1837 | int h = fib_multipath_hash(res->fi, NULL, skb); |
1786 | 1838 | ||
1787 | if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) | ||
1788 | h = ip_multipath_icmp_hash(skb); | ||
1789 | else | ||
1790 | h = fib_multipath_hash(saddr, daddr); | ||
1791 | fib_select_multipath(res, h); | 1839 | fib_select_multipath(res, h); |
1792 | } | 1840 | } |
1793 | #endif | 1841 | #endif |
@@ -2203,7 +2251,7 @@ add: | |||
2203 | */ | 2251 | */ |
2204 | 2252 | ||
2205 | struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, | 2253 | struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, |
2206 | int mp_hash) | 2254 | const struct sk_buff *skb) |
2207 | { | 2255 | { |
2208 | struct net_device *dev_out = NULL; | 2256 | struct net_device *dev_out = NULL; |
2209 | __u8 tos = RT_FL_TOS(fl4); | 2257 | __u8 tos = RT_FL_TOS(fl4); |
@@ -2365,7 +2413,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, | |||
2365 | goto make_route; | 2413 | goto make_route; |
2366 | } | 2414 | } |
2367 | 2415 | ||
2368 | fib_select_path(net, &res, fl4, mp_hash); | 2416 | fib_select_path(net, &res, fl4, skb); |
2369 | 2417 | ||
2370 | dev_out = FIB_RES_DEV(res); | 2418 | dev_out = FIB_RES_DEV(res); |
2371 | fl4->flowi4_oif = dev_out->ifindex; | 2419 | fl4->flowi4_oif = dev_out->ifindex; |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 11aaef0939b2..711c3e2e17b1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -997,6 +997,15 @@ static struct ctl_table ipv4_net_table[] = { | |||
997 | .extra1 = &zero, | 997 | .extra1 = &zero, |
998 | .extra2 = &one, | 998 | .extra2 = &one, |
999 | }, | 999 | }, |
1000 | { | ||
1001 | .procname = "fib_multipath_hash_policy", | ||
1002 | .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, | ||
1003 | .maxlen = sizeof(int), | ||
1004 | .mode = 0644, | ||
1005 | .proc_handler = proc_dointvec_minmax, | ||
1006 | .extra1 = &zero, | ||
1007 | .extra2 = &one, | ||
1008 | }, | ||
1000 | #endif | 1009 | #endif |
1001 | { | 1010 | { |
1002 | .procname = "ip_unprivileged_port_start", | 1011 | .procname = "ip_unprivileged_port_start", |