summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt8
-rw-r--r--include/net/ip_fib.h14
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/route.h6
-rw-r--r--net/ipv4/fib_semantics.c11
-rw-r--r--net/ipv4/icmp.c19
-rw-r--r--net/ipv4/route.c92
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
8 files changed, 100 insertions, 60 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ed3d0791eb27..b57308e76b1d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -73,6 +73,14 @@ fib_multipath_use_neigh - BOOLEAN
73 0 - disabled 73 0 - disabled
74 1 - enabled 74 1 - enabled
75 75
76fib_multipath_hash_policy - INTEGER
77 Controls which hash policy to use for multipath routes. Only valid
78 for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled.
79 Default: 0 (Layer 3)
80 Possible values:
81 0 - Layer 3 (source and destination addresses)
82 1 - Layer 4 (addresses plus source/destination ports and IP protocol)
83
76route/max_size - INTEGER 84route/max_size - INTEGER
77 Maximum number of routes allowed in the kernel. Increase 85 Maximum number of routes allowed in the kernel. Increase
78 this when using large numbers of interfaces and/or routes. 86 this when using large numbers of interfaces and/or routes.
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 272e62e139e0..6692c5758b33 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -395,17 +395,13 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
395int fib_sync_down_addr(struct net_device *dev, __be32 local); 395int fib_sync_down_addr(struct net_device *dev, __be32 local);
396int fib_sync_up(struct net_device *dev, unsigned int nh_flags); 396int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
397 397
398extern u32 fib_multipath_secret __read_mostly; 398#ifdef CONFIG_IP_ROUTE_MULTIPATH
399 399int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
400static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) 400 const struct sk_buff *skb);
401{ 401#endif
402 return jhash_2words((__force u32)saddr, (__force u32)daddr,
403 fib_multipath_secret) >> 1;
404}
405
406void fib_select_multipath(struct fib_result *res, int hash); 402void fib_select_multipath(struct fib_result *res, int hash);
407void fib_select_path(struct net *net, struct fib_result *res, 403void fib_select_path(struct net *net, struct fib_result *res,
408 struct flowi4 *fl4, int mp_hash); 404 struct flowi4 *fl4, const struct sk_buff *skb);
409 405
410/* Exported by fib_trie.c */ 406/* Exported by fib_trie.c */
411void fib_trie_init(void); 407void fib_trie_init(void);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2e9d649ba169..a0e89190a3e9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -151,6 +151,7 @@ struct netns_ipv4 {
151#endif 151#endif
152#ifdef CONFIG_IP_ROUTE_MULTIPATH 152#ifdef CONFIG_IP_ROUTE_MULTIPATH
153 int sysctl_fib_multipath_use_neigh; 153 int sysctl_fib_multipath_use_neigh;
154 int sysctl_fib_multipath_hash_policy;
154#endif 155#endif
155 156
156 unsigned int fib_seq; /* protected by rtnl_mutex */ 157 unsigned int fib_seq; /* protected by rtnl_mutex */
diff --git a/include/net/route.h b/include/net/route.h
index c0874c87c173..2cc0e14c6359 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -113,13 +113,13 @@ struct in_device;
113int ip_rt_init(void); 113int ip_rt_init(void);
114void rt_cache_flush(struct net *net); 114void rt_cache_flush(struct net *net);
115void rt_flush_dev(struct net_device *dev); 115void rt_flush_dev(struct net_device *dev);
116struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, 116struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *flp,
117 int mp_hash); 117 const struct sk_buff *skb);
118 118
119static inline struct rtable *__ip_route_output_key(struct net *net, 119static inline struct rtable *__ip_route_output_key(struct net *net,
120 struct flowi4 *flp) 120 struct flowi4 *flp)
121{ 121{
122 return __ip_route_output_key_hash(net, flp, -1); 122 return __ip_route_output_key_hash(net, flp, NULL);
123} 123}
124 124
125struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, 125struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 317026a39cfa..da449ddb8cc1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,7 +57,6 @@ static unsigned int fib_info_cnt;
57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58 58
59#ifdef CONFIG_IP_ROUTE_MULTIPATH 59#ifdef CONFIG_IP_ROUTE_MULTIPATH
60u32 fib_multipath_secret __read_mostly;
61 60
62#define for_nexthops(fi) { \ 61#define for_nexthops(fi) { \
63 int nhsel; const struct fib_nh *nh; \ 62 int nhsel; const struct fib_nh *nh; \
@@ -576,9 +575,6 @@ static void fib_rebalance(struct fib_info *fi)
576 575
577 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); 576 atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
578 } endfor_nexthops(fi); 577 } endfor_nexthops(fi);
579
580 net_get_random_once(&fib_multipath_secret,
581 sizeof(fib_multipath_secret));
582} 578}
583 579
584static inline void fib_add_weight(struct fib_info *fi, 580static inline void fib_add_weight(struct fib_info *fi,
@@ -1641,7 +1637,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
1641#endif 1637#endif
1642 1638
1643void fib_select_path(struct net *net, struct fib_result *res, 1639void fib_select_path(struct net *net, struct fib_result *res,
1644 struct flowi4 *fl4, int mp_hash) 1640 struct flowi4 *fl4, const struct sk_buff *skb)
1645{ 1641{
1646 bool oif_check; 1642 bool oif_check;
1647 1643
@@ -1650,10 +1646,9 @@ void fib_select_path(struct net *net, struct fib_result *res,
1650 1646
1651#ifdef CONFIG_IP_ROUTE_MULTIPATH 1647#ifdef CONFIG_IP_ROUTE_MULTIPATH
1652 if (res->fi->fib_nhs > 1 && oif_check) { 1648 if (res->fi->fib_nhs > 1 && oif_check) {
1653 if (mp_hash < 0) 1649 int h = fib_multipath_hash(res->fi, fl4, skb);
1654 mp_hash = get_hash_from_flowi4(fl4) >> 1;
1655 1650
1656 fib_select_multipath(res, mp_hash); 1651 fib_select_multipath(res, h);
1657 } 1652 }
1658 else 1653 else
1659#endif 1654#endif
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc310db2708b..43318b5f5647 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -464,22 +464,6 @@ out_bh_enable:
464 local_bh_enable(); 464 local_bh_enable();
465} 465}
466 466
467#ifdef CONFIG_IP_ROUTE_MULTIPATH
468
469/* Source and destination is swapped. See ip_multipath_icmp_hash */
470static int icmp_multipath_hash_skb(const struct sk_buff *skb)
471{
472 const struct iphdr *iph = ip_hdr(skb);
473
474 return fib_multipath_hash(iph->daddr, iph->saddr);
475}
476
477#else
478
479#define icmp_multipath_hash_skb(skb) (-1)
480
481#endif
482
483static struct rtable *icmp_route_lookup(struct net *net, 467static struct rtable *icmp_route_lookup(struct net *net,
484 struct flowi4 *fl4, 468 struct flowi4 *fl4,
485 struct sk_buff *skb_in, 469 struct sk_buff *skb_in,
@@ -505,8 +489,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
505 fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev); 489 fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
506 490
507 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); 491 security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
508 rt = __ip_route_output_key_hash(net, fl4, 492 rt = __ip_route_output_key_hash(net, fl4, skb_in);
509 icmp_multipath_hash_skb(skb_in));
510 if (IS_ERR(rt)) 493 if (IS_ERR(rt))
511 return rt; 494 return rt;
512 495
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8471dd116771..5dda1ef81c7e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1734,45 +1734,97 @@ out:
1734} 1734}
1735 1735
1736#ifdef CONFIG_IP_ROUTE_MULTIPATH 1736#ifdef CONFIG_IP_ROUTE_MULTIPATH
1737
1738/* To make ICMP packets follow the right flow, the multipath hash is 1737/* To make ICMP packets follow the right flow, the multipath hash is
1739 * calculated from the inner IP addresses in reverse order. 1738 * calculated from the inner IP addresses.
1740 */ 1739 */
1741static int ip_multipath_icmp_hash(struct sk_buff *skb) 1740static void ip_multipath_l3_keys(const struct sk_buff *skb,
1741 struct flow_keys *hash_keys)
1742{ 1742{
1743 const struct iphdr *outer_iph = ip_hdr(skb); 1743 const struct iphdr *outer_iph = ip_hdr(skb);
1744 struct icmphdr _icmph; 1744 const struct iphdr *inner_iph;
1745 const struct icmphdr *icmph; 1745 const struct icmphdr *icmph;
1746 struct iphdr _inner_iph; 1746 struct iphdr _inner_iph;
1747 const struct iphdr *inner_iph; 1747 struct icmphdr _icmph;
1748
1749 hash_keys->addrs.v4addrs.src = outer_iph->saddr;
1750 hash_keys->addrs.v4addrs.dst = outer_iph->daddr;
1751 if (likely(outer_iph->protocol != IPPROTO_ICMP))
1752 return;
1748 1753
1749 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) 1754 if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
1750 goto standard_hash; 1755 return;
1751 1756
1752 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), 1757 icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
1753 &_icmph); 1758 &_icmph);
1754 if (!icmph) 1759 if (!icmph)
1755 goto standard_hash; 1760 return;
1756 1761
1757 if (icmph->type != ICMP_DEST_UNREACH && 1762 if (icmph->type != ICMP_DEST_UNREACH &&
1758 icmph->type != ICMP_REDIRECT && 1763 icmph->type != ICMP_REDIRECT &&
1759 icmph->type != ICMP_TIME_EXCEEDED && 1764 icmph->type != ICMP_TIME_EXCEEDED &&
1760 icmph->type != ICMP_PARAMETERPROB) { 1765 icmph->type != ICMP_PARAMETERPROB)
1761 goto standard_hash; 1766 return;
1762 }
1763 1767
1764 inner_iph = skb_header_pointer(skb, 1768 inner_iph = skb_header_pointer(skb,
1765 outer_iph->ihl * 4 + sizeof(_icmph), 1769 outer_iph->ihl * 4 + sizeof(_icmph),
1766 sizeof(_inner_iph), &_inner_iph); 1770 sizeof(_inner_iph), &_inner_iph);
1767 if (!inner_iph) 1771 if (!inner_iph)
1768 goto standard_hash; 1772 return;
1773 hash_keys->addrs.v4addrs.src = inner_iph->saddr;
1774 hash_keys->addrs.v4addrs.dst = inner_iph->daddr;
1775}
1769 1776
1770 return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); 1777/* If skb is non-NULL the hash keys come from the packet and fl4 may be NULL; otherwise fl4 must be non-NULL. */
1778int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
1779 const struct sk_buff *skb)
1780{
1781 struct net *net = fi->fib_net;
1782 struct flow_keys hash_keys;
1783 u32 mhash;
1771 1784
1772standard_hash: 1785 switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
1773 return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); 1786 case 0:
1774} 1787 memset(&hash_keys, 0, sizeof(hash_keys));
1788 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1789 if (skb) {
1790 ip_multipath_l3_keys(skb, &hash_keys);
1791 } else {
1792 hash_keys.addrs.v4addrs.src = fl4->saddr;
1793 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1794 }
1795 break;
1796 case 1:
1797 /* skb is provided when forwarding and by icmp_route_lookup() */
1798 if (skb) {
1799 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1800 struct flow_keys keys;
1801
1802 /* short-circuit if we already have L4 hash present */
1803 if (skb->l4_hash)
1804 return skb_get_hash_raw(skb) >> 1;
1805 memset(&hash_keys, 0, sizeof(hash_keys));
1806 skb_flow_dissect_flow_keys(skb, &keys, flag);
1807 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
1808 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
1809 hash_keys.ports.src = keys.ports.src;
1810 hash_keys.ports.dst = keys.ports.dst;
1811 hash_keys.basic.ip_proto = keys.basic.ip_proto;
1812 } else {
1813 memset(&hash_keys, 0, sizeof(hash_keys));
1814 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1815 hash_keys.addrs.v4addrs.src = fl4->saddr;
1816 hash_keys.addrs.v4addrs.dst = fl4->daddr;
1817 hash_keys.ports.src = fl4->fl4_sport;
1818 hash_keys.ports.dst = fl4->fl4_dport;
1819 hash_keys.basic.ip_proto = fl4->flowi4_proto;
1820 }
1821 break;
1822 }
1823 mhash = flow_hash_from_keys(&hash_keys);
1775 1824
1825 return mhash >> 1;
1826}
1827EXPORT_SYMBOL_GPL(fib_multipath_hash);
1776#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 1828#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1777 1829
1778static int ip_mkroute_input(struct sk_buff *skb, 1830static int ip_mkroute_input(struct sk_buff *skb,
@@ -1782,12 +1834,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
1782{ 1834{
1783#ifdef CONFIG_IP_ROUTE_MULTIPATH 1835#ifdef CONFIG_IP_ROUTE_MULTIPATH
1784 if (res->fi && res->fi->fib_nhs > 1) { 1836 if (res->fi && res->fi->fib_nhs > 1) {
1785 int h; 1837 int h = fib_multipath_hash(res->fi, NULL, skb);
1786 1838
1787 if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
1788 h = ip_multipath_icmp_hash(skb);
1789 else
1790 h = fib_multipath_hash(saddr, daddr);
1791 fib_select_multipath(res, h); 1839 fib_select_multipath(res, h);
1792 } 1840 }
1793#endif 1841#endif
@@ -2203,7 +2251,7 @@ add:
2203 */ 2251 */
2204 2252
2205struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, 2253struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2206 int mp_hash) 2254 const struct sk_buff *skb)
2207{ 2255{
2208 struct net_device *dev_out = NULL; 2256 struct net_device *dev_out = NULL;
2209 __u8 tos = RT_FL_TOS(fl4); 2257 __u8 tos = RT_FL_TOS(fl4);
@@ -2365,7 +2413,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2365 goto make_route; 2413 goto make_route;
2366 } 2414 }
2367 2415
2368 fib_select_path(net, &res, fl4, mp_hash); 2416 fib_select_path(net, &res, fl4, skb);
2369 2417
2370 dev_out = FIB_RES_DEV(res); 2418 dev_out = FIB_RES_DEV(res);
2371 fl4->flowi4_oif = dev_out->ifindex; 2419 fl4->flowi4_oif = dev_out->ifindex;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 11aaef0939b2..711c3e2e17b1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -997,6 +997,15 @@ static struct ctl_table ipv4_net_table[] = {
997 .extra1 = &zero, 997 .extra1 = &zero,
998 .extra2 = &one, 998 .extra2 = &one,
999 }, 999 },
1000 {
1001 .procname = "fib_multipath_hash_policy",
1002 .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
1003 .maxlen = sizeof(int),
1004 .mode = 0644,
1005 .proc_handler = proc_dointvec_minmax,
1006 .extra1 = &zero,
1007 .extra2 = &one,
1008 },
1000#endif 1009#endif
1001 { 1010 {
1002 .procname = "ip_unprivileged_port_start", 1011 .procname = "ip_unprivileged_port_start",