aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2007-06-10 20:22:39 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-07-11 01:05:57 -0400
commite06e7c615877026544ad7f8b309d1a3706410383 (patch)
treeeb087b49279e6db492039a5d684ca9acb13265a3
parent4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff)
[IPV4]: The scheduled removal of multipath cached routing support.
With help from Chris Wedgwood. Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/feature-removal-schedule.txt19
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/ip_mp_alg.h22
-rw-r--r--include/linux/rtnetlink.h2
-rw-r--r--include/net/dst.h1
-rw-r--r--include/net/ip_fib.h16
-rw-r--r--include/net/ip_mp_alg.h96
-rw-r--r--include/net/route.h1
-rw-r--r--net/ipv4/Kconfig42
-rw-r--r--net/ipv4/Makefile5
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_semantics.c16
-rw-r--r--net/ipv4/multipath.c55
-rw-r--r--net/ipv4/multipath_drr.c249
-rw-r--r--net/ipv4/multipath_random.c114
-rw-r--r--net/ipv4/multipath_rr.c95
-rw-r--r--net/ipv4/multipath_wrandom.c329
-rw-r--r--net/ipv4/route.c259
18 files changed, 12 insertions, 1314 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3a159dac04f5..484250dcdbe0 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -262,25 +262,6 @@ Who: Richard Purdie <rpurdie@rpsys.net>
262 262
263--------------------------- 263---------------------------
264 264
265What: Multipath cached routing support in ipv4
266When: in 2.6.23
267Why: Code was merged, then submitter immediately disappeared leaving
268 us with no maintainer and lots of bugs. The code should not have
269 been merged in the first place, and many aspects of it's
270 implementation are blocking more critical core networking
271 development. It's marked EXPERIMENTAL and no distribution
272 enables it because it cause obscure crashes due to unfixable bugs
273 (interfaces don't return errors so memory allocation can't be
274 handled, calling contexts of these interfaces make handling
275 errors impossible too because they get called after we've
276 totally commited to creating a route object, for example).
277 This problem has existed for years and no forward progress
278 has ever been made, and nobody steps up to try and salvage
279 this code, so we're going to finally just get rid of it.
280Who: David S. Miller <davem@davemloft.net>
281
282---------------------------
283
284What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) 265What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
285When: December 2007 266When: December 2007
286Why: These functions are a leftover from 2.4 times. They have several 267Why: These functions are a leftover from 2.4 times. They have several
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index afae306b177c..d94451682761 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -91,7 +91,6 @@ header-y += in6.h
91header-y += in_route.h 91header-y += in_route.h
92header-y += ioctl.h 92header-y += ioctl.h
93header-y += ipmi_msgdefs.h 93header-y += ipmi_msgdefs.h
94header-y += ip_mp_alg.h
95header-y += ipsec.h 94header-y += ipsec.h
96header-y += ipx.h 95header-y += ipx.h
97header-y += irda.h 96header-y += irda.h
diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h
deleted file mode 100644
index e234e2008f5d..000000000000
--- a/include/linux/ip_mp_alg.h
+++ /dev/null
@@ -1,22 +0,0 @@
1/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
2 *
3 * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
5 */
6
7#ifndef _LINUX_IP_MP_ALG_H
8#define _LINUX_IP_MP_ALG_H
9
10enum ip_mp_alg {
11 IP_MP_ALG_NONE,
12 IP_MP_ALG_RR,
13 IP_MP_ALG_DRR,
14 IP_MP_ALG_RANDOM,
15 IP_MP_ALG_WRANDOM,
16 __IP_MP_ALG_MAX
17};
18
19#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
20
21#endif /* _LINUX_IP_MP_ALG_H */
22
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1fae30af91f3..612785848532 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -261,7 +261,7 @@ enum rtattr_type_t
261 RTA_FLOW, 261 RTA_FLOW,
262 RTA_CACHEINFO, 262 RTA_CACHEINFO,
263 RTA_SESSION, 263 RTA_SESSION,
264 RTA_MP_ALGO, 264 RTA_MP_ALGO, /* no longer used */
265 RTA_TABLE, 265 RTA_TABLE,
266 __RTA_MAX 266 __RTA_MAX
267}; 267};
diff --git a/include/net/dst.h b/include/net/dst.h
index 82270f9332db..e9ff4a4caef9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -47,7 +47,6 @@ struct dst_entry
47#define DST_NOXFRM 2 47#define DST_NOXFRM 2
48#define DST_NOPOLICY 4 48#define DST_NOPOLICY 4
49#define DST_NOHASH 8 49#define DST_NOHASH 8
50#define DST_BALANCED 0x10
51 unsigned long expires; 50 unsigned long expires;
52 51
53 unsigned short header_len; /* more space at head required */ 52 unsigned short header_len; /* more space at head required */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69252cbe05b0..8cadc77c7df4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -39,7 +39,6 @@ struct fib_config {
39 int fc_mx_len; 39 int fc_mx_len;
40 int fc_mp_len; 40 int fc_mp_len;
41 u32 fc_flow; 41 u32 fc_flow;
42 u32 fc_mp_alg;
43 u32 fc_nlflags; 42 u32 fc_nlflags;
44 struct nl_info fc_nlinfo; 43 struct nl_info fc_nlinfo;
45 }; 44 };
@@ -86,9 +85,6 @@ struct fib_info {
86#ifdef CONFIG_IP_ROUTE_MULTIPATH 85#ifdef CONFIG_IP_ROUTE_MULTIPATH
87 int fib_power; 86 int fib_power;
88#endif 87#endif
89#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
90 u32 fib_mp_alg;
91#endif
92 struct fib_nh fib_nh[0]; 88 struct fib_nh fib_nh[0];
93#define fib_dev fib_nh[0].nh_dev 89#define fib_dev fib_nh[0].nh_dev
94}; 90};
@@ -103,10 +99,6 @@ struct fib_result {
103 unsigned char nh_sel; 99 unsigned char nh_sel;
104 unsigned char type; 100 unsigned char type;
105 unsigned char scope; 101 unsigned char scope;
106#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
107 __be32 network;
108 __be32 netmask;
109#endif
110 struct fib_info *fi; 102 struct fib_info *fi;
111#ifdef CONFIG_IP_MULTIPLE_TABLES 103#ifdef CONFIG_IP_MULTIPLE_TABLES
112 struct fib_rule *r; 104 struct fib_rule *r;
@@ -145,14 +137,6 @@ struct fib_result_nl {
145#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) 137#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev)
146#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) 138#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif)
147 139
148#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
149#define FIB_RES_NETWORK(res) ((res).network)
150#define FIB_RES_NETMASK(res) ((res).netmask)
151#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
152#define FIB_RES_NETWORK(res) (0)
153#define FIB_RES_NETMASK(res) (0)
154#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
155
156struct fib_table { 140struct fib_table {
157 struct hlist_node tb_hlist; 141 struct hlist_node tb_hlist;
158 u32 tb_id; 142 u32 tb_id;
diff --git a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h
deleted file mode 100644
index 25b56571e54b..000000000000
--- a/include/net/ip_mp_alg.h
+++ /dev/null
@@ -1,96 +0,0 @@
1/* ip_mp_alg.h: IPV4 multipath algorithm support.
2 *
3 * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
5 */
6
7#ifndef _NET_IP_MP_ALG_H
8#define _NET_IP_MP_ALG_H
9
10#include <linux/ip_mp_alg.h>
11#include <net/flow.h>
12#include <net/route.h>
13
14struct fib_nh;
15
16struct ip_mp_alg_ops {
17 void (*mp_alg_select_route)(const struct flowi *flp,
18 struct rtable *rth, struct rtable **rp);
19 void (*mp_alg_flush)(void);
20 void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask,
21 unsigned char prefixlen,
22 const struct fib_nh *nh);
23 void (*mp_alg_remove)(struct rtable *rth);
24};
25
26extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
27extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
28
29extern struct ip_mp_alg_ops *ip_mp_alg_table[];
30
31static inline int multipath_select_route(const struct flowi *flp,
32 struct rtable *rth,
33 struct rtable **rp)
34{
35#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
36 struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
37
38 /* mp_alg_select_route _MUST_ be implemented */
39 if (ops && (rth->u.dst.flags & DST_BALANCED)) {
40 ops->mp_alg_select_route(flp, rth, rp);
41 return 1;
42 }
43#endif
44 return 0;
45}
46
47static inline void multipath_flush(void)
48{
49#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
50 int i;
51
52 for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
53 struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
54
55 if (ops && ops->mp_alg_flush)
56 ops->mp_alg_flush();
57 }
58#endif
59}
60
61static inline void multipath_set_nhinfo(struct rtable *rth,
62 __be32 network, __be32 netmask,
63 unsigned char prefixlen,
64 const struct fib_nh *nh)
65{
66#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
67 struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
68
69 if (ops && ops->mp_alg_set_nhinfo)
70 ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
71#endif
72}
73
74static inline void multipath_remove(struct rtable *rth)
75{
76#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
77 struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
78
79 if (ops && ops->mp_alg_remove &&
80 (rth->u.dst.flags & DST_BALANCED))
81 ops->mp_alg_remove(rth);
82#endif
83}
84
85static inline int multipath_comparekeys(const struct flowi *flp1,
86 const struct flowi *flp2)
87{
88 return flp1->fl4_dst == flp2->fl4_dst &&
89 flp1->fl4_src == flp2->fl4_src &&
90 flp1->oif == flp2->oif &&
91 flp1->mark == flp2->mark &&
92 !((flp1->fl4_tos ^ flp2->fl4_tos) &
93 (IPTOS_RT_MASK | RTO_ONLINK));
94}
95
96#endif /* _NET_IP_MP_ALG_H */
diff --git a/include/net/route.h b/include/net/route.h
index 749e4dfe5ff3..188b8936bb27 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -62,7 +62,6 @@ struct rtable
62 62
63 unsigned rt_flags; 63 unsigned rt_flags;
64 __u16 rt_type; 64 __u16 rt_type;
65 __u16 rt_multipath_alg;
66 65
67 __be32 rt_dst; /* Path destination */ 66 __be32 rt_dst; /* Path destination */
68 __be32 rt_src; /* Path source */ 67 __be32 rt_src; /* Path source */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 010fbb2d45e9..fb7909774254 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH
116 equal "cost" and chooses one of them in a non-deterministic fashion 116 equal "cost" and chooses one of them in a non-deterministic fashion
117 if a matching packet arrives. 117 if a matching packet arrives.
118 118
119config IP_ROUTE_MULTIPATH_CACHED
120 bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
121 depends on IP_ROUTE_MULTIPATH
122 help
123 Normally, equal cost multipath routing is not supported by the
124 routing cache. If you say Y here, alternative routes are cached
125 and on cache lookup a route is chosen in a configurable fashion.
126
127 If unsure, say N.
128
129config IP_ROUTE_MULTIPATH_RR
130 tristate "MULTIPATH: round robin algorithm"
131 depends on IP_ROUTE_MULTIPATH_CACHED
132 help
133 Multipath routes are chosen according to Round Robin
134
135config IP_ROUTE_MULTIPATH_RANDOM
136 tristate "MULTIPATH: random algorithm"
137 depends on IP_ROUTE_MULTIPATH_CACHED
138 help
139 Multipath routes are chosen in a random fashion. Actually,
140 there is no weight for a route. The advantage of this policy
141 is that it is implemented stateless and therefore introduces only
142 a very small delay.
143
144config IP_ROUTE_MULTIPATH_WRANDOM
145 tristate "MULTIPATH: weighted random algorithm"
146 depends on IP_ROUTE_MULTIPATH_CACHED
147 help
148 Multipath routes are chosen in a weighted random fashion.
149 The per route weights are the weights visible via ip route 2. As the
150 corresponding state management introduces some overhead routing delay
151 is increased.
152
153config IP_ROUTE_MULTIPATH_DRR
154 tristate "MULTIPATH: interface round robin algorithm"
155 depends on IP_ROUTE_MULTIPATH_CACHED
156 help
157 Connections are distributed in a round robin fashion over the
158 available interfaces. This policy makes sense if the connections
159 should be primarily distributed on interfaces and not on routes.
160
161config IP_ROUTE_VERBOSE 119config IP_ROUTE_VERBOSE
162 bool "IP: verbose route monitoring" 120 bool "IP: verbose route monitoring"
163 depends on IP_ADVANCED_ROUTER 121 depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4ff6c151d7f3..fbf1674e0c2a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
29obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o 29obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
30obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o 30obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
31obj-$(CONFIG_IP_PNP) += ipconfig.o 31obj-$(CONFIG_IP_PNP) += ipconfig.o
32obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
33obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
34obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
35obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
36obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ 32obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/
37obj-$(CONFIG_IP_VS) += ipvs/ 33obj-$(CONFIG_IP_VS) += ipvs/
38obj-$(CONFIG_INET_DIAG) += inet_diag.o 34obj-$(CONFIG_INET_DIAG) += inet_diag.o
39obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
40obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o 35obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
41obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o 36obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
42obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o 37obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 311d633f7f39..2eb909be8041 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
453 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 453 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
454 [RTA_PROTOINFO] = { .type = NLA_U32 }, 454 [RTA_PROTOINFO] = { .type = NLA_U32 },
455 [RTA_FLOW] = { .type = NLA_U32 }, 455 [RTA_FLOW] = { .type = NLA_U32 },
456 [RTA_MP_ALGO] = { .type = NLA_U32 },
457}; 456};
458 457
459static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, 458static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
515 case RTA_FLOW: 514 case RTA_FLOW:
516 cfg->fc_flow = nla_get_u32(attr); 515 cfg->fc_flow = nla_get_u32(attr);
517 break; 516 break;
518 case RTA_MP_ALGO:
519 cfg->fc_mp_alg = nla_get_u32(attr);
520 break;
521 case RTA_TABLE: 517 case RTA_TABLE:
522 cfg->fc_table = nla_get_u32(attr); 518 cfg->fc_table = nla_get_u32(attr);
523 break; 519 break;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index bb94550d95c3..c434119deb52 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,7 +42,6 @@
42#include <net/tcp.h> 42#include <net/tcp.h>
43#include <net/sock.h> 43#include <net/sock.h>
44#include <net/ip_fib.h> 44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
46#include <net/netlink.h> 45#include <net/netlink.h>
47#include <net/nexthop.h> 46#include <net/nexthop.h>
48 47
@@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
697 goto err_inval; 696 goto err_inval;
698 } 697 }
699#endif 698#endif
700#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
701 if (cfg->fc_mp_alg) {
702 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
703 cfg->fc_mp_alg > IP_MP_ALG_MAX)
704 goto err_inval;
705 }
706#endif
707 699
708 err = -ENOBUFS; 700 err = -ENOBUFS;
709 if (fib_info_cnt >= fib_hash_size) { 701 if (fib_info_cnt >= fib_hash_size) {
@@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
791#endif 783#endif
792 } 784 }
793 785
794#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
795 fi->fib_mp_alg = cfg->fc_mp_alg;
796#endif
797
798 if (fib_props[cfg->fc_type].error) { 786 if (fib_props[cfg->fc_type].error) {
799 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 787 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
800 goto err_inval; 788 goto err_inval;
@@ -940,10 +928,6 @@ out_fill_res:
940 res->type = fa->fa_type; 928 res->type = fa->fa_type;
941 res->scope = fa->fa_scope; 929 res->scope = fa->fa_scope;
942 res->fi = fa->fa_info; 930 res->fi = fa->fa_info;
943#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
944 res->netmask = mask;
945 res->network = zone & inet_make_mask(prefixlen);
946#endif
947 atomic_inc(&res->fi->fib_clntref); 931 atomic_inc(&res->fi->fib_clntref);
948 return 0; 932 return 0;
949} 933}
diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c
deleted file mode 100644
index 4e9ca7c76407..000000000000
--- a/net/ipv4/multipath.c
+++ /dev/null
@@ -1,55 +0,0 @@
1/* multipath.c: IPV4 multipath algorithm support.
2 *
3 * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
4 * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
5 */
6
7#include <linux/module.h>
8#include <linux/errno.h>
9#include <linux/netdevice.h>
10#include <linux/spinlock.h>
11
12#include <net/ip_mp_alg.h>
13
14static DEFINE_SPINLOCK(alg_table_lock);
15struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
16
17int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
18{
19 struct ip_mp_alg_ops **slot;
20 int err;
21
22 if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
23 !ops->mp_alg_select_route)
24 return -EINVAL;
25
26 spin_lock(&alg_table_lock);
27 slot = &ip_mp_alg_table[n];
28 if (*slot != NULL) {
29 err = -EBUSY;
30 } else {
31 *slot = ops;
32 err = 0;
33 }
34 spin_unlock(&alg_table_lock);
35
36 return err;
37}
38EXPORT_SYMBOL(multipath_alg_register);
39
40void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
41{
42 struct ip_mp_alg_ops **slot;
43
44 if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
45 return;
46
47 spin_lock(&alg_table_lock);
48 slot = &ip_mp_alg_table[n];
49 if (*slot == ops)
50 *slot = NULL;
51 spin_unlock(&alg_table_lock);
52
53 synchronize_net();
54}
55EXPORT_SYMBOL(multipath_alg_unregister);
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
deleted file mode 100644
index b03c5ca2c823..000000000000
--- a/net/ipv4/multipath_drr.c
+++ /dev/null
@@ -1,249 +0,0 @@
1/*
2 * Device round robin policy for multipath.
3 *
4 *
5 * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
6 *
7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <asm/system.h>
16#include <asm/uaccess.h>
17#include <linux/types.h>
18#include <linux/errno.h>
19#include <linux/timer.h>
20#include <linux/mm.h>
21#include <linux/kernel.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/socket.h>
25#include <linux/in.h>
26#include <linux/inet.h>
27#include <linux/netdevice.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/proc_fs.h>
31#include <linux/seq_file.h>
32#include <linux/module.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <net/ip.h>
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
39#include <net/icmp.h>
40#include <net/udp.h>
41#include <net/raw.h>
42#include <linux/notifier.h>
43#include <linux/if_arp.h>
44#include <linux/netfilter_ipv4.h>
45#include <net/ipip.h>
46#include <net/checksum.h>
47#include <net/ip_mp_alg.h>
48
49struct multipath_device {
50 int ifi; /* interface index of device */
51 atomic_t usecount;
52 int allocated;
53};
54
55#define MULTIPATH_MAX_DEVICECANDIDATES 10
56
57static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
58static DEFINE_SPINLOCK(state_lock);
59
60static int inline __multipath_findslot(void)
61{
62 int i;
63
64 for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
65 if (state[i].allocated == 0)
66 return i;
67 }
68 return -1;
69}
70
71static int inline __multipath_finddev(int ifindex)
72{
73 int i;
74
75 for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
76 if (state[i].allocated != 0 &&
77 state[i].ifi == ifindex)
78 return i;
79 }
80 return -1;
81}
82
83static int drr_dev_event(struct notifier_block *this,
84 unsigned long event, void *ptr)
85{
86 struct net_device *dev = ptr;
87 int devidx;
88
89 switch (event) {
90 case NETDEV_UNREGISTER:
91 case NETDEV_DOWN:
92 spin_lock_bh(&state_lock);
93
94 devidx = __multipath_finddev(dev->ifindex);
95 if (devidx != -1) {
96 state[devidx].allocated = 0;
97 state[devidx].ifi = 0;
98 atomic_set(&state[devidx].usecount, 0);
99 }
100
101 spin_unlock_bh(&state_lock);
102 break;
103 }
104
105 return NOTIFY_DONE;
106}
107
108static struct notifier_block drr_dev_notifier = {
109 .notifier_call = drr_dev_event,
110};
111
112
113static void drr_safe_inc(atomic_t *usecount)
114{
115 int n;
116
117 atomic_inc(usecount);
118
119 n = atomic_read(usecount);
120 if (n <= 0) {
121 int i;
122
123 spin_lock_bh(&state_lock);
124
125 for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
126 atomic_set(&state[i].usecount, 0);
127
128 spin_unlock_bh(&state_lock);
129 }
130}
131
132static void drr_select_route(const struct flowi *flp,
133 struct rtable *first, struct rtable **rp)
134{
135 struct rtable *nh, *result, *cur_min;
136 int min_usecount = -1;
137 int devidx = -1;
138 int cur_min_devidx = -1;
139
140 /* 1. make sure all alt. nexthops have the same GC related data */
141 /* 2. determine the new candidate to be returned */
142 result = NULL;
143 cur_min = NULL;
144 for (nh = rcu_dereference(first); nh;
145 nh = rcu_dereference(nh->u.dst.rt_next)) {
146 if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
147 multipath_comparekeys(&nh->fl, flp)) {
148 int nh_ifidx = nh->u.dst.dev->ifindex;
149
150 nh->u.dst.lastuse = jiffies;
151 nh->u.dst.__use++;
152 if (result != NULL)
153 continue;
154
155 /* search for the output interface */
156
157 /* this is not SMP safe, only add/remove are
158 * SMP safe as wrong usecount updates have no big
159 * impact
160 */
161 devidx = __multipath_finddev(nh_ifidx);
162 if (devidx == -1) {
163 /* add the interface to the array
164 * SMP safe
165 */
166 spin_lock_bh(&state_lock);
167
168 /* due to SMP: search again */
169 devidx = __multipath_finddev(nh_ifidx);
170 if (devidx == -1) {
171 /* add entry for device */
172 devidx = __multipath_findslot();
173 if (devidx == -1) {
174 /* unlikely but possible */
175 continue;
176 }
177
178 state[devidx].allocated = 1;
179 state[devidx].ifi = nh_ifidx;
180 atomic_set(&state[devidx].usecount, 0);
181 min_usecount = 0;
182 }
183
184 spin_unlock_bh(&state_lock);
185 }
186
187 if (min_usecount == 0) {
188 /* if the device has not been used it is
189 * the primary target
190 */
191 drr_safe_inc(&state[devidx].usecount);
192 result = nh;
193 } else {
194 int count =
195 atomic_read(&state[devidx].usecount);
196
197 if (min_usecount == -1 ||
198 count < min_usecount) {
199 cur_min = nh;
200 cur_min_devidx = devidx;
201 min_usecount = count;
202 }
203 }
204 }
205 }
206
207 if (!result) {
208 if (cur_min) {
209 drr_safe_inc(&state[cur_min_devidx].usecount);
210 result = cur_min;
211 } else {
212 result = first;
213 }
214 }
215
216 *rp = result;
217}
218
219static struct ip_mp_alg_ops drr_ops = {
220 .mp_alg_select_route = drr_select_route,
221};
222
223static int __init drr_init(void)
224{
225 int err = register_netdevice_notifier(&drr_dev_notifier);
226
227 if (err)
228 return err;
229
230 err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
231 if (err)
232 goto fail;
233
234 return 0;
235
236fail:
237 unregister_netdevice_notifier(&drr_dev_notifier);
238 return err;
239}
240
241static void __exit drr_exit(void)
242{
243 unregister_netdevice_notifier(&drr_dev_notifier);
244 multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
245}
246
247module_init(drr_init);
248module_exit(drr_exit);
249MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c
deleted file mode 100644
index c312785d14d0..000000000000
--- a/net/ipv4/multipath_random.c
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * Random policy for multipath.
3 *
4 *
5 * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
6 *
7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <asm/system.h>
16#include <asm/uaccess.h>
17#include <linux/types.h>
18#include <linux/errno.h>
19#include <linux/timer.h>
20#include <linux/mm.h>
21#include <linux/kernel.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/socket.h>
25#include <linux/in.h>
26#include <linux/inet.h>
27#include <linux/netdevice.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/proc_fs.h>
31#include <linux/seq_file.h>
32#include <linux/module.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/random.h>
36#include <net/ip.h>
37#include <net/protocol.h>
38#include <linux/skbuff.h>
39#include <net/sock.h>
40#include <net/icmp.h>
41#include <net/udp.h>
42#include <net/raw.h>
43#include <linux/notifier.h>
44#include <linux/if_arp.h>
45#include <linux/netfilter_ipv4.h>
46#include <net/ipip.h>
47#include <net/checksum.h>
48#include <net/ip_mp_alg.h>
49
50#define MULTIPATH_MAX_CANDIDATES 40
51
52static void random_select_route(const struct flowi *flp,
53 struct rtable *first,
54 struct rtable **rp)
55{
56 struct rtable *rt;
57 struct rtable *decision;
58 unsigned char candidate_count = 0;
59
60 /* count all candidate */
61 for (rt = rcu_dereference(first); rt;
62 rt = rcu_dereference(rt->u.dst.rt_next)) {
63 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
64 multipath_comparekeys(&rt->fl, flp))
65 ++candidate_count;
66 }
67
68 /* choose a random candidate */
69 decision = first;
70 if (candidate_count > 1) {
71 unsigned char i = 0;
72 unsigned char candidate_no = (unsigned char)
73 (random32() % candidate_count);
74
75 /* find chosen candidate and adjust GC data for all candidates
76 * to ensure they stay in cache
77 */
78 for (rt = first; rt; rt = rt->u.dst.rt_next) {
79 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
80 multipath_comparekeys(&rt->fl, flp)) {
81 rt->u.dst.lastuse = jiffies;
82
83 if (i == candidate_no)
84 decision = rt;
85
86 if (i >= candidate_count)
87 break;
88
89 i++;
90 }
91 }
92 }
93
94 decision->u.dst.__use++;
95 *rp = decision;
96}
97
98static struct ip_mp_alg_ops random_ops = {
99 .mp_alg_select_route = random_select_route,
100};
101
102static int __init random_init(void)
103{
104 return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
105}
106
107static void __exit random_exit(void)
108{
109 multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
110}
111
112module_init(random_init);
113module_exit(random_exit);
114MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c
deleted file mode 100644
index 0ad22524f450..000000000000
--- a/net/ipv4/multipath_rr.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Round robin policy for multipath.
3 *
4 *
5 * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
6 *
7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <asm/system.h>
16#include <asm/uaccess.h>
17#include <linux/types.h>
18#include <linux/errno.h>
19#include <linux/timer.h>
20#include <linux/mm.h>
21#include <linux/kernel.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/socket.h>
25#include <linux/in.h>
26#include <linux/inet.h>
27#include <linux/netdevice.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/proc_fs.h>
31#include <linux/seq_file.h>
32#include <linux/module.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <net/ip.h>
36#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
39#include <net/icmp.h>
40#include <net/udp.h>
41#include <net/raw.h>
42#include <linux/notifier.h>
43#include <linux/if_arp.h>
44#include <linux/netfilter_ipv4.h>
45#include <net/ipip.h>
46#include <net/checksum.h>
47#include <net/ip_mp_alg.h>
48
49static void rr_select_route(const struct flowi *flp,
50 struct rtable *first, struct rtable **rp)
51{
52 struct rtable *nh, *result, *min_use_cand = NULL;
53 int min_use = -1;
54
55 /* 1. make sure all alt. nexthops have the same GC related data
56 * 2. determine the new candidate to be returned
57 */
58 result = NULL;
59 for (nh = rcu_dereference(first); nh;
60 nh = rcu_dereference(nh->u.dst.rt_next)) {
61 if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
62 multipath_comparekeys(&nh->fl, flp)) {
63 nh->u.dst.lastuse = jiffies;
64
65 if (min_use == -1 || nh->u.dst.__use < min_use) {
66 min_use = nh->u.dst.__use;
67 min_use_cand = nh;
68 }
69 }
70 }
71 result = min_use_cand;
72 if (!result)
73 result = first;
74
75 result->u.dst.__use++;
76 *rp = result;
77}
78
79static struct ip_mp_alg_ops rr_ops = {
80 .mp_alg_select_route = rr_select_route,
81};
82
83static int __init rr_init(void)
84{
85 return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
86}
87
88static void __exit rr_exit(void)
89{
90 multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
91}
92
93module_init(rr_init);
94module_exit(rr_exit);
95MODULE_LICENSE("GPL");
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
deleted file mode 100644
index 57c503694539..000000000000
--- a/net/ipv4/multipath_wrandom.c
+++ /dev/null
@@ -1,329 +0,0 @@
1/*
2 * Weighted random policy for multipath.
3 *
4 *
5 * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
6 *
7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
13 */
14
15#include <asm/system.h>
16#include <asm/uaccess.h>
17#include <linux/types.h>
18#include <linux/errno.h>
19#include <linux/timer.h>
20#include <linux/mm.h>
21#include <linux/kernel.h>
22#include <linux/fcntl.h>
23#include <linux/stat.h>
24#include <linux/socket.h>
25#include <linux/in.h>
26#include <linux/inet.h>
27#include <linux/netdevice.h>
28#include <linux/inetdevice.h>
29#include <linux/igmp.h>
30#include <linux/proc_fs.h>
31#include <linux/seq_file.h>
32#include <linux/module.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/random.h>
36#include <net/ip.h>
37#include <net/protocol.h>
38#include <linux/skbuff.h>
39#include <net/sock.h>
40#include <net/icmp.h>
41#include <net/udp.h>
42#include <net/raw.h>
43#include <linux/notifier.h>
44#include <linux/if_arp.h>
45#include <linux/netfilter_ipv4.h>
46#include <net/ipip.h>
47#include <net/checksum.h>
48#include <net/ip_fib.h>
49#include <net/ip_mp_alg.h>
50
51#define MULTIPATH_STATE_SIZE 15
52
53struct multipath_candidate {
54 struct multipath_candidate *next;
55 int power;
56 struct rtable *rt;
57};
58
59struct multipath_dest {
60 struct list_head list;
61
62 const struct fib_nh *nh_info;
63 __be32 netmask;
64 __be32 network;
65 unsigned char prefixlen;
66
67 struct rcu_head rcu;
68};
69
70struct multipath_bucket {
71 struct list_head head;
72 spinlock_t lock;
73};
74
75struct multipath_route {
76 struct list_head list;
77
78 int oif;
79 __be32 gw;
80 struct list_head dests;
81
82 struct rcu_head rcu;
83};
84
85/* state: primarily weight per route information */
86static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
87
88static unsigned char __multipath_lookup_weight(const struct flowi *fl,
89 const struct rtable *rt)
90{
91 const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
92 struct multipath_route *r;
93 struct multipath_route *target_route = NULL;
94 struct multipath_dest *d;
95 int weight = 1;
96
97 /* lookup the weight information for a certain route */
98 rcu_read_lock();
99
100 /* find state entry for gateway or add one if necessary */
101 list_for_each_entry_rcu(r, &state[state_idx].head, list) {
102 if (r->gw == rt->rt_gateway &&
103 r->oif == rt->idev->dev->ifindex) {
104 target_route = r;
105 break;
106 }
107 }
108
109 if (!target_route) {
110 /* this should not happen... but we are prepared */
111 printk( KERN_CRIT"%s: missing state for gateway: %u and " \
112 "device %d\n", __FUNCTION__, rt->rt_gateway,
113 rt->idev->dev->ifindex);
114 goto out;
115 }
116
117 /* find state entry for destination */
118 list_for_each_entry_rcu(d, &target_route->dests, list) {
119 __be32 targetnetwork = fl->fl4_dst &
120 inet_make_mask(d->prefixlen);
121
122 if ((targetnetwork & d->netmask) == d->network) {
123 weight = d->nh_info->nh_weight;
124 goto out;
125 }
126 }
127
128out:
129 rcu_read_unlock();
130 return weight;
131}
132
133static void wrandom_init_state(void)
134{
135 int i;
136
137 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
138 INIT_LIST_HEAD(&state[i].head);
139 spin_lock_init(&state[i].lock);
140 }
141}
142
143static void wrandom_select_route(const struct flowi *flp,
144 struct rtable *first,
145 struct rtable **rp)
146{
147 struct rtable *rt;
148 struct rtable *decision;
149 struct multipath_candidate *first_mpc = NULL;
150 struct multipath_candidate *mpc, *last_mpc = NULL;
151 int power = 0;
152 int last_power;
153 int selector;
154 const size_t size_mpc = sizeof(struct multipath_candidate);
155
156 /* collect all candidates and identify their weights */
157 for (rt = rcu_dereference(first); rt;
158 rt = rcu_dereference(rt->u.dst.rt_next)) {
159 if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
160 multipath_comparekeys(&rt->fl, flp)) {
161 struct multipath_candidate* mpc =
162 (struct multipath_candidate*)
163 kmalloc(size_mpc, GFP_ATOMIC);
164
165 if (!mpc)
166 return;
167
168 power += __multipath_lookup_weight(flp, rt) * 10000;
169
170 mpc->power = power;
171 mpc->rt = rt;
172 mpc->next = NULL;
173
174 if (!first_mpc)
175 first_mpc = mpc;
176 else
177 last_mpc->next = mpc;
178
179 last_mpc = mpc;
180 }
181 }
182
183 /* choose a weighted random candidate */
184 decision = first;
185 selector = random32() % power;
186 last_power = 0;
187
188 /* select candidate, adjust GC data and cleanup local state */
189 decision = first;
190 last_mpc = NULL;
191 for (mpc = first_mpc; mpc; mpc = mpc->next) {
192 mpc->rt->u.dst.lastuse = jiffies;
193 if (last_power <= selector && selector < mpc->power)
194 decision = mpc->rt;
195
196 last_power = mpc->power;
197 kfree(last_mpc);
198 last_mpc = mpc;
199 }
200
201 /* concurrent __multipath_flush may lead to !last_mpc */
202 kfree(last_mpc);
203
204 decision->u.dst.__use++;
205 *rp = decision;
206}
207
208static void wrandom_set_nhinfo(__be32 network,
209 __be32 netmask,
210 unsigned char prefixlen,
211 const struct fib_nh *nh)
212{
213 const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
214 struct multipath_route *r, *target_route = NULL;
215 struct multipath_dest *d, *target_dest = NULL;
216
217 /* store the weight information for a certain route */
218 spin_lock_bh(&state[state_idx].lock);
219
220 /* find state entry for gateway or add one if necessary */
221 list_for_each_entry_rcu(r, &state[state_idx].head, list) {
222 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
223 target_route = r;
224 break;
225 }
226 }
227
228 if (!target_route) {
229 const size_t size_rt = sizeof(struct multipath_route);
230 target_route = (struct multipath_route *)
231 kmalloc(size_rt, GFP_ATOMIC);
232
233 target_route->gw = nh->nh_gw;
234 target_route->oif = nh->nh_oif;
235 memset(&target_route->rcu, 0, sizeof(struct rcu_head));
236 INIT_LIST_HEAD(&target_route->dests);
237
238 list_add_rcu(&target_route->list, &state[state_idx].head);
239 }
240
241 /* find state entry for destination or add one if necessary */
242 list_for_each_entry_rcu(d, &target_route->dests, list) {
243 if (d->nh_info == nh) {
244 target_dest = d;
245 break;
246 }
247 }
248
249 if (!target_dest) {
250 const size_t size_dst = sizeof(struct multipath_dest);
251 target_dest = (struct multipath_dest*)
252 kmalloc(size_dst, GFP_ATOMIC);
253
254 target_dest->nh_info = nh;
255 target_dest->network = network;
256 target_dest->netmask = netmask;
257 target_dest->prefixlen = prefixlen;
258 memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
259
260 list_add_rcu(&target_dest->list, &target_route->dests);
261 }
262 /* else: we already stored this info for another destination =>
263 * we are finished
264 */
265
266 spin_unlock_bh(&state[state_idx].lock);
267}
268
269static void __multipath_free(struct rcu_head *head)
270{
271 struct multipath_route *rt = container_of(head, struct multipath_route,
272 rcu);
273 kfree(rt);
274}
275
276static void __multipath_free_dst(struct rcu_head *head)
277{
278 struct multipath_dest *dst = container_of(head,
279 struct multipath_dest,
280 rcu);
281 kfree(dst);
282}
283
284static void wrandom_flush(void)
285{
286 int i;
287
288 /* defere delete to all entries */
289 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
290 struct multipath_route *r;
291
292 spin_lock_bh(&state[i].lock);
293 list_for_each_entry_rcu(r, &state[i].head, list) {
294 struct multipath_dest *d;
295 list_for_each_entry_rcu(d, &r->dests, list) {
296 list_del_rcu(&d->list);
297 call_rcu(&d->rcu,
298 __multipath_free_dst);
299 }
300 list_del_rcu(&r->list);
301 call_rcu(&r->rcu,
302 __multipath_free);
303 }
304
305 spin_unlock_bh(&state[i].lock);
306 }
307}
308
309static struct ip_mp_alg_ops wrandom_ops = {
310 .mp_alg_select_route = wrandom_select_route,
311 .mp_alg_flush = wrandom_flush,
312 .mp_alg_set_nhinfo = wrandom_set_nhinfo,
313};
314
315static int __init wrandom_init(void)
316{
317 wrandom_init_state();
318
319 return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
320}
321
322static void __exit wrandom_exit(void)
323{
324 multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
325}
326
327module_init(wrandom_init);
328module_exit(wrandom_exit);
329MODULE_LICENSE("GPL");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 29ca63e81ced..85285021518b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -101,7 +101,6 @@
101#include <net/tcp.h> 101#include <net/tcp.h>
102#include <net/icmp.h> 102#include <net/icmp.h>
103#include <net/xfrm.h> 103#include <net/xfrm.h>
104#include <net/ip_mp_alg.h>
105#include <net/netevent.h> 104#include <net/netevent.h>
106#include <net/rtnetlink.h> 105#include <net/rtnetlink.h>
107#ifdef CONFIG_SYSCTL 106#ifdef CONFIG_SYSCTL
@@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = {
495 494
496static __inline__ void rt_free(struct rtable *rt) 495static __inline__ void rt_free(struct rtable *rt)
497{ 496{
498 multipath_remove(rt);
499 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 497 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
500} 498}
501 499
502static __inline__ void rt_drop(struct rtable *rt) 500static __inline__ void rt_drop(struct rtable *rt)
503{ 501{
504 multipath_remove(rt);
505 ip_rt_put(rt); 502 ip_rt_put(rt);
506 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 503 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
507} 504}
@@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
574 (fl1->iif ^ fl2->iif)) == 0; 571 (fl1->iif ^ fl2->iif)) == 0;
575} 572}
576 573
577#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
578static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
579 struct rtable *expentry,
580 int *removed_count)
581{
582 int passedexpired = 0;
583 struct rtable **nextstep = NULL;
584 struct rtable **rthp = chain_head;
585 struct rtable *rth;
586
587 if (removed_count)
588 *removed_count = 0;
589
590 while ((rth = *rthp) != NULL) {
591 if (rth == expentry)
592 passedexpired = 1;
593
594 if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
595 compare_keys(&(*rthp)->fl, &expentry->fl)) {
596 if (*rthp == expentry) {
597 *rthp = rth->u.dst.rt_next;
598 continue;
599 } else {
600 *rthp = rth->u.dst.rt_next;
601 rt_free(rth);
602 if (removed_count)
603 ++(*removed_count);
604 }
605 } else {
606 if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
607 passedexpired && !nextstep)
608 nextstep = &rth->u.dst.rt_next;
609
610 rthp = &rth->u.dst.rt_next;
611 }
612 }
613
614 rt_free(expentry);
615 if (removed_count)
616 ++(*removed_count);
617
618 return nextstep;
619}
620#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
621
622
623/* This runs via a timer and thus is always in BH context. */ 574/* This runs via a timer and thus is always in BH context. */
624static void rt_check_expire(unsigned long dummy) 575static void rt_check_expire(unsigned long dummy)
625{ 576{
@@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy)
658 } 609 }
659 610
660 /* Cleanup aged off entries. */ 611 /* Cleanup aged off entries. */
661#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
662 /* remove all related balanced entries if necessary */
663 if (rth->u.dst.flags & DST_BALANCED) {
664 rthp = rt_remove_balanced_route(
665 &rt_hash_table[i].chain,
666 rth, NULL);
667 if (!rthp)
668 break;
669 } else {
670 *rthp = rth->u.dst.rt_next;
671 rt_free(rth);
672 }
673#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
674 *rthp = rth->u.dst.rt_next; 612 *rthp = rth->u.dst.rt_next;
675 rt_free(rth); 613 rt_free(rth);
676#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
677 } 614 }
678 spin_unlock(rt_hash_lock_addr(i)); 615 spin_unlock(rt_hash_lock_addr(i));
679 616
@@ -721,9 +658,6 @@ void rt_cache_flush(int delay)
721 if (delay < 0) 658 if (delay < 0)
722 delay = ip_rt_min_delay; 659 delay = ip_rt_min_delay;
723 660
724 /* flush existing multipath state*/
725 multipath_flush();
726
727 spin_lock_bh(&rt_flush_lock); 661 spin_lock_bh(&rt_flush_lock);
728 662
729 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { 663 if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
@@ -842,30 +776,9 @@ static int rt_garbage_collect(void)
842 rthp = &rth->u.dst.rt_next; 776 rthp = &rth->u.dst.rt_next;
843 continue; 777 continue;
844 } 778 }
845#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
846 /* remove all related balanced entries
847 * if necessary
848 */
849 if (rth->u.dst.flags & DST_BALANCED) {
850 int r;
851
852 rthp = rt_remove_balanced_route(
853 &rt_hash_table[k].chain,
854 rth,
855 &r);
856 goal -= r;
857 if (!rthp)
858 break;
859 } else {
860 *rthp = rth->u.dst.rt_next;
861 rt_free(rth);
862 goal--;
863 }
864#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
865 *rthp = rth->u.dst.rt_next; 779 *rthp = rth->u.dst.rt_next;
866 rt_free(rth); 780 rt_free(rth);
867 goal--; 781 goal--;
868#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
869 } 782 }
870 spin_unlock_bh(rt_hash_lock_addr(k)); 783 spin_unlock_bh(rt_hash_lock_addr(k));
871 if (goal <= 0) 784 if (goal <= 0)
@@ -939,12 +852,7 @@ restart:
939 852
940 spin_lock_bh(rt_hash_lock_addr(hash)); 853 spin_lock_bh(rt_hash_lock_addr(hash));
941 while ((rth = *rthp) != NULL) { 854 while ((rth = *rthp) != NULL) {
942#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
943 if (!(rth->u.dst.flags & DST_BALANCED) &&
944 compare_keys(&rth->fl, &rt->fl)) {
945#else
946 if (compare_keys(&rth->fl, &rt->fl)) { 855 if (compare_keys(&rth->fl, &rt->fl)) {
947#endif
948 /* Put it first */ 856 /* Put it first */
949 *rthp = rth->u.dst.rt_next; 857 *rthp = rth->u.dst.rt_next;
950 /* 858 /*
@@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb,
1774 1682
1775 atomic_set(&rth->u.dst.__refcnt, 1); 1683 atomic_set(&rth->u.dst.__refcnt, 1);
1776 rth->u.dst.flags= DST_HOST; 1684 rth->u.dst.flags= DST_HOST;
1777#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1778 if (res->fi->fib_nhs > 1)
1779 rth->u.dst.flags |= DST_BALANCED;
1780#endif
1781 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1685 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1782 rth->u.dst.flags |= DST_NOPOLICY; 1686 rth->u.dst.flags |= DST_NOPOLICY;
1783 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 1687 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
@@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb,
1812 return err; 1716 return err;
1813} 1717}
1814 1718
1815static inline int ip_mkroute_input_def(struct sk_buff *skb, 1719static inline int ip_mkroute_input(struct sk_buff *skb,
1816 struct fib_result* res, 1720 struct fib_result* res,
1817 const struct flowi *fl, 1721 const struct flowi *fl,
1818 struct in_device *in_dev, 1722 struct in_device *in_dev,
1819 __be32 daddr, __be32 saddr, u32 tos) 1723 __be32 daddr, __be32 saddr, u32 tos)
1820{ 1724{
1821 struct rtable* rth = NULL; 1725 struct rtable* rth = NULL;
1822 int err; 1726 int err;
@@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
1837 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); 1741 return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
1838} 1742}
1839 1743
1840static inline int ip_mkroute_input(struct sk_buff *skb,
1841 struct fib_result* res,
1842 const struct flowi *fl,
1843 struct in_device *in_dev,
1844 __be32 daddr, __be32 saddr, u32 tos)
1845{
1846#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1847 struct rtable* rth = NULL, *rtres;
1848 unsigned char hop, hopcount;
1849 int err = -EINVAL;
1850 unsigned int hash;
1851
1852 if (res->fi)
1853 hopcount = res->fi->fib_nhs;
1854 else
1855 hopcount = 1;
1856
1857 /* distinguish between multipath and singlepath */
1858 if (hopcount < 2)
1859 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
1860 saddr, tos);
1861
1862 /* add all alternatives to the routing cache */
1863 for (hop = 0; hop < hopcount; hop++) {
1864 res->nh_sel = hop;
1865
1866 /* put reference to previous result */
1867 if (hop)
1868 ip_rt_put(rtres);
1869
1870 /* create a routing cache entry */
1871 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
1872 &rth);
1873 if (err)
1874 return err;
1875
1876 /* put it into the cache */
1877 hash = rt_hash(daddr, saddr, fl->iif);
1878 err = rt_intern_hash(hash, rth, &rtres);
1879 if (err)
1880 return err;
1881
1882 /* forward hop information to multipath impl. */
1883 multipath_set_nhinfo(rth,
1884 FIB_RES_NETWORK(*res),
1885 FIB_RES_NETMASK(*res),
1886 res->prefixlen,
1887 &FIB_RES_NH(*res));
1888 }
1889 skb->dst = &rtres->u.dst;
1890 return err;
1891#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1892 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
1893#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1894}
1895
1896
1897/* 1744/*
1898 * NOTE. We drop all the packets that has local source 1745 * NOTE. We drop all the packets that has local source
1899 * addresses, because every properly looped back packet 1746 * addresses, because every properly looped back packet
@@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result,
2211 2058
2212 atomic_set(&rth->u.dst.__refcnt, 1); 2059 atomic_set(&rth->u.dst.__refcnt, 1);
2213 rth->u.dst.flags= DST_HOST; 2060 rth->u.dst.flags= DST_HOST;
2214#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2215 if (res->fi) {
2216 rth->rt_multipath_alg = res->fi->fib_mp_alg;
2217 if (res->fi->fib_nhs > 1)
2218 rth->u.dst.flags |= DST_BALANCED;
2219 }
2220#endif
2221 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2061 if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2222 rth->u.dst.flags |= DST_NOXFRM; 2062 rth->u.dst.flags |= DST_NOXFRM;
2223 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2063 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
@@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result,
2277 return err; 2117 return err;
2278} 2118}
2279 2119
2280static inline int ip_mkroute_output_def(struct rtable **rp, 2120static inline int ip_mkroute_output(struct rtable **rp,
2281 struct fib_result* res, 2121 struct fib_result* res,
2282 const struct flowi *fl, 2122 const struct flowi *fl,
2283 const struct flowi *oldflp, 2123 const struct flowi *oldflp,
2284 struct net_device *dev_out, 2124 struct net_device *dev_out,
2285 unsigned flags) 2125 unsigned flags)
2286{ 2126{
2287 struct rtable *rth = NULL; 2127 struct rtable *rth = NULL;
2288 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); 2128 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
@@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
2295 return err; 2135 return err;
2296} 2136}
2297 2137
2298static inline int ip_mkroute_output(struct rtable** rp,
2299 struct fib_result* res,
2300 const struct flowi *fl,
2301 const struct flowi *oldflp,
2302 struct net_device *dev_out,
2303 unsigned flags)
2304{
2305#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2306 unsigned char hop;
2307 unsigned hash;
2308 int err = -EINVAL;
2309 struct rtable *rth = NULL;
2310
2311 if (res->fi && res->fi->fib_nhs > 1) {
2312 unsigned char hopcount = res->fi->fib_nhs;
2313
2314 for (hop = 0; hop < hopcount; hop++) {
2315 struct net_device *dev2nexthop;
2316
2317 res->nh_sel = hop;
2318
2319 /* hold a work reference to the output device */
2320 dev2nexthop = FIB_RES_DEV(*res);
2321 dev_hold(dev2nexthop);
2322
2323 /* put reference to previous result */
2324 if (hop)
2325 ip_rt_put(*rp);
2326
2327 err = __mkroute_output(&rth, res, fl, oldflp,
2328 dev2nexthop, flags);
2329
2330 if (err != 0)
2331 goto cleanup;
2332
2333 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
2334 oldflp->oif);
2335 err = rt_intern_hash(hash, rth, rp);
2336
2337 /* forward hop information to multipath impl. */
2338 multipath_set_nhinfo(rth,
2339 FIB_RES_NETWORK(*res),
2340 FIB_RES_NETMASK(*res),
2341 res->prefixlen,
2342 &FIB_RES_NH(*res));
2343 cleanup:
2344 /* release work reference to output device */
2345 dev_put(dev2nexthop);
2346
2347 if (err != 0)
2348 return err;
2349 }
2350 return err;
2351 } else {
2352 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
2353 flags);
2354 }
2355#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
2356 return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
2357#endif
2358}
2359
2360/* 2138/*
2361 * Major route resolver routine. 2139 * Major route resolver routine.
2362 */ 2140 */
@@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
2570 rth->fl.mark == flp->mark && 2348 rth->fl.mark == flp->mark &&
2571 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2349 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2572 (IPTOS_RT_MASK | RTO_ONLINK))) { 2350 (IPTOS_RT_MASK | RTO_ONLINK))) {
2573
2574 /* check for multipath routes and choose one if
2575 * necessary
2576 */
2577 if (multipath_select_route(flp, rth, rp)) {
2578 dst_hold(&(*rp)->u.dst);
2579 RT_CACHE_STAT_INC(out_hit);
2580 rcu_read_unlock_bh();
2581 return 0;
2582 }
2583
2584 rth->u.dst.lastuse = jiffies; 2351 rth->u.dst.lastuse = jiffies;
2585 dst_hold(&rth->u.dst); 2352 dst_hold(&rth->u.dst);
2586 rth->u.dst.__use++; 2353 rth->u.dst.__use++;
@@ -2729,10 +2496,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2729 if (rt->u.dst.tclassid) 2496 if (rt->u.dst.tclassid)
2730 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); 2497 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
2731#endif 2498#endif
2732#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
2733 if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
2734 NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
2735#endif
2736 if (rt->fl.iif) 2499 if (rt->fl.iif)
2737 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2500 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
2738 else if (rt->rt_src != rt->fl.fl4_src) 2501 else if (rt->rt_src != rt->fl.fl4_src)