diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/Kconfig | 42 | ||||
-rw-r--r-- | net/ipv4/Makefile | 5 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 16 | ||||
-rw-r--r-- | net/ipv4/multipath.c | 55 | ||||
-rw-r--r-- | net/ipv4/multipath_drr.c | 249 | ||||
-rw-r--r-- | net/ipv4/multipath_random.c | 114 | ||||
-rw-r--r-- | net/ipv4/multipath_rr.c | 95 | ||||
-rw-r--r-- | net/ipv4/multipath_wrandom.c | 329 | ||||
-rw-r--r-- | net/ipv4/route.c | 259 |
10 files changed, 11 insertions, 1157 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 010fbb2d45e9..fb7909774254 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -116,48 +116,6 @@ config IP_ROUTE_MULTIPATH | |||
116 | equal "cost" and chooses one of them in a non-deterministic fashion | 116 | equal "cost" and chooses one of them in a non-deterministic fashion |
117 | if a matching packet arrives. | 117 | if a matching packet arrives. |
118 | 118 | ||
119 | config IP_ROUTE_MULTIPATH_CACHED | ||
120 | bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" | ||
121 | depends on IP_ROUTE_MULTIPATH | ||
122 | help | ||
123 | Normally, equal cost multipath routing is not supported by the | ||
124 | routing cache. If you say Y here, alternative routes are cached | ||
125 | and on cache lookup a route is chosen in a configurable fashion. | ||
126 | |||
127 | If unsure, say N. | ||
128 | |||
129 | config IP_ROUTE_MULTIPATH_RR | ||
130 | tristate "MULTIPATH: round robin algorithm" | ||
131 | depends on IP_ROUTE_MULTIPATH_CACHED | ||
132 | help | ||
133 | Multipath routes are chosen according to Round Robin | ||
134 | |||
135 | config IP_ROUTE_MULTIPATH_RANDOM | ||
136 | tristate "MULTIPATH: random algorithm" | ||
137 | depends on IP_ROUTE_MULTIPATH_CACHED | ||
138 | help | ||
139 | Multipath routes are chosen in a random fashion. Actually, | ||
140 | there is no weight for a route. The advantage of this policy | ||
141 | is that it is implemented stateless and therefore introduces only | ||
142 | a very small delay. | ||
143 | |||
144 | config IP_ROUTE_MULTIPATH_WRANDOM | ||
145 | tristate "MULTIPATH: weighted random algorithm" | ||
146 | depends on IP_ROUTE_MULTIPATH_CACHED | ||
147 | help | ||
148 | Multipath routes are chosen in a weighted random fashion. | ||
149 | The per route weights are the weights visible via ip route 2. As the | ||
150 | corresponding state management introduces some overhead routing delay | ||
151 | is increased. | ||
152 | |||
153 | config IP_ROUTE_MULTIPATH_DRR | ||
154 | tristate "MULTIPATH: interface round robin algorithm" | ||
155 | depends on IP_ROUTE_MULTIPATH_CACHED | ||
156 | help | ||
157 | Connections are distributed in a round robin fashion over the | ||
158 | available interfaces. This policy makes sense if the connections | ||
159 | should be primarily distributed on interfaces and not on routes. | ||
160 | |||
161 | config IP_ROUTE_VERBOSE | 119 | config IP_ROUTE_VERBOSE |
162 | bool "IP: verbose route monitoring" | 120 | bool "IP: verbose route monitoring" |
163 | depends on IP_ADVANCED_ROUTER | 121 | depends on IP_ADVANCED_ROUTER |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4ff6c151d7f3..fbf1674e0c2a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -29,14 +29,9 @@ obj-$(CONFIG_INET_TUNNEL) += tunnel4.o | |||
29 | obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o | 29 | obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o |
30 | obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o | 30 | obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o |
31 | obj-$(CONFIG_IP_PNP) += ipconfig.o | 31 | obj-$(CONFIG_IP_PNP) += ipconfig.o |
32 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o | ||
33 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o | ||
34 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o | ||
35 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o | ||
36 | obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ | 32 | obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ |
37 | obj-$(CONFIG_IP_VS) += ipvs/ | 33 | obj-$(CONFIG_IP_VS) += ipvs/ |
38 | obj-$(CONFIG_INET_DIAG) += inet_diag.o | 34 | obj-$(CONFIG_INET_DIAG) += inet_diag.o |
39 | obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o | ||
40 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o | 35 | obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o |
41 | obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o | 36 | obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o |
42 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | 37 | obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 311d633f7f39..2eb909be8041 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -453,7 +453,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { | |||
453 | [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, | 453 | [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, |
454 | [RTA_PROTOINFO] = { .type = NLA_U32 }, | 454 | [RTA_PROTOINFO] = { .type = NLA_U32 }, |
455 | [RTA_FLOW] = { .type = NLA_U32 }, | 455 | [RTA_FLOW] = { .type = NLA_U32 }, |
456 | [RTA_MP_ALGO] = { .type = NLA_U32 }, | ||
457 | }; | 456 | }; |
458 | 457 | ||
459 | static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, | 458 | static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, |
@@ -515,9 +514,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, | |||
515 | case RTA_FLOW: | 514 | case RTA_FLOW: |
516 | cfg->fc_flow = nla_get_u32(attr); | 515 | cfg->fc_flow = nla_get_u32(attr); |
517 | break; | 516 | break; |
518 | case RTA_MP_ALGO: | ||
519 | cfg->fc_mp_alg = nla_get_u32(attr); | ||
520 | break; | ||
521 | case RTA_TABLE: | 517 | case RTA_TABLE: |
522 | cfg->fc_table = nla_get_u32(attr); | 518 | cfg->fc_table = nla_get_u32(attr); |
523 | break; | 519 | break; |
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bb94550d95c3..c434119deb52 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include <net/tcp.h> | 42 | #include <net/tcp.h> |
43 | #include <net/sock.h> | 43 | #include <net/sock.h> |
44 | #include <net/ip_fib.h> | 44 | #include <net/ip_fib.h> |
45 | #include <net/ip_mp_alg.h> | ||
46 | #include <net/netlink.h> | 45 | #include <net/netlink.h> |
47 | #include <net/nexthop.h> | 46 | #include <net/nexthop.h> |
48 | 47 | ||
@@ -697,13 +696,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
697 | goto err_inval; | 696 | goto err_inval; |
698 | } | 697 | } |
699 | #endif | 698 | #endif |
700 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
701 | if (cfg->fc_mp_alg) { | ||
702 | if (cfg->fc_mp_alg < IP_MP_ALG_NONE || | ||
703 | cfg->fc_mp_alg > IP_MP_ALG_MAX) | ||
704 | goto err_inval; | ||
705 | } | ||
706 | #endif | ||
707 | 699 | ||
708 | err = -ENOBUFS; | 700 | err = -ENOBUFS; |
709 | if (fib_info_cnt >= fib_hash_size) { | 701 | if (fib_info_cnt >= fib_hash_size) { |
@@ -791,10 +783,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
791 | #endif | 783 | #endif |
792 | } | 784 | } |
793 | 785 | ||
794 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
795 | fi->fib_mp_alg = cfg->fc_mp_alg; | ||
796 | #endif | ||
797 | |||
798 | if (fib_props[cfg->fc_type].error) { | 786 | if (fib_props[cfg->fc_type].error) { |
799 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) | 787 | if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) |
800 | goto err_inval; | 788 | goto err_inval; |
@@ -940,10 +928,6 @@ out_fill_res: | |||
940 | res->type = fa->fa_type; | 928 | res->type = fa->fa_type; |
941 | res->scope = fa->fa_scope; | 929 | res->scope = fa->fa_scope; |
942 | res->fi = fa->fa_info; | 930 | res->fi = fa->fa_info; |
943 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
944 | res->netmask = mask; | ||
945 | res->network = zone & inet_make_mask(prefixlen); | ||
946 | #endif | ||
947 | atomic_inc(&res->fi->fib_clntref); | 931 | atomic_inc(&res->fi->fib_clntref); |
948 | return 0; | 932 | return 0; |
949 | } | 933 | } |
diff --git a/net/ipv4/multipath.c b/net/ipv4/multipath.c deleted file mode 100644 index 4e9ca7c76407..000000000000 --- a/net/ipv4/multipath.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | /* multipath.c: IPV4 multipath algorithm support. | ||
2 | * | ||
3 | * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com> | ||
4 | * Copyright (C) 2005 David S. Miller <davem@davemloft.net> | ||
5 | */ | ||
6 | |||
7 | #include <linux/module.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/netdevice.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | |||
12 | #include <net/ip_mp_alg.h> | ||
13 | |||
14 | static DEFINE_SPINLOCK(alg_table_lock); | ||
15 | struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; | ||
16 | |||
17 | int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) | ||
18 | { | ||
19 | struct ip_mp_alg_ops **slot; | ||
20 | int err; | ||
21 | |||
22 | if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || | ||
23 | !ops->mp_alg_select_route) | ||
24 | return -EINVAL; | ||
25 | |||
26 | spin_lock(&alg_table_lock); | ||
27 | slot = &ip_mp_alg_table[n]; | ||
28 | if (*slot != NULL) { | ||
29 | err = -EBUSY; | ||
30 | } else { | ||
31 | *slot = ops; | ||
32 | err = 0; | ||
33 | } | ||
34 | spin_unlock(&alg_table_lock); | ||
35 | |||
36 | return err; | ||
37 | } | ||
38 | EXPORT_SYMBOL(multipath_alg_register); | ||
39 | |||
40 | void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) | ||
41 | { | ||
42 | struct ip_mp_alg_ops **slot; | ||
43 | |||
44 | if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) | ||
45 | return; | ||
46 | |||
47 | spin_lock(&alg_table_lock); | ||
48 | slot = &ip_mp_alg_table[n]; | ||
49 | if (*slot == ops) | ||
50 | *slot = NULL; | ||
51 | spin_unlock(&alg_table_lock); | ||
52 | |||
53 | synchronize_net(); | ||
54 | } | ||
55 | EXPORT_SYMBOL(multipath_alg_unregister); | ||
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c deleted file mode 100644 index b03c5ca2c823..000000000000 --- a/net/ipv4/multipath_drr.c +++ /dev/null | |||
@@ -1,249 +0,0 @@ | |||
1 | /* | ||
2 | * Device round robin policy for multipath. | ||
3 | * | ||
4 | * | ||
5 | * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ | ||
6 | * | ||
7 | * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <asm/system.h> | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/timer.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/stat.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/inet.h> | ||
27 | #include <linux/netdevice.h> | ||
28 | #include <linux/inetdevice.h> | ||
29 | #include <linux/igmp.h> | ||
30 | #include <linux/proc_fs.h> | ||
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mroute.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <net/ip.h> | ||
36 | #include <net/protocol.h> | ||
37 | #include <linux/skbuff.h> | ||
38 | #include <net/sock.h> | ||
39 | #include <net/icmp.h> | ||
40 | #include <net/udp.h> | ||
41 | #include <net/raw.h> | ||
42 | #include <linux/notifier.h> | ||
43 | #include <linux/if_arp.h> | ||
44 | #include <linux/netfilter_ipv4.h> | ||
45 | #include <net/ipip.h> | ||
46 | #include <net/checksum.h> | ||
47 | #include <net/ip_mp_alg.h> | ||
48 | |||
49 | struct multipath_device { | ||
50 | int ifi; /* interface index of device */ | ||
51 | atomic_t usecount; | ||
52 | int allocated; | ||
53 | }; | ||
54 | |||
55 | #define MULTIPATH_MAX_DEVICECANDIDATES 10 | ||
56 | |||
57 | static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; | ||
58 | static DEFINE_SPINLOCK(state_lock); | ||
59 | |||
60 | static int inline __multipath_findslot(void) | ||
61 | { | ||
62 | int i; | ||
63 | |||
64 | for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { | ||
65 | if (state[i].allocated == 0) | ||
66 | return i; | ||
67 | } | ||
68 | return -1; | ||
69 | } | ||
70 | |||
71 | static int inline __multipath_finddev(int ifindex) | ||
72 | { | ||
73 | int i; | ||
74 | |||
75 | for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { | ||
76 | if (state[i].allocated != 0 && | ||
77 | state[i].ifi == ifindex) | ||
78 | return i; | ||
79 | } | ||
80 | return -1; | ||
81 | } | ||
82 | |||
83 | static int drr_dev_event(struct notifier_block *this, | ||
84 | unsigned long event, void *ptr) | ||
85 | { | ||
86 | struct net_device *dev = ptr; | ||
87 | int devidx; | ||
88 | |||
89 | switch (event) { | ||
90 | case NETDEV_UNREGISTER: | ||
91 | case NETDEV_DOWN: | ||
92 | spin_lock_bh(&state_lock); | ||
93 | |||
94 | devidx = __multipath_finddev(dev->ifindex); | ||
95 | if (devidx != -1) { | ||
96 | state[devidx].allocated = 0; | ||
97 | state[devidx].ifi = 0; | ||
98 | atomic_set(&state[devidx].usecount, 0); | ||
99 | } | ||
100 | |||
101 | spin_unlock_bh(&state_lock); | ||
102 | break; | ||
103 | } | ||
104 | |||
105 | return NOTIFY_DONE; | ||
106 | } | ||
107 | |||
108 | static struct notifier_block drr_dev_notifier = { | ||
109 | .notifier_call = drr_dev_event, | ||
110 | }; | ||
111 | |||
112 | |||
113 | static void drr_safe_inc(atomic_t *usecount) | ||
114 | { | ||
115 | int n; | ||
116 | |||
117 | atomic_inc(usecount); | ||
118 | |||
119 | n = atomic_read(usecount); | ||
120 | if (n <= 0) { | ||
121 | int i; | ||
122 | |||
123 | spin_lock_bh(&state_lock); | ||
124 | |||
125 | for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) | ||
126 | atomic_set(&state[i].usecount, 0); | ||
127 | |||
128 | spin_unlock_bh(&state_lock); | ||
129 | } | ||
130 | } | ||
131 | |||
132 | static void drr_select_route(const struct flowi *flp, | ||
133 | struct rtable *first, struct rtable **rp) | ||
134 | { | ||
135 | struct rtable *nh, *result, *cur_min; | ||
136 | int min_usecount = -1; | ||
137 | int devidx = -1; | ||
138 | int cur_min_devidx = -1; | ||
139 | |||
140 | /* 1. make sure all alt. nexthops have the same GC related data */ | ||
141 | /* 2. determine the new candidate to be returned */ | ||
142 | result = NULL; | ||
143 | cur_min = NULL; | ||
144 | for (nh = rcu_dereference(first); nh; | ||
145 | nh = rcu_dereference(nh->u.dst.rt_next)) { | ||
146 | if ((nh->u.dst.flags & DST_BALANCED) != 0 && | ||
147 | multipath_comparekeys(&nh->fl, flp)) { | ||
148 | int nh_ifidx = nh->u.dst.dev->ifindex; | ||
149 | |||
150 | nh->u.dst.lastuse = jiffies; | ||
151 | nh->u.dst.__use++; | ||
152 | if (result != NULL) | ||
153 | continue; | ||
154 | |||
155 | /* search for the output interface */ | ||
156 | |||
157 | /* this is not SMP safe, only add/remove are | ||
158 | * SMP safe as wrong usecount updates have no big | ||
159 | * impact | ||
160 | */ | ||
161 | devidx = __multipath_finddev(nh_ifidx); | ||
162 | if (devidx == -1) { | ||
163 | /* add the interface to the array | ||
164 | * SMP safe | ||
165 | */ | ||
166 | spin_lock_bh(&state_lock); | ||
167 | |||
168 | /* due to SMP: search again */ | ||
169 | devidx = __multipath_finddev(nh_ifidx); | ||
170 | if (devidx == -1) { | ||
171 | /* add entry for device */ | ||
172 | devidx = __multipath_findslot(); | ||
173 | if (devidx == -1) { | ||
174 | /* unlikely but possible */ | ||
175 | continue; | ||
176 | } | ||
177 | |||
178 | state[devidx].allocated = 1; | ||
179 | state[devidx].ifi = nh_ifidx; | ||
180 | atomic_set(&state[devidx].usecount, 0); | ||
181 | min_usecount = 0; | ||
182 | } | ||
183 | |||
184 | spin_unlock_bh(&state_lock); | ||
185 | } | ||
186 | |||
187 | if (min_usecount == 0) { | ||
188 | /* if the device has not been used it is | ||
189 | * the primary target | ||
190 | */ | ||
191 | drr_safe_inc(&state[devidx].usecount); | ||
192 | result = nh; | ||
193 | } else { | ||
194 | int count = | ||
195 | atomic_read(&state[devidx].usecount); | ||
196 | |||
197 | if (min_usecount == -1 || | ||
198 | count < min_usecount) { | ||
199 | cur_min = nh; | ||
200 | cur_min_devidx = devidx; | ||
201 | min_usecount = count; | ||
202 | } | ||
203 | } | ||
204 | } | ||
205 | } | ||
206 | |||
207 | if (!result) { | ||
208 | if (cur_min) { | ||
209 | drr_safe_inc(&state[cur_min_devidx].usecount); | ||
210 | result = cur_min; | ||
211 | } else { | ||
212 | result = first; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | *rp = result; | ||
217 | } | ||
218 | |||
219 | static struct ip_mp_alg_ops drr_ops = { | ||
220 | .mp_alg_select_route = drr_select_route, | ||
221 | }; | ||
222 | |||
223 | static int __init drr_init(void) | ||
224 | { | ||
225 | int err = register_netdevice_notifier(&drr_dev_notifier); | ||
226 | |||
227 | if (err) | ||
228 | return err; | ||
229 | |||
230 | err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); | ||
231 | if (err) | ||
232 | goto fail; | ||
233 | |||
234 | return 0; | ||
235 | |||
236 | fail: | ||
237 | unregister_netdevice_notifier(&drr_dev_notifier); | ||
238 | return err; | ||
239 | } | ||
240 | |||
241 | static void __exit drr_exit(void) | ||
242 | { | ||
243 | unregister_netdevice_notifier(&drr_dev_notifier); | ||
244 | multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); | ||
245 | } | ||
246 | |||
247 | module_init(drr_init); | ||
248 | module_exit(drr_exit); | ||
249 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c deleted file mode 100644 index c312785d14d0..000000000000 --- a/net/ipv4/multipath_random.c +++ /dev/null | |||
@@ -1,114 +0,0 @@ | |||
1 | /* | ||
2 | * Random policy for multipath. | ||
3 | * | ||
4 | * | ||
5 | * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ | ||
6 | * | ||
7 | * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <asm/system.h> | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/timer.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/stat.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/inet.h> | ||
27 | #include <linux/netdevice.h> | ||
28 | #include <linux/inetdevice.h> | ||
29 | #include <linux/igmp.h> | ||
30 | #include <linux/proc_fs.h> | ||
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mroute.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/random.h> | ||
36 | #include <net/ip.h> | ||
37 | #include <net/protocol.h> | ||
38 | #include <linux/skbuff.h> | ||
39 | #include <net/sock.h> | ||
40 | #include <net/icmp.h> | ||
41 | #include <net/udp.h> | ||
42 | #include <net/raw.h> | ||
43 | #include <linux/notifier.h> | ||
44 | #include <linux/if_arp.h> | ||
45 | #include <linux/netfilter_ipv4.h> | ||
46 | #include <net/ipip.h> | ||
47 | #include <net/checksum.h> | ||
48 | #include <net/ip_mp_alg.h> | ||
49 | |||
50 | #define MULTIPATH_MAX_CANDIDATES 40 | ||
51 | |||
52 | static void random_select_route(const struct flowi *flp, | ||
53 | struct rtable *first, | ||
54 | struct rtable **rp) | ||
55 | { | ||
56 | struct rtable *rt; | ||
57 | struct rtable *decision; | ||
58 | unsigned char candidate_count = 0; | ||
59 | |||
60 | /* count all candidate */ | ||
61 | for (rt = rcu_dereference(first); rt; | ||
62 | rt = rcu_dereference(rt->u.dst.rt_next)) { | ||
63 | if ((rt->u.dst.flags & DST_BALANCED) != 0 && | ||
64 | multipath_comparekeys(&rt->fl, flp)) | ||
65 | ++candidate_count; | ||
66 | } | ||
67 | |||
68 | /* choose a random candidate */ | ||
69 | decision = first; | ||
70 | if (candidate_count > 1) { | ||
71 | unsigned char i = 0; | ||
72 | unsigned char candidate_no = (unsigned char) | ||
73 | (random32() % candidate_count); | ||
74 | |||
75 | /* find chosen candidate and adjust GC data for all candidates | ||
76 | * to ensure they stay in cache | ||
77 | */ | ||
78 | for (rt = first; rt; rt = rt->u.dst.rt_next) { | ||
79 | if ((rt->u.dst.flags & DST_BALANCED) != 0 && | ||
80 | multipath_comparekeys(&rt->fl, flp)) { | ||
81 | rt->u.dst.lastuse = jiffies; | ||
82 | |||
83 | if (i == candidate_no) | ||
84 | decision = rt; | ||
85 | |||
86 | if (i >= candidate_count) | ||
87 | break; | ||
88 | |||
89 | i++; | ||
90 | } | ||
91 | } | ||
92 | } | ||
93 | |||
94 | decision->u.dst.__use++; | ||
95 | *rp = decision; | ||
96 | } | ||
97 | |||
98 | static struct ip_mp_alg_ops random_ops = { | ||
99 | .mp_alg_select_route = random_select_route, | ||
100 | }; | ||
101 | |||
102 | static int __init random_init(void) | ||
103 | { | ||
104 | return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); | ||
105 | } | ||
106 | |||
107 | static void __exit random_exit(void) | ||
108 | { | ||
109 | multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); | ||
110 | } | ||
111 | |||
112 | module_init(random_init); | ||
113 | module_exit(random_exit); | ||
114 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c deleted file mode 100644 index 0ad22524f450..000000000000 --- a/net/ipv4/multipath_rr.c +++ /dev/null | |||
@@ -1,95 +0,0 @@ | |||
1 | /* | ||
2 | * Round robin policy for multipath. | ||
3 | * | ||
4 | * | ||
5 | * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ | ||
6 | * | ||
7 | * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <asm/system.h> | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/timer.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/stat.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/inet.h> | ||
27 | #include <linux/netdevice.h> | ||
28 | #include <linux/inetdevice.h> | ||
29 | #include <linux/igmp.h> | ||
30 | #include <linux/proc_fs.h> | ||
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mroute.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <net/ip.h> | ||
36 | #include <net/protocol.h> | ||
37 | #include <linux/skbuff.h> | ||
38 | #include <net/sock.h> | ||
39 | #include <net/icmp.h> | ||
40 | #include <net/udp.h> | ||
41 | #include <net/raw.h> | ||
42 | #include <linux/notifier.h> | ||
43 | #include <linux/if_arp.h> | ||
44 | #include <linux/netfilter_ipv4.h> | ||
45 | #include <net/ipip.h> | ||
46 | #include <net/checksum.h> | ||
47 | #include <net/ip_mp_alg.h> | ||
48 | |||
49 | static void rr_select_route(const struct flowi *flp, | ||
50 | struct rtable *first, struct rtable **rp) | ||
51 | { | ||
52 | struct rtable *nh, *result, *min_use_cand = NULL; | ||
53 | int min_use = -1; | ||
54 | |||
55 | /* 1. make sure all alt. nexthops have the same GC related data | ||
56 | * 2. determine the new candidate to be returned | ||
57 | */ | ||
58 | result = NULL; | ||
59 | for (nh = rcu_dereference(first); nh; | ||
60 | nh = rcu_dereference(nh->u.dst.rt_next)) { | ||
61 | if ((nh->u.dst.flags & DST_BALANCED) != 0 && | ||
62 | multipath_comparekeys(&nh->fl, flp)) { | ||
63 | nh->u.dst.lastuse = jiffies; | ||
64 | |||
65 | if (min_use == -1 || nh->u.dst.__use < min_use) { | ||
66 | min_use = nh->u.dst.__use; | ||
67 | min_use_cand = nh; | ||
68 | } | ||
69 | } | ||
70 | } | ||
71 | result = min_use_cand; | ||
72 | if (!result) | ||
73 | result = first; | ||
74 | |||
75 | result->u.dst.__use++; | ||
76 | *rp = result; | ||
77 | } | ||
78 | |||
79 | static struct ip_mp_alg_ops rr_ops = { | ||
80 | .mp_alg_select_route = rr_select_route, | ||
81 | }; | ||
82 | |||
83 | static int __init rr_init(void) | ||
84 | { | ||
85 | return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); | ||
86 | } | ||
87 | |||
88 | static void __exit rr_exit(void) | ||
89 | { | ||
90 | multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); | ||
91 | } | ||
92 | |||
93 | module_init(rr_init); | ||
94 | module_exit(rr_exit); | ||
95 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c deleted file mode 100644 index 57c503694539..000000000000 --- a/net/ipv4/multipath_wrandom.c +++ /dev/null | |||
@@ -1,329 +0,0 @@ | |||
1 | /* | ||
2 | * Weighted random policy for multipath. | ||
3 | * | ||
4 | * | ||
5 | * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ | ||
6 | * | ||
7 | * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <asm/system.h> | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/timer.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/fcntl.h> | ||
23 | #include <linux/stat.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/inet.h> | ||
27 | #include <linux/netdevice.h> | ||
28 | #include <linux/inetdevice.h> | ||
29 | #include <linux/igmp.h> | ||
30 | #include <linux/proc_fs.h> | ||
31 | #include <linux/seq_file.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/mroute.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/random.h> | ||
36 | #include <net/ip.h> | ||
37 | #include <net/protocol.h> | ||
38 | #include <linux/skbuff.h> | ||
39 | #include <net/sock.h> | ||
40 | #include <net/icmp.h> | ||
41 | #include <net/udp.h> | ||
42 | #include <net/raw.h> | ||
43 | #include <linux/notifier.h> | ||
44 | #include <linux/if_arp.h> | ||
45 | #include <linux/netfilter_ipv4.h> | ||
46 | #include <net/ipip.h> | ||
47 | #include <net/checksum.h> | ||
48 | #include <net/ip_fib.h> | ||
49 | #include <net/ip_mp_alg.h> | ||
50 | |||
51 | #define MULTIPATH_STATE_SIZE 15 | ||
52 | |||
53 | struct multipath_candidate { | ||
54 | struct multipath_candidate *next; | ||
55 | int power; | ||
56 | struct rtable *rt; | ||
57 | }; | ||
58 | |||
59 | struct multipath_dest { | ||
60 | struct list_head list; | ||
61 | |||
62 | const struct fib_nh *nh_info; | ||
63 | __be32 netmask; | ||
64 | __be32 network; | ||
65 | unsigned char prefixlen; | ||
66 | |||
67 | struct rcu_head rcu; | ||
68 | }; | ||
69 | |||
70 | struct multipath_bucket { | ||
71 | struct list_head head; | ||
72 | spinlock_t lock; | ||
73 | }; | ||
74 | |||
75 | struct multipath_route { | ||
76 | struct list_head list; | ||
77 | |||
78 | int oif; | ||
79 | __be32 gw; | ||
80 | struct list_head dests; | ||
81 | |||
82 | struct rcu_head rcu; | ||
83 | }; | ||
84 | |||
85 | /* state: primarily weight per route information */ | ||
86 | static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; | ||
87 | |||
88 | static unsigned char __multipath_lookup_weight(const struct flowi *fl, | ||
89 | const struct rtable *rt) | ||
90 | { | ||
91 | const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; | ||
92 | struct multipath_route *r; | ||
93 | struct multipath_route *target_route = NULL; | ||
94 | struct multipath_dest *d; | ||
95 | int weight = 1; | ||
96 | |||
97 | /* lookup the weight information for a certain route */ | ||
98 | rcu_read_lock(); | ||
99 | |||
100 | /* find state entry for gateway or add one if necessary */ | ||
101 | list_for_each_entry_rcu(r, &state[state_idx].head, list) { | ||
102 | if (r->gw == rt->rt_gateway && | ||
103 | r->oif == rt->idev->dev->ifindex) { | ||
104 | target_route = r; | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | if (!target_route) { | ||
110 | /* this should not happen... but we are prepared */ | ||
111 | printk( KERN_CRIT"%s: missing state for gateway: %u and " \ | ||
112 | "device %d\n", __FUNCTION__, rt->rt_gateway, | ||
113 | rt->idev->dev->ifindex); | ||
114 | goto out; | ||
115 | } | ||
116 | |||
117 | /* find state entry for destination */ | ||
118 | list_for_each_entry_rcu(d, &target_route->dests, list) { | ||
119 | __be32 targetnetwork = fl->fl4_dst & | ||
120 | inet_make_mask(d->prefixlen); | ||
121 | |||
122 | if ((targetnetwork & d->netmask) == d->network) { | ||
123 | weight = d->nh_info->nh_weight; | ||
124 | goto out; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | out: | ||
129 | rcu_read_unlock(); | ||
130 | return weight; | ||
131 | } | ||
132 | |||
133 | static void wrandom_init_state(void) | ||
134 | { | ||
135 | int i; | ||
136 | |||
137 | for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { | ||
138 | INIT_LIST_HEAD(&state[i].head); | ||
139 | spin_lock_init(&state[i].lock); | ||
140 | } | ||
141 | } | ||
142 | |||
143 | static void wrandom_select_route(const struct flowi *flp, | ||
144 | struct rtable *first, | ||
145 | struct rtable **rp) | ||
146 | { | ||
147 | struct rtable *rt; | ||
148 | struct rtable *decision; | ||
149 | struct multipath_candidate *first_mpc = NULL; | ||
150 | struct multipath_candidate *mpc, *last_mpc = NULL; | ||
151 | int power = 0; | ||
152 | int last_power; | ||
153 | int selector; | ||
154 | const size_t size_mpc = sizeof(struct multipath_candidate); | ||
155 | |||
156 | /* collect all candidates and identify their weights */ | ||
157 | for (rt = rcu_dereference(first); rt; | ||
158 | rt = rcu_dereference(rt->u.dst.rt_next)) { | ||
159 | if ((rt->u.dst.flags & DST_BALANCED) != 0 && | ||
160 | multipath_comparekeys(&rt->fl, flp)) { | ||
161 | struct multipath_candidate* mpc = | ||
162 | (struct multipath_candidate*) | ||
163 | kmalloc(size_mpc, GFP_ATOMIC); | ||
164 | |||
165 | if (!mpc) | ||
166 | return; | ||
167 | |||
168 | power += __multipath_lookup_weight(flp, rt) * 10000; | ||
169 | |||
170 | mpc->power = power; | ||
171 | mpc->rt = rt; | ||
172 | mpc->next = NULL; | ||
173 | |||
174 | if (!first_mpc) | ||
175 | first_mpc = mpc; | ||
176 | else | ||
177 | last_mpc->next = mpc; | ||
178 | |||
179 | last_mpc = mpc; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | /* choose a weighted random candidate */ | ||
184 | decision = first; | ||
185 | selector = random32() % power; | ||
186 | last_power = 0; | ||
187 | |||
188 | /* select candidate, adjust GC data and cleanup local state */ | ||
189 | decision = first; | ||
190 | last_mpc = NULL; | ||
191 | for (mpc = first_mpc; mpc; mpc = mpc->next) { | ||
192 | mpc->rt->u.dst.lastuse = jiffies; | ||
193 | if (last_power <= selector && selector < mpc->power) | ||
194 | decision = mpc->rt; | ||
195 | |||
196 | last_power = mpc->power; | ||
197 | kfree(last_mpc); | ||
198 | last_mpc = mpc; | ||
199 | } | ||
200 | |||
201 | /* concurrent __multipath_flush may lead to !last_mpc */ | ||
202 | kfree(last_mpc); | ||
203 | |||
204 | decision->u.dst.__use++; | ||
205 | *rp = decision; | ||
206 | } | ||
207 | |||
208 | static void wrandom_set_nhinfo(__be32 network, | ||
209 | __be32 netmask, | ||
210 | unsigned char prefixlen, | ||
211 | const struct fib_nh *nh) | ||
212 | { | ||
213 | const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; | ||
214 | struct multipath_route *r, *target_route = NULL; | ||
215 | struct multipath_dest *d, *target_dest = NULL; | ||
216 | |||
217 | /* store the weight information for a certain route */ | ||
218 | spin_lock_bh(&state[state_idx].lock); | ||
219 | |||
220 | /* find state entry for gateway or add one if necessary */ | ||
221 | list_for_each_entry_rcu(r, &state[state_idx].head, list) { | ||
222 | if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { | ||
223 | target_route = r; | ||
224 | break; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | if (!target_route) { | ||
229 | const size_t size_rt = sizeof(struct multipath_route); | ||
230 | target_route = (struct multipath_route *) | ||
231 | kmalloc(size_rt, GFP_ATOMIC); | ||
232 | |||
233 | target_route->gw = nh->nh_gw; | ||
234 | target_route->oif = nh->nh_oif; | ||
235 | memset(&target_route->rcu, 0, sizeof(struct rcu_head)); | ||
236 | INIT_LIST_HEAD(&target_route->dests); | ||
237 | |||
238 | list_add_rcu(&target_route->list, &state[state_idx].head); | ||
239 | } | ||
240 | |||
241 | /* find state entry for destination or add one if necessary */ | ||
242 | list_for_each_entry_rcu(d, &target_route->dests, list) { | ||
243 | if (d->nh_info == nh) { | ||
244 | target_dest = d; | ||
245 | break; | ||
246 | } | ||
247 | } | ||
248 | |||
249 | if (!target_dest) { | ||
250 | const size_t size_dst = sizeof(struct multipath_dest); | ||
251 | target_dest = (struct multipath_dest*) | ||
252 | kmalloc(size_dst, GFP_ATOMIC); | ||
253 | |||
254 | target_dest->nh_info = nh; | ||
255 | target_dest->network = network; | ||
256 | target_dest->netmask = netmask; | ||
257 | target_dest->prefixlen = prefixlen; | ||
258 | memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); | ||
259 | |||
260 | list_add_rcu(&target_dest->list, &target_route->dests); | ||
261 | } | ||
262 | /* else: we already stored this info for another destination => | ||
263 | * we are finished | ||
264 | */ | ||
265 | |||
266 | spin_unlock_bh(&state[state_idx].lock); | ||
267 | } | ||
268 | |||
269 | static void __multipath_free(struct rcu_head *head) | ||
270 | { | ||
271 | struct multipath_route *rt = container_of(head, struct multipath_route, | ||
272 | rcu); | ||
273 | kfree(rt); | ||
274 | } | ||
275 | |||
276 | static void __multipath_free_dst(struct rcu_head *head) | ||
277 | { | ||
278 | struct multipath_dest *dst = container_of(head, | ||
279 | struct multipath_dest, | ||
280 | rcu); | ||
281 | kfree(dst); | ||
282 | } | ||
283 | |||
284 | static void wrandom_flush(void) | ||
285 | { | ||
286 | int i; | ||
287 | |||
288 | /* defere delete to all entries */ | ||
289 | for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { | ||
290 | struct multipath_route *r; | ||
291 | |||
292 | spin_lock_bh(&state[i].lock); | ||
293 | list_for_each_entry_rcu(r, &state[i].head, list) { | ||
294 | struct multipath_dest *d; | ||
295 | list_for_each_entry_rcu(d, &r->dests, list) { | ||
296 | list_del_rcu(&d->list); | ||
297 | call_rcu(&d->rcu, | ||
298 | __multipath_free_dst); | ||
299 | } | ||
300 | list_del_rcu(&r->list); | ||
301 | call_rcu(&r->rcu, | ||
302 | __multipath_free); | ||
303 | } | ||
304 | |||
305 | spin_unlock_bh(&state[i].lock); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | static struct ip_mp_alg_ops wrandom_ops = { | ||
310 | .mp_alg_select_route = wrandom_select_route, | ||
311 | .mp_alg_flush = wrandom_flush, | ||
312 | .mp_alg_set_nhinfo = wrandom_set_nhinfo, | ||
313 | }; | ||
314 | |||
315 | static int __init wrandom_init(void) | ||
316 | { | ||
317 | wrandom_init_state(); | ||
318 | |||
319 | return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); | ||
320 | } | ||
321 | |||
322 | static void __exit wrandom_exit(void) | ||
323 | { | ||
324 | multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); | ||
325 | } | ||
326 | |||
327 | module_init(wrandom_init); | ||
328 | module_exit(wrandom_exit); | ||
329 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29ca63e81ced..85285021518b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -101,7 +101,6 @@ | |||
101 | #include <net/tcp.h> | 101 | #include <net/tcp.h> |
102 | #include <net/icmp.h> | 102 | #include <net/icmp.h> |
103 | #include <net/xfrm.h> | 103 | #include <net/xfrm.h> |
104 | #include <net/ip_mp_alg.h> | ||
105 | #include <net/netevent.h> | 104 | #include <net/netevent.h> |
106 | #include <net/rtnetlink.h> | 105 | #include <net/rtnetlink.h> |
107 | #ifdef CONFIG_SYSCTL | 106 | #ifdef CONFIG_SYSCTL |
@@ -495,13 +494,11 @@ static const struct file_operations rt_cpu_seq_fops = { | |||
495 | 494 | ||
496 | static __inline__ void rt_free(struct rtable *rt) | 495 | static __inline__ void rt_free(struct rtable *rt) |
497 | { | 496 | { |
498 | multipath_remove(rt); | ||
499 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 497 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); |
500 | } | 498 | } |
501 | 499 | ||
502 | static __inline__ void rt_drop(struct rtable *rt) | 500 | static __inline__ void rt_drop(struct rtable *rt) |
503 | { | 501 | { |
504 | multipath_remove(rt); | ||
505 | ip_rt_put(rt); | 502 | ip_rt_put(rt); |
506 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 503 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); |
507 | } | 504 | } |
@@ -574,52 +571,6 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | |||
574 | (fl1->iif ^ fl2->iif)) == 0; | 571 | (fl1->iif ^ fl2->iif)) == 0; |
575 | } | 572 | } |
576 | 573 | ||
577 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
578 | static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, | ||
579 | struct rtable *expentry, | ||
580 | int *removed_count) | ||
581 | { | ||
582 | int passedexpired = 0; | ||
583 | struct rtable **nextstep = NULL; | ||
584 | struct rtable **rthp = chain_head; | ||
585 | struct rtable *rth; | ||
586 | |||
587 | if (removed_count) | ||
588 | *removed_count = 0; | ||
589 | |||
590 | while ((rth = *rthp) != NULL) { | ||
591 | if (rth == expentry) | ||
592 | passedexpired = 1; | ||
593 | |||
594 | if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && | ||
595 | compare_keys(&(*rthp)->fl, &expentry->fl)) { | ||
596 | if (*rthp == expentry) { | ||
597 | *rthp = rth->u.dst.rt_next; | ||
598 | continue; | ||
599 | } else { | ||
600 | *rthp = rth->u.dst.rt_next; | ||
601 | rt_free(rth); | ||
602 | if (removed_count) | ||
603 | ++(*removed_count); | ||
604 | } | ||
605 | } else { | ||
606 | if (!((*rthp)->u.dst.flags & DST_BALANCED) && | ||
607 | passedexpired && !nextstep) | ||
608 | nextstep = &rth->u.dst.rt_next; | ||
609 | |||
610 | rthp = &rth->u.dst.rt_next; | ||
611 | } | ||
612 | } | ||
613 | |||
614 | rt_free(expentry); | ||
615 | if (removed_count) | ||
616 | ++(*removed_count); | ||
617 | |||
618 | return nextstep; | ||
619 | } | ||
620 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
621 | |||
622 | |||
623 | /* This runs via a timer and thus is always in BH context. */ | 574 | /* This runs via a timer and thus is always in BH context. */ |
624 | static void rt_check_expire(unsigned long dummy) | 575 | static void rt_check_expire(unsigned long dummy) |
625 | { | 576 | { |
@@ -658,22 +609,8 @@ static void rt_check_expire(unsigned long dummy) | |||
658 | } | 609 | } |
659 | 610 | ||
660 | /* Cleanup aged off entries. */ | 611 | /* Cleanup aged off entries. */ |
661 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
662 | /* remove all related balanced entries if necessary */ | ||
663 | if (rth->u.dst.flags & DST_BALANCED) { | ||
664 | rthp = rt_remove_balanced_route( | ||
665 | &rt_hash_table[i].chain, | ||
666 | rth, NULL); | ||
667 | if (!rthp) | ||
668 | break; | ||
669 | } else { | ||
670 | *rthp = rth->u.dst.rt_next; | ||
671 | rt_free(rth); | ||
672 | } | ||
673 | #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
674 | *rthp = rth->u.dst.rt_next; | 612 | *rthp = rth->u.dst.rt_next; |
675 | rt_free(rth); | 613 | rt_free(rth); |
676 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
677 | } | 614 | } |
678 | spin_unlock(rt_hash_lock_addr(i)); | 615 | spin_unlock(rt_hash_lock_addr(i)); |
679 | 616 | ||
@@ -721,9 +658,6 @@ void rt_cache_flush(int delay) | |||
721 | if (delay < 0) | 658 | if (delay < 0) |
722 | delay = ip_rt_min_delay; | 659 | delay = ip_rt_min_delay; |
723 | 660 | ||
724 | /* flush existing multipath state*/ | ||
725 | multipath_flush(); | ||
726 | |||
727 | spin_lock_bh(&rt_flush_lock); | 661 | spin_lock_bh(&rt_flush_lock); |
728 | 662 | ||
729 | if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { | 663 | if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { |
@@ -842,30 +776,9 @@ static int rt_garbage_collect(void) | |||
842 | rthp = &rth->u.dst.rt_next; | 776 | rthp = &rth->u.dst.rt_next; |
843 | continue; | 777 | continue; |
844 | } | 778 | } |
845 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
846 | /* remove all related balanced entries | ||
847 | * if necessary | ||
848 | */ | ||
849 | if (rth->u.dst.flags & DST_BALANCED) { | ||
850 | int r; | ||
851 | |||
852 | rthp = rt_remove_balanced_route( | ||
853 | &rt_hash_table[k].chain, | ||
854 | rth, | ||
855 | &r); | ||
856 | goal -= r; | ||
857 | if (!rthp) | ||
858 | break; | ||
859 | } else { | ||
860 | *rthp = rth->u.dst.rt_next; | ||
861 | rt_free(rth); | ||
862 | goal--; | ||
863 | } | ||
864 | #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
865 | *rthp = rth->u.dst.rt_next; | 779 | *rthp = rth->u.dst.rt_next; |
866 | rt_free(rth); | 780 | rt_free(rth); |
867 | goal--; | 781 | goal--; |
868 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
869 | } | 782 | } |
870 | spin_unlock_bh(rt_hash_lock_addr(k)); | 783 | spin_unlock_bh(rt_hash_lock_addr(k)); |
871 | if (goal <= 0) | 784 | if (goal <= 0) |
@@ -939,12 +852,7 @@ restart: | |||
939 | 852 | ||
940 | spin_lock_bh(rt_hash_lock_addr(hash)); | 853 | spin_lock_bh(rt_hash_lock_addr(hash)); |
941 | while ((rth = *rthp) != NULL) { | 854 | while ((rth = *rthp) != NULL) { |
942 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
943 | if (!(rth->u.dst.flags & DST_BALANCED) && | ||
944 | compare_keys(&rth->fl, &rt->fl)) { | ||
945 | #else | ||
946 | if (compare_keys(&rth->fl, &rt->fl)) { | 855 | if (compare_keys(&rth->fl, &rt->fl)) { |
947 | #endif | ||
948 | /* Put it first */ | 856 | /* Put it first */ |
949 | *rthp = rth->u.dst.rt_next; | 857 | *rthp = rth->u.dst.rt_next; |
950 | /* | 858 | /* |
@@ -1774,10 +1682,6 @@ static inline int __mkroute_input(struct sk_buff *skb, | |||
1774 | 1682 | ||
1775 | atomic_set(&rth->u.dst.__refcnt, 1); | 1683 | atomic_set(&rth->u.dst.__refcnt, 1); |
1776 | rth->u.dst.flags= DST_HOST; | 1684 | rth->u.dst.flags= DST_HOST; |
1777 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
1778 | if (res->fi->fib_nhs > 1) | ||
1779 | rth->u.dst.flags |= DST_BALANCED; | ||
1780 | #endif | ||
1781 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 1685 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
1782 | rth->u.dst.flags |= DST_NOPOLICY; | 1686 | rth->u.dst.flags |= DST_NOPOLICY; |
1783 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | 1687 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) |
@@ -1812,11 +1716,11 @@ static inline int __mkroute_input(struct sk_buff *skb, | |||
1812 | return err; | 1716 | return err; |
1813 | } | 1717 | } |
1814 | 1718 | ||
1815 | static inline int ip_mkroute_input_def(struct sk_buff *skb, | 1719 | static inline int ip_mkroute_input(struct sk_buff *skb, |
1816 | struct fib_result* res, | 1720 | struct fib_result* res, |
1817 | const struct flowi *fl, | 1721 | const struct flowi *fl, |
1818 | struct in_device *in_dev, | 1722 | struct in_device *in_dev, |
1819 | __be32 daddr, __be32 saddr, u32 tos) | 1723 | __be32 daddr, __be32 saddr, u32 tos) |
1820 | { | 1724 | { |
1821 | struct rtable* rth = NULL; | 1725 | struct rtable* rth = NULL; |
1822 | int err; | 1726 | int err; |
@@ -1837,63 +1741,6 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, | |||
1837 | return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); | 1741 | return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); |
1838 | } | 1742 | } |
1839 | 1743 | ||
1840 | static inline int ip_mkroute_input(struct sk_buff *skb, | ||
1841 | struct fib_result* res, | ||
1842 | const struct flowi *fl, | ||
1843 | struct in_device *in_dev, | ||
1844 | __be32 daddr, __be32 saddr, u32 tos) | ||
1845 | { | ||
1846 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
1847 | struct rtable* rth = NULL, *rtres; | ||
1848 | unsigned char hop, hopcount; | ||
1849 | int err = -EINVAL; | ||
1850 | unsigned int hash; | ||
1851 | |||
1852 | if (res->fi) | ||
1853 | hopcount = res->fi->fib_nhs; | ||
1854 | else | ||
1855 | hopcount = 1; | ||
1856 | |||
1857 | /* distinguish between multipath and singlepath */ | ||
1858 | if (hopcount < 2) | ||
1859 | return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, | ||
1860 | saddr, tos); | ||
1861 | |||
1862 | /* add all alternatives to the routing cache */ | ||
1863 | for (hop = 0; hop < hopcount; hop++) { | ||
1864 | res->nh_sel = hop; | ||
1865 | |||
1866 | /* put reference to previous result */ | ||
1867 | if (hop) | ||
1868 | ip_rt_put(rtres); | ||
1869 | |||
1870 | /* create a routing cache entry */ | ||
1871 | err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, | ||
1872 | &rth); | ||
1873 | if (err) | ||
1874 | return err; | ||
1875 | |||
1876 | /* put it into the cache */ | ||
1877 | hash = rt_hash(daddr, saddr, fl->iif); | ||
1878 | err = rt_intern_hash(hash, rth, &rtres); | ||
1879 | if (err) | ||
1880 | return err; | ||
1881 | |||
1882 | /* forward hop information to multipath impl. */ | ||
1883 | multipath_set_nhinfo(rth, | ||
1884 | FIB_RES_NETWORK(*res), | ||
1885 | FIB_RES_NETMASK(*res), | ||
1886 | res->prefixlen, | ||
1887 | &FIB_RES_NH(*res)); | ||
1888 | } | ||
1889 | skb->dst = &rtres->u.dst; | ||
1890 | return err; | ||
1891 | #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
1892 | return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); | ||
1893 | #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ | ||
1894 | } | ||
1895 | |||
1896 | |||
1897 | /* | 1744 | /* |
1898 | * NOTE. We drop all the packets that has local source | 1745 | * NOTE. We drop all the packets that has local source |
1899 | * addresses, because every properly looped back packet | 1746 | * addresses, because every properly looped back packet |
@@ -2211,13 +2058,6 @@ static inline int __mkroute_output(struct rtable **result, | |||
2211 | 2058 | ||
2212 | atomic_set(&rth->u.dst.__refcnt, 1); | 2059 | atomic_set(&rth->u.dst.__refcnt, 1); |
2213 | rth->u.dst.flags= DST_HOST; | 2060 | rth->u.dst.flags= DST_HOST; |
2214 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
2215 | if (res->fi) { | ||
2216 | rth->rt_multipath_alg = res->fi->fib_mp_alg; | ||
2217 | if (res->fi->fib_nhs > 1) | ||
2218 | rth->u.dst.flags |= DST_BALANCED; | ||
2219 | } | ||
2220 | #endif | ||
2221 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2061 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) |
2222 | rth->u.dst.flags |= DST_NOXFRM; | 2062 | rth->u.dst.flags |= DST_NOXFRM; |
2223 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2063 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
@@ -2277,12 +2117,12 @@ static inline int __mkroute_output(struct rtable **result, | |||
2277 | return err; | 2117 | return err; |
2278 | } | 2118 | } |
2279 | 2119 | ||
2280 | static inline int ip_mkroute_output_def(struct rtable **rp, | 2120 | static inline int ip_mkroute_output(struct rtable **rp, |
2281 | struct fib_result* res, | 2121 | struct fib_result* res, |
2282 | const struct flowi *fl, | 2122 | const struct flowi *fl, |
2283 | const struct flowi *oldflp, | 2123 | const struct flowi *oldflp, |
2284 | struct net_device *dev_out, | 2124 | struct net_device *dev_out, |
2285 | unsigned flags) | 2125 | unsigned flags) |
2286 | { | 2126 | { |
2287 | struct rtable *rth = NULL; | 2127 | struct rtable *rth = NULL; |
2288 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); | 2128 | int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); |
@@ -2295,68 +2135,6 @@ static inline int ip_mkroute_output_def(struct rtable **rp, | |||
2295 | return err; | 2135 | return err; |
2296 | } | 2136 | } |
2297 | 2137 | ||
/*
 * Create the output route cache entry (or entries) for a lookup result.
 *
 * Without CONFIG_IP_ROUTE_MULTIPATH_CACHED, or when the result does not
 * carry more than one next hop, this defers to ip_mkroute_output_def().
 * Otherwise one cache entry per next hop is created with the hop's own
 * device (FIB_RES_DEV), interned, and its next-hop information passed to
 * multipath_set_nhinfo().  *rp ends up holding the entry returned for the
 * last hop; references to the entries of earlier hops are dropped.
 *
 * A temporary reference on the hop's device is held while the entry is
 * built and is released on every path.
 *
 * Next-hop information is only forwarded when rt_intern_hash() succeeded,
 * matching ip_mkroute_input().  NOTE(review): after a failed
 * rt_intern_hash() the state of the new entry is not visible from here, so
 * it must not be handed on.
 *
 * Returns 0 on success or the first error from __mkroute_output() or
 * rt_intern_hash().
 */
static inline int ip_mkroute_output(struct rtable **rp,
				    struct fib_result *res,
				    const struct flowi *fl,
				    const struct flowi *oldflp,
				    struct net_device *dev_out,
				    unsigned flags)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
	unsigned char hop;
	unsigned hash;
	int err = -EINVAL;
	struct rtable *rth = NULL;

	if (res->fi && res->fi->fib_nhs > 1) {
		unsigned char hopcount = res->fi->fib_nhs;

		for (hop = 0; hop < hopcount; hop++) {
			struct net_device *dev2nexthop;

			res->nh_sel = hop;

			/* hold a work reference to the output device */
			dev2nexthop = FIB_RES_DEV(*res);
			dev_hold(dev2nexthop);

			/* put reference to previous result */
			if (hop)
				ip_rt_put(*rp);

			err = __mkroute_output(&rth, res, fl, oldflp,
					       dev2nexthop, flags);
			if (err == 0) {
				hash = rt_hash(oldflp->fl4_dst,
					       oldflp->fl4_src,
					       oldflp->oif);
				err = rt_intern_hash(hash, rth, rp);
			}

			/* forward hop information to multipath impl.,
			 * but only for an entry that made it into the cache
			 */
			if (err == 0)
				multipath_set_nhinfo(rth,
						     FIB_RES_NETWORK(*res),
						     FIB_RES_NETMASK(*res),
						     res->prefixlen,
						     &FIB_RES_NH(*res));

			/* release work reference to output device */
			dev_put(dev2nexthop);

			if (err != 0)
				return err;
		}
		return err;
	} else {
		return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
					     flags);
	}
#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
	return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
#endif
}
2359 | |||
2360 | /* | 2138 | /* |
2361 | * Major route resolver routine. | 2139 | * Major route resolver routine. |
2362 | */ | 2140 | */ |
@@ -2570,17 +2348,6 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) | |||
2570 | rth->fl.mark == flp->mark && | 2348 | rth->fl.mark == flp->mark && |
2571 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2349 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
2572 | (IPTOS_RT_MASK | RTO_ONLINK))) { | 2350 | (IPTOS_RT_MASK | RTO_ONLINK))) { |
2573 | |||
2574 | /* check for multipath routes and choose one if | ||
2575 | * necessary | ||
2576 | */ | ||
2577 | if (multipath_select_route(flp, rth, rp)) { | ||
2578 | dst_hold(&(*rp)->u.dst); | ||
2579 | RT_CACHE_STAT_INC(out_hit); | ||
2580 | rcu_read_unlock_bh(); | ||
2581 | return 0; | ||
2582 | } | ||
2583 | |||
2584 | rth->u.dst.lastuse = jiffies; | 2351 | rth->u.dst.lastuse = jiffies; |
2585 | dst_hold(&rth->u.dst); | 2352 | dst_hold(&rth->u.dst); |
2586 | rth->u.dst.__use++; | 2353 | rth->u.dst.__use++; |
@@ -2729,10 +2496,6 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2729 | if (rt->u.dst.tclassid) | 2496 | if (rt->u.dst.tclassid) |
2730 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); | 2497 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); |
2731 | #endif | 2498 | #endif |
2732 | #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED | ||
2733 | if (rt->rt_multipath_alg != IP_MP_ALG_NONE) | ||
2734 | NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); | ||
2735 | #endif | ||
2736 | if (rt->fl.iif) | 2499 | if (rt->fl.iif) |
2737 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2500 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
2738 | else if (rt->rt_src != rt->fl.fl4_src) | 2501 | else if (rt->rt_src != rt->fl.fl4_src) |