diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/ipv4/multipath_wrandom.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/ipv4/multipath_wrandom.c')
-rw-r--r-- | net/ipv4/multipath_wrandom.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c new file mode 100644 index 000000000000..10b23e1bece6 --- /dev/null +++ b/net/ipv4/multipath_wrandom.c | |||
@@ -0,0 +1,344 @@ | |||
1 | /* | ||
2 | * Weighted random policy for multipath. | ||
3 | * | ||
4 | * | ||
5 | * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ | ||
6 | * | ||
7 | * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <asm/system.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/errno.h> | ||
21 | #include <linux/timer.h> | ||
22 | #include <linux/mm.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fcntl.h> | ||
25 | #include <linux/stat.h> | ||
26 | #include <linux/socket.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/inet.h> | ||
29 | #include <linux/netdevice.h> | ||
30 | #include <linux/inetdevice.h> | ||
31 | #include <linux/igmp.h> | ||
32 | #include <linux/proc_fs.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <linux/mroute.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <net/ip.h> | ||
37 | #include <net/protocol.h> | ||
38 | #include <linux/skbuff.h> | ||
39 | #include <net/sock.h> | ||
40 | #include <net/icmp.h> | ||
41 | #include <net/udp.h> | ||
42 | #include <net/raw.h> | ||
43 | #include <linux/notifier.h> | ||
44 | #include <linux/if_arp.h> | ||
45 | #include <linux/netfilter_ipv4.h> | ||
46 | #include <net/ipip.h> | ||
47 | #include <net/checksum.h> | ||
48 | #include <net/ip_fib.h> | ||
49 | #include <net/ip_mp_alg.h> | ||
50 | |||
51 | #define MULTIPATH_STATE_SIZE 15 | ||
52 | |||
53 | struct multipath_candidate { | ||
54 | struct multipath_candidate *next; | ||
55 | int power; | ||
56 | struct rtable *rt; | ||
57 | }; | ||
58 | |||
59 | struct multipath_dest { | ||
60 | struct list_head list; | ||
61 | |||
62 | const struct fib_nh *nh_info; | ||
63 | __u32 netmask; | ||
64 | __u32 network; | ||
65 | unsigned char prefixlen; | ||
66 | |||
67 | struct rcu_head rcu; | ||
68 | }; | ||
69 | |||
70 | struct multipath_bucket { | ||
71 | struct list_head head; | ||
72 | spinlock_t lock; | ||
73 | }; | ||
74 | |||
75 | struct multipath_route { | ||
76 | struct list_head list; | ||
77 | |||
78 | int oif; | ||
79 | __u32 gw; | ||
80 | struct list_head dests; | ||
81 | |||
82 | struct rcu_head rcu; | ||
83 | }; | ||
84 | |||
85 | /* state: primarily weight per route information */ | ||
86 | static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; | ||
87 | |||
88 | /* interface to random number generation */ | ||
89 | static unsigned int RANDOM_SEED = 93186752; | ||
90 | |||
91 | static inline unsigned int random(unsigned int ubound) | ||
92 | { | ||
93 | static unsigned int a = 1588635695, | ||
94 | q = 2, | ||
95 | r = 1117695901; | ||
96 | RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q); | ||
97 | return RANDOM_SEED % ubound; | ||
98 | } | ||
99 | |||
100 | static unsigned char __multipath_lookup_weight(const struct flowi *fl, | ||
101 | const struct rtable *rt) | ||
102 | { | ||
103 | const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; | ||
104 | struct multipath_route *r; | ||
105 | struct multipath_route *target_route = NULL; | ||
106 | struct multipath_dest *d; | ||
107 | int weight = 1; | ||
108 | |||
109 | /* lookup the weight information for a certain route */ | ||
110 | rcu_read_lock(); | ||
111 | |||
112 | /* find state entry for gateway or add one if necessary */ | ||
113 | list_for_each_entry_rcu(r, &state[state_idx].head, list) { | ||
114 | if (r->gw == rt->rt_gateway && | ||
115 | r->oif == rt->idev->dev->ifindex) { | ||
116 | target_route = r; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (!target_route) { | ||
122 | /* this should not happen... but we are prepared */ | ||
123 | printk( KERN_CRIT"%s: missing state for gateway: %u and " \ | ||
124 | "device %d\n", __FUNCTION__, rt->rt_gateway, | ||
125 | rt->idev->dev->ifindex); | ||
126 | goto out; | ||
127 | } | ||
128 | |||
129 | /* find state entry for destination */ | ||
130 | list_for_each_entry_rcu(d, &target_route->dests, list) { | ||
131 | __u32 targetnetwork = fl->fl4_dst & | ||
132 | (0xFFFFFFFF >> (32 - d->prefixlen)); | ||
133 | |||
134 | if ((targetnetwork & d->netmask) == d->network) { | ||
135 | weight = d->nh_info->nh_weight; | ||
136 | goto out; | ||
137 | } | ||
138 | } | ||
139 | |||
140 | out: | ||
141 | rcu_read_unlock(); | ||
142 | return weight; | ||
143 | } | ||
144 | |||
145 | static void wrandom_init_state(void) | ||
146 | { | ||
147 | int i; | ||
148 | |||
149 | for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { | ||
150 | INIT_LIST_HEAD(&state[i].head); | ||
151 | spin_lock_init(&state[i].lock); | ||
152 | } | ||
153 | } | ||
154 | |||
155 | static void wrandom_select_route(const struct flowi *flp, | ||
156 | struct rtable *first, | ||
157 | struct rtable **rp) | ||
158 | { | ||
159 | struct rtable *rt; | ||
160 | struct rtable *decision; | ||
161 | struct multipath_candidate *first_mpc = NULL; | ||
162 | struct multipath_candidate *mpc, *last_mpc = NULL; | ||
163 | int power = 0; | ||
164 | int last_power; | ||
165 | int selector; | ||
166 | const size_t size_mpc = sizeof(struct multipath_candidate); | ||
167 | |||
168 | /* collect all candidates and identify their weights */ | ||
169 | for (rt = rcu_dereference(first); rt; | ||
170 | rt = rcu_dereference(rt->u.rt_next)) { | ||
171 | if ((rt->u.dst.flags & DST_BALANCED) != 0 && | ||
172 | multipath_comparekeys(&rt->fl, flp)) { | ||
173 | struct multipath_candidate* mpc = | ||
174 | (struct multipath_candidate*) | ||
175 | kmalloc(size_mpc, GFP_KERNEL); | ||
176 | |||
177 | if (!mpc) | ||
178 | return; | ||
179 | |||
180 | power += __multipath_lookup_weight(flp, rt) * 10000; | ||
181 | |||
182 | mpc->power = power; | ||
183 | mpc->rt = rt; | ||
184 | mpc->next = NULL; | ||
185 | |||
186 | if (!first_mpc) | ||
187 | first_mpc = mpc; | ||
188 | else | ||
189 | last_mpc->next = mpc; | ||
190 | |||
191 | last_mpc = mpc; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | /* choose a weighted random candidate */ | ||
196 | decision = first; | ||
197 | selector = random(power); | ||
198 | last_power = 0; | ||
199 | |||
200 | /* select candidate, adjust GC data and cleanup local state */ | ||
201 | decision = first; | ||
202 | last_mpc = NULL; | ||
203 | for (mpc = first_mpc; mpc; mpc = mpc->next) { | ||
204 | mpc->rt->u.dst.lastuse = jiffies; | ||
205 | if (last_power <= selector && selector < mpc->power) | ||
206 | decision = mpc->rt; | ||
207 | |||
208 | last_power = mpc->power; | ||
209 | if (last_mpc) | ||
210 | kfree(last_mpc); | ||
211 | |||
212 | last_mpc = mpc; | ||
213 | } | ||
214 | |||
215 | if (last_mpc) { | ||
216 | /* concurrent __multipath_flush may lead to !last_mpc */ | ||
217 | kfree(last_mpc); | ||
218 | } | ||
219 | |||
220 | decision->u.dst.__use++; | ||
221 | *rp = decision; | ||
222 | } | ||
223 | |||
224 | static void wrandom_set_nhinfo(__u32 network, | ||
225 | __u32 netmask, | ||
226 | unsigned char prefixlen, | ||
227 | const struct fib_nh *nh) | ||
228 | { | ||
229 | const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; | ||
230 | struct multipath_route *r, *target_route = NULL; | ||
231 | struct multipath_dest *d, *target_dest = NULL; | ||
232 | |||
233 | /* store the weight information for a certain route */ | ||
234 | spin_lock(&state[state_idx].lock); | ||
235 | |||
236 | /* find state entry for gateway or add one if necessary */ | ||
237 | list_for_each_entry_rcu(r, &state[state_idx].head, list) { | ||
238 | if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { | ||
239 | target_route = r; | ||
240 | break; | ||
241 | } | ||
242 | } | ||
243 | |||
244 | if (!target_route) { | ||
245 | const size_t size_rt = sizeof(struct multipath_route); | ||
246 | target_route = (struct multipath_route *) | ||
247 | kmalloc(size_rt, GFP_KERNEL); | ||
248 | |||
249 | target_route->gw = nh->nh_gw; | ||
250 | target_route->oif = nh->nh_oif; | ||
251 | memset(&target_route->rcu, 0, sizeof(struct rcu_head)); | ||
252 | INIT_LIST_HEAD(&target_route->dests); | ||
253 | |||
254 | list_add_rcu(&target_route->list, &state[state_idx].head); | ||
255 | } | ||
256 | |||
257 | /* find state entry for destination or add one if necessary */ | ||
258 | list_for_each_entry_rcu(d, &target_route->dests, list) { | ||
259 | if (d->nh_info == nh) { | ||
260 | target_dest = d; | ||
261 | break; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | if (!target_dest) { | ||
266 | const size_t size_dst = sizeof(struct multipath_dest); | ||
267 | target_dest = (struct multipath_dest*) | ||
268 | kmalloc(size_dst, GFP_KERNEL); | ||
269 | |||
270 | target_dest->nh_info = nh; | ||
271 | target_dest->network = network; | ||
272 | target_dest->netmask = netmask; | ||
273 | target_dest->prefixlen = prefixlen; | ||
274 | memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); | ||
275 | |||
276 | list_add_rcu(&target_dest->list, &target_route->dests); | ||
277 | } | ||
278 | /* else: we already stored this info for another destination => | ||
279 | * we are finished | ||
280 | */ | ||
281 | |||
282 | spin_unlock(&state[state_idx].lock); | ||
283 | } | ||
284 | |||
285 | static void __multipath_free(struct rcu_head *head) | ||
286 | { | ||
287 | struct multipath_route *rt = container_of(head, struct multipath_route, | ||
288 | rcu); | ||
289 | kfree(rt); | ||
290 | } | ||
291 | |||
292 | static void __multipath_free_dst(struct rcu_head *head) | ||
293 | { | ||
294 | struct multipath_dest *dst = container_of(head, | ||
295 | struct multipath_dest, | ||
296 | rcu); | ||
297 | kfree(dst); | ||
298 | } | ||
299 | |||
300 | static void wrandom_flush(void) | ||
301 | { | ||
302 | int i; | ||
303 | |||
304 | /* defere delete to all entries */ | ||
305 | for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { | ||
306 | struct multipath_route *r; | ||
307 | |||
308 | spin_lock(&state[i].lock); | ||
309 | list_for_each_entry_rcu(r, &state[i].head, list) { | ||
310 | struct multipath_dest *d; | ||
311 | list_for_each_entry_rcu(d, &r->dests, list) { | ||
312 | list_del_rcu(&d->list); | ||
313 | call_rcu(&d->rcu, | ||
314 | __multipath_free_dst); | ||
315 | } | ||
316 | list_del_rcu(&r->list); | ||
317 | call_rcu(&r->rcu, | ||
318 | __multipath_free); | ||
319 | } | ||
320 | |||
321 | spin_unlock(&state[i].lock); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | static struct ip_mp_alg_ops wrandom_ops = { | ||
326 | .mp_alg_select_route = wrandom_select_route, | ||
327 | .mp_alg_flush = wrandom_flush, | ||
328 | .mp_alg_set_nhinfo = wrandom_set_nhinfo, | ||
329 | }; | ||
330 | |||
331 | static int __init wrandom_init(void) | ||
332 | { | ||
333 | wrandom_init_state(); | ||
334 | |||
335 | return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); | ||
336 | } | ||
337 | |||
338 | static void __exit wrandom_exit(void) | ||
339 | { | ||
340 | multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); | ||
341 | } | ||
342 | |||
343 | module_init(wrandom_init); | ||
344 | module_exit(wrandom_exit); | ||