Diffstat (limited to 'net/openvswitch/flow_table.c')
-rw-r--r-- | net/openvswitch/flow_table.c | 517
1 file changed, 517 insertions, 0 deletions
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
new file mode 100644
index 000000000000..dcadb75bb173
--- /dev/null
+++ b/net/openvswitch/flow_table.c
@@ -0,0 +1,517 @@
/*
 * Copyright (c) 2007-2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <net/llc_pdu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/llc.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/rcupdate.h>
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/icmpv6.h>
#include <linux/rculist.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>

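/* Slab cache for struct sw_flow allocations; created in ovs_flow_init()
 * and destroyed in ovs_flow_exit() below.
 */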
static struct kmem_cache *flow_cache;

static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
	return range->end - range->start;
}

void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
		       const struct sw_flow_mask *mask)
{
	const long *m = (long *)((u8 *)&mask->key + mask->range.start);
	const long *s = (long *)((u8 *)src + mask->range.start);
	long *d = (long *)((u8 *)dst + mask->range.start);
	int i;

	/* The memory outside of 'mask->range' is left uninitialized, since
	 * further operations on 'dst' only use the contents within
	 * 'mask->range'.
	 */
	for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
		*d++ = *s++ & *m++;
}

struct sw_flow *ovs_flow_alloc(void)
{
	struct sw_flow *flow;

	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
	if (!flow)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&flow->lock);
	flow->sf_acts = NULL;
	flow->mask = NULL;

	return flow;
}

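/* The bucket array is a flex_array rather than a plain kmalloc() so that
 * large tables can be backed by page-sized chunks instead of one big
 * contiguous allocation as the table expands.
 */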
static struct flex_array *alloc_buckets(unsigned int n_buckets)
{
	struct flex_array *buckets;
	int i, err;

	buckets = flex_array_alloc(sizeof(struct hlist_head),
				   n_buckets, GFP_KERNEL);
	if (!buckets)
		return NULL;

	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
	if (err) {
		flex_array_free(buckets);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++)
		INIT_HLIST_HEAD((struct hlist_head *)
					flex_array_get(buckets, i));

	return buckets;
}

static void flow_free(struct sw_flow *flow)
{
	kfree((struct sw_flow_actions __force *)flow->sf_acts);
	kmem_cache_free(flow_cache, flow);
}

static void rcu_free_flow_callback(struct rcu_head *rcu)
{
	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);

	flow_free(flow);
}

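/* When 'deferred' is true the flow is freed via call_rcu(), so concurrent
 * RCU readers that found the flow through a lookup remain safe until a
 * grace period has elapsed.
 */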
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
	if (!flow)
		return;

	ovs_sw_flow_mask_del_ref(flow->mask, deferred);

	if (deferred)
		call_rcu(&flow->rcu, rcu_free_flow_callback);
	else
		flow_free(flow);
}

static void free_buckets(struct flex_array *buckets)
{
	flex_array_free(buckets);
}

static void __flow_tbl_destroy(struct flow_table *table)
{
	int i;

	if (table->keep_flows)
		goto skip_flows;

	for (i = 0; i < table->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head = flex_array_get(table->buckets, i);
		struct hlist_node *n;
		int ver = table->node_ver;

		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
			hlist_del(&flow->hash_node[ver]);
			ovs_flow_free(flow, false);
		}
	}

	BUG_ON(!list_empty(table->mask_list));
	kfree(table->mask_list);

skip_flows:
	free_buckets(table->buckets);
	kfree(table);
}

static struct flow_table *__flow_tbl_alloc(int new_size)
{
	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	table->buckets = alloc_buckets(new_size);

	if (!table->buckets) {
		kfree(table);
		return NULL;
	}
	table->n_buckets = new_size;
	table->count = 0;
	table->node_ver = 0;
	table->keep_flows = false;
	get_random_bytes(&table->hash_seed, sizeof(u32));
	table->mask_list = NULL;

	return table;
}

struct flow_table *ovs_flow_tbl_alloc(int new_size)
{
	struct flow_table *table = __flow_tbl_alloc(new_size);

	if (!table)
		return NULL;

	table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
	if (!table->mask_list) {
		table->keep_flows = true;
		__flow_tbl_destroy(table);
		return NULL;
	}
	INIT_LIST_HEAD(table->mask_list);

	return table;
}

static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
	struct flow_table *table = container_of(rcu, struct flow_table, rcu);

	__flow_tbl_destroy(table);
}

void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
	if (!table)
		return;

	if (deferred)
		call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
	else
		__flow_tbl_destroy(table);
}

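/* Iterator used by flow dumps: '*bucket' and '*last' form a cursor that
 * callers pass back in unchanged to resume the walk; returns the next
 * flow after the cursor position, or NULL when the table is exhausted.
 */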
struct sw_flow *ovs_flow_tbl_dump_next(struct flow_table *table,
				       u32 *bucket, u32 *last)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int ver;
	int i;

	ver = table->node_ver;
	while (*bucket < table->n_buckets) {
		i = 0;
		head = flex_array_get(table->buckets, *bucket);
		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
			if (i < *last) {
				i++;
				continue;
			}
			*last = i + 1;
			return flow;
		}
		(*bucket)++;
		*last = 0;
	}

	return NULL;
}

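/* Mix the flow hash with the per-table random 'hash_seed' so that bucket
 * placement differs from table to table.  The index mask below assumes
 * 'n_buckets' is a power of two (ovs_flow_tbl_expand() doubles it, and
 * callers are expected to start from a power-of-two size).
 */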
static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
{
	hash = jhash_1word(hash, table->hash_seed);
	return flex_array_get(table->buckets,
				(hash & (table->n_buckets - 1)));
}

static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
	struct hlist_head *head;

	head = find_bucket(table, flow->hash);
	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);

	table->count++;
}

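/* Each sw_flow carries two sets of hash-list links (hash_node[0] and
 * hash_node[1]); flipping 'node_ver' lets flows be linked into the new
 * table without disturbing the old table's lists, and setting
 * 'keep_flows' stops the old table's destructor from freeing them.
 */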
static void flow_table_copy_flows(struct flow_table *old,
				  struct flow_table *new)
{
	int old_ver;
	int i;

	old_ver = old->node_ver;
	new->node_ver = !old_ver;

	/* Insert in new table. */
	for (i = 0; i < old->n_buckets; i++) {
		struct sw_flow *flow;
		struct hlist_head *head;

		head = flex_array_get(old->buckets, i);

		hlist_for_each_entry(flow, head, hash_node[old_ver])
			__tbl_insert(new, flow);
	}

	new->mask_list = old->mask_list;
	old->keep_flows = true;
}

static struct flow_table *__flow_tbl_rehash(struct flow_table *table,
					    int n_buckets)
{
	struct flow_table *new_table;

	new_table = __flow_tbl_alloc(n_buckets);
	if (!new_table)
		return ERR_PTR(-ENOMEM);

	flow_table_copy_flows(table, new_table);

	return new_table;
}

struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
{
	return __flow_tbl_rehash(table, table->n_buckets);
}

struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
{
	return __flow_tbl_rehash(table, table->n_buckets * 2);
}

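/* Hash only the bytes in [key_start, key_end); both bounds are
 * long-aligned, hence also u32-aligned, so the '>> 2' below is exact.
 */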
static u32 flow_hash(const struct sw_flow_key *key, int key_start,
		     int key_end)
{
	u32 *hash_key = (u32 *)((u8 *)key + key_start);
	int hash_u32s = (key_end - key_start) >> 2;

	/* Make sure the number of hash bytes is a multiple of u32. */
	BUILD_BUG_ON(sizeof(long) % sizeof(u32));

	return jhash2(hash_key, hash_u32s, 0);
}

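/* Flows without tunnel metadata skip the leading tun_key when hashing
 * and comparing; the start offset is rounded down to long alignment for
 * the long-at-a-time loops in ovs_flow_mask_key() and cmp_key().
 */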
static int flow_key_start(const struct sw_flow_key *key)
{
	if (key->tun_key.ipv4_dst)
		return 0;
	else
		return rounddown(offsetof(struct sw_flow_key, phy),
				 sizeof(long));
}

static bool cmp_key(const struct sw_flow_key *key1,
		    const struct sw_flow_key *key2,
		    int key_start, int key_end)
{
	const long *cp1 = (long *)((u8 *)key1 + key_start);
	const long *cp2 = (long *)((u8 *)key2 + key_start);
	long diffs = 0;
	int i;

	for (i = key_start; i < key_end; i += sizeof(long))
		diffs |= *cp1++ ^ *cp2++;

	return diffs == 0;
}

static bool flow_cmp_masked_key(const struct sw_flow *flow,
				const struct sw_flow_key *key,
				int key_start, int key_end)
{
	return cmp_key(&flow->key, key, key_start, key_end);
}

bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
			       struct sw_flow_match *match)
{
	struct sw_flow_key *key = match->key;
	int key_start = flow_key_start(key);
	int key_end = match->range.end;

	return cmp_key(&flow->unmasked_key, key, key_start, key_end);
}

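/* Apply 'mask' to the packet key, hash the masked range, and scan one
 * bucket; only flows installed with this same mask can match, so the
 * cheap mask-pointer compare runs before the full key compare.
 */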
static struct sw_flow *masked_flow_lookup(struct flow_table *table,
					  const struct sw_flow_key *unmasked,
					  struct sw_flow_mask *mask)
{
	struct sw_flow *flow;
	struct hlist_head *head;
	int key_start = mask->range.start;
	int key_end = mask->range.end;
	u32 hash;
	struct sw_flow_key masked_key;

	ovs_flow_mask_key(&masked_key, unmasked, mask);
	hash = flow_hash(&masked_key, key_start, key_end);
	head = find_bucket(table, hash);
	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
		if (flow->mask == mask &&
		    flow_cmp_masked_key(flow, &masked_key,
					key_start, key_end))
			return flow;
	}
	return NULL;
}

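/* Mega-flow lookup: try each mask on the list in turn until one of the
 * masked lookups hits, i.e. the cost is one hash-table probe per mask.
 * Callers must hold the RCU read lock or otherwise exclude table
 * mutation.
 */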
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
				    const struct sw_flow_key *key)
{
	struct sw_flow *flow = NULL;
	struct sw_flow_mask *mask;

	list_for_each_entry_rcu(mask, tbl->mask_list, list) {
		flow = masked_flow_lookup(tbl, key, mask);
		if (flow)	/* Found */
			break;
	}

	return flow;
}

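/* Typical fast-path usage (a sketch, not code from this patch; the real
 * caller lives in datapath.c, and the 'dp' variable is assumed here):
 *
 *	rcu_read_lock();
 *	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key);
 *	if (flow)
 *		... execute the actions in flow->sf_acts ...
 *	rcu_read_unlock();
 */
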
void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
			flow->mask->range.end);
	__tbl_insert(table, flow);
}

void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
	BUG_ON(table->count == 0);
	hlist_del_rcu(&flow->hash_node[table->node_ver]);
	table->count--;
}

struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
{
	struct sw_flow_mask *mask;

	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
	if (mask)
		mask->ref_count = 0;

	return mask;
}

void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
{
	mask->ref_count++;
}

static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu)
{
	struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu);

	kfree(mask);
}

void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
{
	if (!mask)
		return;

	BUG_ON(!mask->ref_count);
	mask->ref_count--;

	if (!mask->ref_count) {
		list_del_rcu(&mask->list);
		if (deferred)
			call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
		else
			kfree(mask);
	}
}

static bool mask_equal(const struct sw_flow_mask *a,
		       const struct sw_flow_mask *b)
{
	u8 *a_ = (u8 *)&a->key + a->range.start;
	u8 *b_ = (u8 *)&b->key + b->range.start;

	return  (a->range.end == b->range.end)
		&& (a->range.start == b->range.start)
		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
}

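/* Masks are reference-counted and shared: before inserting a new mask,
 * callers look up an equal one here and bump its ref_count instead of
 * adding a duplicate to the list.
 */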
struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
					   const struct sw_flow_mask *mask)
{
	struct list_head *ml;

	list_for_each(ml, tbl->mask_list) {
		struct sw_flow_mask *m;
		m = container_of(ml, struct sw_flow_mask, list);
		if (mask_equal(mask, m))
			return m;
	}

	return NULL;
}

/**
 * Add a new mask to the mask list.
 * The caller needs to make sure that 'mask' is not the same
 * as any masks that are already on the list.
 */
void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
{
	list_add_rcu(&mask->list, tbl->mask_list);
}

/* Initializes the flow module.
 * Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));

	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
				       0, NULL);
	if (flow_cache == NULL)
		return -ENOMEM;

	return 0;
}

/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
	kmem_cache_destroy(flow_cache);
}