author     Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/sched/cls_u32.c
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'net/sched/cls_u32.c')
-rw-r--r--  net/sched/cls_u32.c  828
1 file changed, 828 insertions(+), 0 deletions(-)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
new file mode 100644
index 000000000000..364b87d86455
--- /dev/null
+++ b/net/sched/cls_u32.c
@@ -0,0 +1,828 @@
1/*
2 * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * The filters are packed to hash tables of key nodes
12 * with a set of 32bit key/mask pairs at every node.
13 * Nodes reference next level hash tables etc.
14 *
15 * This scheme is the best universal classifier I managed to
16 * invent; it is not super-fast, but it is not slow (provided you
17 * program it correctly), and general enough. And its relative
18 * speed grows as the number of rules becomes larger.
19 *
20 * It seems that it represents the best middle point between
21 * speed and manageability both by human and by machine.
22 *
23 * It is especially useful for link sharing combined with QoS;
24 * pure RSVP doesn't need such a general approach and can use
25 * much simpler (and faster) schemes, sort of cls_rsvp.c.
26 *
27 * JHS: We should remove the CONFIG_NET_CLS_IND from here
28 * eventually when the meta match extension is made available
29 *
30 * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
31 */
32
33#include <asm/uaccess.h>
34#include <asm/system.h>
35#include <linux/bitops.h>
36#include <linux/config.h>
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/kernel.h>
40#include <linux/sched.h>
41#include <linux/string.h>
42#include <linux/mm.h>
43#include <linux/socket.h>
44#include <linux/sockios.h>
45#include <linux/in.h>
46#include <linux/errno.h>
47#include <linux/interrupt.h>
48#include <linux/if_ether.h>
49#include <linux/inet.h>
50#include <linux/netdevice.h>
51#include <linux/etherdevice.h>
52#include <linux/notifier.h>
53#include <linux/rtnetlink.h>
54#include <net/ip.h>
55#include <net/route.h>
56#include <linux/skbuff.h>
57#include <net/sock.h>
58#include <net/act_api.h>
59#include <net/pkt_cls.h>
60
61struct tc_u_knode
62{
63 struct tc_u_knode *next;
64 u32 handle;
65 struct tc_u_hnode *ht_up;
66 struct tcf_exts exts;
67#ifdef CONFIG_NET_CLS_IND
68 char indev[IFNAMSIZ];
69#endif
70 u8 fshift;
71 struct tcf_result res;
72 struct tc_u_hnode *ht_down;
73#ifdef CONFIG_CLS_U32_PERF
74 struct tc_u32_pcnt *pf;
75#endif
76#ifdef CONFIG_CLS_U32_MARK
77 struct tc_u32_mark mark;
78#endif
79 struct tc_u32_sel sel;
80};
81
82struct tc_u_hnode
83{
84 struct tc_u_hnode *next;
85 u32 handle;
86 u32 prio;
87 struct tc_u_common *tp_c;
88 int refcnt;
89 unsigned divisor;
90 struct tc_u_knode *ht[1];
91};
92
93struct tc_u_common
94{
95 struct tc_u_common *next;
96 struct tc_u_hnode *hlist;
97 struct Qdisc *q;
98 int refcnt;
99 u32 hgenerator;
100};
101
102static struct tcf_ext_map u32_ext_map = {
103 .action = TCA_U32_ACT,
104 .police = TCA_U32_POLICE
105};
106
107static struct tc_u_common *u32_list;
108
109static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift)
110{
111 unsigned h = (key & sel->hmask)>>fshift;
112
113 return h;
114}
115
116static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
117{
118 struct {
119 struct tc_u_knode *knode;
120 u8 *ptr;
121 } stack[TC_U32_MAXDEPTH];
122
123 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
124 u8 *ptr = skb->nh.raw;
125 struct tc_u_knode *n;
126 int sdepth = 0;
127 int off2 = 0;
128 int sel = 0;
129#ifdef CONFIG_CLS_U32_PERF
130 int j;
131#endif
132 int i, r;
133
134next_ht:
135 n = ht->ht[sel];
136
137next_knode:
138 if (n) {
139 struct tc_u32_key *key = n->sel.keys;
140
141#ifdef CONFIG_CLS_U32_PERF
142 n->pf->rcnt +=1;
143 j = 0;
144#endif
145
146#ifdef CONFIG_CLS_U32_MARK
147 if ((skb->nfmark & n->mark.mask) != n->mark.val) {
148 n = n->next;
149 goto next_knode;
150 } else {
151 n->mark.success++;
152 }
153#endif
154
155 for (i = n->sel.nkeys; i>0; i--, key++) {
156
157 if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
158 n = n->next;
159 goto next_knode;
160 }
161#ifdef CONFIG_CLS_U32_PERF
162 n->pf->kcnts[j] +=1;
163 j++;
164#endif
165 }
166 if (n->ht_down == NULL) {
167check_terminal:
168 if (n->sel.flags&TC_U32_TERMINAL) {
169
170 *res = n->res;
171#ifdef CONFIG_NET_CLS_IND
172 if (!tcf_match_indev(skb, n->indev)) {
173 n = n->next;
174 goto next_knode;
175 }
176#endif
177#ifdef CONFIG_CLS_U32_PERF
178 n->pf->rhit +=1;
179#endif
180 r = tcf_exts_exec(skb, &n->exts, res);
181 if (r < 0) {
182 n = n->next;
183 goto next_knode;
184 }
185
186 return r;
187 }
188 n = n->next;
189 goto next_knode;
190 }
191
192 /* PUSH */
193 if (sdepth >= TC_U32_MAXDEPTH)
194 goto deadloop;
195 stack[sdepth].knode = n;
196 stack[sdepth].ptr = ptr;
197 sdepth++;
198
199 ht = n->ht_down;
200 sel = 0;
201 if (ht->divisor)
202 sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
203
204 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
205 goto next_ht;
206
207 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
208 off2 = n->sel.off + 3;
209 if (n->sel.flags&TC_U32_VAROFFSET)
210 off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
211 off2 &= ~3;
212 }
213 if (n->sel.flags&TC_U32_EAT) {
214 ptr += off2;
215 off2 = 0;
216 }
217
218 if (ptr < skb->tail)
219 goto next_ht;
220 }
221
222 /* POP */
223 if (sdepth--) {
224 n = stack[sdepth].knode;
225 ht = n->ht_up;
226 ptr = stack[sdepth].ptr;
227 goto check_terminal;
228 }
229 return -1;
230
231deadloop:
232 if (net_ratelimit())
233 printk("cls_u32: dead loop\n");
234 return -1;
235}
236
237static __inline__ struct tc_u_hnode *
238u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
239{
240 struct tc_u_hnode *ht;
241
242 for (ht = tp_c->hlist; ht; ht = ht->next)
243 if (ht->handle == handle)
244 break;
245
246 return ht;
247}
248
249static __inline__ struct tc_u_knode *
250u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
251{
252 unsigned sel;
253 struct tc_u_knode *n = NULL;
254
255 sel = TC_U32_HASH(handle);
256 if (sel > ht->divisor)
257 goto out;
258
259 for (n = ht->ht[sel]; n; n = n->next)
260 if (n->handle == handle)
261 break;
262out:
263 return n;
264}
265
266
267static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
268{
269 struct tc_u_hnode *ht;
270 struct tc_u_common *tp_c = tp->data;
271
272 if (TC_U32_HTID(handle) == TC_U32_ROOT)
273 ht = tp->root;
274 else
275 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
276
277 if (!ht)
278 return 0;
279
280 if (TC_U32_KEY(handle) == 0)
281 return (unsigned long)ht;
282
283 return (unsigned long)u32_lookup_key(ht, handle);
284}
285
286static void u32_put(struct tcf_proto *tp, unsigned long f)
287{
288}
289
290static u32 gen_new_htid(struct tc_u_common *tp_c)
291{
292 int i = 0x800;
293
294 do {
295 if (++tp_c->hgenerator == 0x7FF)
296 tp_c->hgenerator = 1;
297 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
298
299 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
300}
301
302static int u32_init(struct tcf_proto *tp)
303{
304 struct tc_u_hnode *root_ht;
305 struct tc_u_common *tp_c;
306
307 for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
308 if (tp_c->q == tp->q)
309 break;
310
311 root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL);
312 if (root_ht == NULL)
313 return -ENOBUFS;
314
315 memset(root_ht, 0, sizeof(*root_ht));
316 root_ht->divisor = 0;
317 root_ht->refcnt++;
318 root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
319 root_ht->prio = tp->prio;
320
321 if (tp_c == NULL) {
322 tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
323 if (tp_c == NULL) {
324 kfree(root_ht);
325 return -ENOBUFS;
326 }
327 memset(tp_c, 0, sizeof(*tp_c));
328 tp_c->q = tp->q;
329 tp_c->next = u32_list;
330 u32_list = tp_c;
331 }
332
333 tp_c->refcnt++;
334 root_ht->next = tp_c->hlist;
335 tp_c->hlist = root_ht;
336 root_ht->tp_c = tp_c;
337
338 tp->root = root_ht;
339 tp->data = tp_c;
340 return 0;
341}
342
343static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
344{
345 tcf_unbind_filter(tp, &n->res);
346 tcf_exts_destroy(tp, &n->exts);
347 if (n->ht_down)
348 n->ht_down->refcnt--;
349#ifdef CONFIG_CLS_U32_PERF
350 if (n && (NULL != n->pf))
351 kfree(n->pf);
352#endif
353 kfree(n);
354 return 0;
355}
356
357static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
358{
359 struct tc_u_knode **kp;
360 struct tc_u_hnode *ht = key->ht_up;
361
362 if (ht) {
363 for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
364 if (*kp == key) {
365 tcf_tree_lock(tp);
366 *kp = key->next;
367 tcf_tree_unlock(tp);
368
369 u32_destroy_key(tp, key);
370 return 0;
371 }
372 }
373 }
374 BUG_TRAP(0);
375 return 0;
376}
377
378static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
379{
380 struct tc_u_knode *n;
381 unsigned h;
382
383 for (h=0; h<=ht->divisor; h++) {
384 while ((n = ht->ht[h]) != NULL) {
385 ht->ht[h] = n->next;
386
387 u32_destroy_key(tp, n);
388 }
389 }
390}
391
392static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
393{
394 struct tc_u_common *tp_c = tp->data;
395 struct tc_u_hnode **hn;
396
397 BUG_TRAP(!ht->refcnt);
398
399 u32_clear_hnode(tp, ht);
400
401 for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
402 if (*hn == ht) {
403 *hn = ht->next;
404 kfree(ht);
405 return 0;
406 }
407 }
408
409 BUG_TRAP(0);
410 return -ENOENT;
411}
412
413static void u32_destroy(struct tcf_proto *tp)
414{
415 struct tc_u_common *tp_c = tp->data;
416 struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
417
418 BUG_TRAP(root_ht != NULL);
419
420 if (root_ht && --root_ht->refcnt == 0)
421 u32_destroy_hnode(tp, root_ht);
422
423 if (--tp_c->refcnt == 0) {
424 struct tc_u_hnode *ht;
425 struct tc_u_common **tp_cp;
426
427 for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
428 if (*tp_cp == tp_c) {
429 *tp_cp = tp_c->next;
430 break;
431 }
432 }
433
434 for (ht=tp_c->hlist; ht; ht = ht->next)
435 u32_clear_hnode(tp, ht);
436
437 while ((ht = tp_c->hlist) != NULL) {
438 tp_c->hlist = ht->next;
439
440 BUG_TRAP(ht->refcnt == 0);
441
442 kfree(ht);
443 };
444
445 kfree(tp_c);
446 }
447
448 tp->data = NULL;
449}
450
451static int u32_delete(struct tcf_proto *tp, unsigned long arg)
452{
453 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
454
455 if (ht == NULL)
456 return 0;
457
458 if (TC_U32_KEY(ht->handle))
459 return u32_delete_key(tp, (struct tc_u_knode*)ht);
460
461 if (tp->root == ht)
462 return -EINVAL;
463
464 if (--ht->refcnt == 0)
465 u32_destroy_hnode(tp, ht);
466
467 return 0;
468}
469
470static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
471{
472 struct tc_u_knode *n;
473 unsigned i = 0x7FF;
474
475 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
476 if (i < TC_U32_NODE(n->handle))
477 i = TC_U32_NODE(n->handle);
478 i++;
479
480 return handle|(i>0xFFF ? 0xFFF : i);
481}
482
483static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
484 struct tc_u_hnode *ht,
485 struct tc_u_knode *n, struct rtattr **tb,
486 struct rtattr *est)
487{
488 int err;
489 struct tcf_exts e;
490
491 err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map);
492 if (err < 0)
493 return err;
494
495 err = -EINVAL;
496 if (tb[TCA_U32_LINK-1]) {
497 u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
498 struct tc_u_hnode *ht_down = NULL;
499
500 if (TC_U32_KEY(handle))
501 goto errout;
502
503 if (handle) {
504 ht_down = u32_lookup_ht(ht->tp_c, handle);
505
506 if (ht_down == NULL)
507 goto errout;
508 ht_down->refcnt++;
509 }
510
511 tcf_tree_lock(tp);
512 ht_down = xchg(&n->ht_down, ht_down);
513 tcf_tree_unlock(tp);
514
515 if (ht_down)
516 ht_down->refcnt--;
517 }
518 if (tb[TCA_U32_CLASSID-1]) {
519 n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
520 tcf_bind_filter(tp, &n->res, base);
521 }
522
523#ifdef CONFIG_NET_CLS_IND
524 if (tb[TCA_U32_INDEV-1]) {
525 int err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV-1]);
526 if (err < 0)
527 goto errout;
528 }
529#endif
530 tcf_exts_change(tp, &n->exts, &e);
531
532 return 0;
533errout:
534 tcf_exts_destroy(tp, &e);
535 return err;
536}
537
538static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
539 struct rtattr **tca,
540 unsigned long *arg)
541{
542 struct tc_u_common *tp_c = tp->data;
543 struct tc_u_hnode *ht;
544 struct tc_u_knode *n;
545 struct tc_u32_sel *s;
546 struct rtattr *opt = tca[TCA_OPTIONS-1];
547 struct rtattr *tb[TCA_U32_MAX];
548 u32 htid;
549 int err;
550
551 if (opt == NULL)
552 return handle ? -EINVAL : 0;
553
554 if (rtattr_parse_nested(tb, TCA_U32_MAX, opt) < 0)
555 return -EINVAL;
556
557 if ((n = (struct tc_u_knode*)*arg) != NULL) {
558 if (TC_U32_KEY(n->handle) == 0)
559 return -EINVAL;
560
561 return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
562 }
563
564 if (tb[TCA_U32_DIVISOR-1]) {
565 unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);
566
567 if (--divisor > 0x100)
568 return -EINVAL;
569 if (TC_U32_KEY(handle))
570 return -EINVAL;
571 if (handle == 0) {
572 handle = gen_new_htid(tp->data);
573 if (handle == 0)
574 return -ENOMEM;
575 }
576 ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
577 if (ht == NULL)
578 return -ENOBUFS;
579 memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
580 ht->tp_c = tp_c;
581 ht->refcnt = 0;
582 ht->divisor = divisor;
583 ht->handle = handle;
584 ht->prio = tp->prio;
585 ht->next = tp_c->hlist;
586 tp_c->hlist = ht;
587 *arg = (unsigned long)ht;
588 return 0;
589 }
590
591 if (tb[TCA_U32_HASH-1]) {
592 htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
593 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
594 ht = tp->root;
595 htid = ht->handle;
596 } else {
597 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
598 if (ht == NULL)
599 return -EINVAL;
600 }
601 } else {
602 ht = tp->root;
603 htid = ht->handle;
604 }
605
606 if (ht->divisor < TC_U32_HASH(htid))
607 return -EINVAL;
608
609 if (handle) {
610 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
611 return -EINVAL;
612 handle = htid | TC_U32_NODE(handle);
613 } else
614 handle = gen_new_kid(ht, htid);
615
616 if (tb[TCA_U32_SEL-1] == 0 ||
617 RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
618 return -EINVAL;
619
620 s = RTA_DATA(tb[TCA_U32_SEL-1]);
621
622 n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
623 if (n == NULL)
624 return -ENOBUFS;
625
626 memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
627#ifdef CONFIG_CLS_U32_PERF
628 n->pf = kmalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
629 if (n->pf == NULL) {
630 kfree(n);
631 return -ENOBUFS;
632 }
633 memset(n->pf, 0, sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64));
634#endif
635
636 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
637 n->ht_up = ht;
638 n->handle = handle;
639{
640 u8 i = 0;
641 u32 mask = s->hmask;
642 if (mask) {
643 while (!(mask & 1)) {
644 i++;
645 mask>>=1;
646 }
647 }
648 n->fshift = i;
649}
650
651#ifdef CONFIG_CLS_U32_MARK
652 if (tb[TCA_U32_MARK-1]) {
653 struct tc_u32_mark *mark;
654
655 if (RTA_PAYLOAD(tb[TCA_U32_MARK-1]) < sizeof(struct tc_u32_mark)) {
656#ifdef CONFIG_CLS_U32_PERF
657 kfree(n->pf);
658#endif
659 kfree(n);
660 return -EINVAL;
661 }
662 mark = RTA_DATA(tb[TCA_U32_MARK-1]);
663 memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
664 n->mark.success = 0;
665 }
666#endif
667
668 err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE-1]);
669 if (err == 0) {
670 struct tc_u_knode **ins;
671 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
672 if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
673 break;
674
675 n->next = *ins;
676 wmb();
677 *ins = n;
678
679 *arg = (unsigned long)n;
680 return 0;
681 }
682#ifdef CONFIG_CLS_U32_PERF
683 if (n && (NULL != n->pf))
684 kfree(n->pf);
685#endif
686 kfree(n);
687 return err;
688}
689
690static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
691{
692 struct tc_u_common *tp_c = tp->data;
693 struct tc_u_hnode *ht;
694 struct tc_u_knode *n;
695 unsigned h;
696
697 if (arg->stop)
698 return;
699
700 for (ht = tp_c->hlist; ht; ht = ht->next) {
701 if (ht->prio != tp->prio)
702 continue;
703 if (arg->count >= arg->skip) {
704 if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
705 arg->stop = 1;
706 return;
707 }
708 }
709 arg->count++;
710 for (h = 0; h <= ht->divisor; h++) {
711 for (n = ht->ht[h]; n; n = n->next) {
712 if (arg->count < arg->skip) {
713 arg->count++;
714 continue;
715 }
716 if (arg->fn(tp, (unsigned long)n, arg) < 0) {
717 arg->stop = 1;
718 return;
719 }
720 arg->count++;
721 }
722 }
723 }
724}
725
726static int u32_dump(struct tcf_proto *tp, unsigned long fh,
727 struct sk_buff *skb, struct tcmsg *t)
728{
729 struct tc_u_knode *n = (struct tc_u_knode*)fh;
730 unsigned char *b = skb->tail;
731 struct rtattr *rta;
732
733 if (n == NULL)
734 return skb->len;
735
736 t->tcm_handle = n->handle;
737
738 rta = (struct rtattr*)b;
739 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
740
741 if (TC_U32_KEY(n->handle) == 0) {
742 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
743 u32 divisor = ht->divisor+1;
744 RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
745 } else {
746 RTA_PUT(skb, TCA_U32_SEL,
747 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
748 &n->sel);
749 if (n->ht_up) {
750 u32 htid = n->handle & 0xFFFFF000;
751 RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
752 }
753 if (n->res.classid)
754 RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
755 if (n->ht_down)
756 RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
757
758#ifdef CONFIG_CLS_U32_MARK
759 if (n->mark.val || n->mark.mask)
760 RTA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
761#endif
762
763 if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
764 goto rtattr_failure;
765
766#ifdef CONFIG_NET_CLS_IND
767 if(strlen(n->indev))
768 RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
769#endif
770#ifdef CONFIG_CLS_U32_PERF
771 RTA_PUT(skb, TCA_U32_PCNT,
772 sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
773 n->pf);
774#endif
775 }
776
777 rta->rta_len = skb->tail - b;
778 if (TC_U32_KEY(n->handle))
779 if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
780 goto rtattr_failure;
781 return skb->len;
782
783rtattr_failure:
784 skb_trim(skb, b - skb->data);
785 return -1;
786}
787
788static struct tcf_proto_ops cls_u32_ops = {
789 .next = NULL,
790 .kind = "u32",
791 .classify = u32_classify,
792 .init = u32_init,
793 .destroy = u32_destroy,
794 .get = u32_get,
795 .put = u32_put,
796 .change = u32_change,
797 .delete = u32_delete,
798 .walk = u32_walk,
799 .dump = u32_dump,
800 .owner = THIS_MODULE,
801};
802
803static int __init init_u32(void)
804{
805 printk("u32 classifier\n");
806#ifdef CONFIG_CLS_U32_PERF
807	printk(" Performance counters on\n");
808#endif
809#ifdef CONFIG_NET_CLS_POLICE
810 printk(" OLD policer on \n");
811#endif
812#ifdef CONFIG_NET_CLS_IND
813 printk(" input device check on \n");
814#endif
815#ifdef CONFIG_NET_CLS_ACT
816 printk(" Actions configured \n");
817#endif
818 return register_tcf_proto_ops(&cls_u32_ops);
819}
820
821static void __exit exit_u32(void)
822{
823 unregister_tcf_proto_ops(&cls_u32_ops);
824}
825
826module_init(init_u32)
827module_exit(exit_u32)
828MODULE_LICENSE("GPL");
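
The header comment at the top of the file describes the matching scheme: each node carries a set of 32-bit key/mask pairs tested against fixed offsets in the packet, and a node may link to a lower-level hash table whose bucket is chosen by folding bits out of a packet word (u32_hash_fold(), then ANDed with the table's divisor in u32_classify()). The following standalone userspace sketch illustrates just that mechanism; demo_key, demo_match(), demo_hash_fold(), and the packet bytes in main() are made-up names and values for demonstration and are not part of cls_u32.c.

/* Illustrative userspace sketch only -- not part of the kernel module.
 * It mimics the per-key test and the hash fold used by u32_classify(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_key {               /* analogous to struct tc_u32_key */
	uint32_t mask;          /* which bits of the 32-bit word matter */
	uint32_t val;           /* expected value of those bits */
	int      off;           /* byte offset of the word in the packet */
};

/* Same idea as u32_hash_fold(): mask the word, then shift the masked
 * bits down to form a bucket index (the kernel additionally ANDs the
 * result with the lower table's divisor). */
static unsigned demo_hash_fold(uint32_t key, uint32_t hmask, uint8_t fshift)
{
	return (key & hmask) >> fshift;
}

/* Returns 1 if every key/mask pair matches the packet bytes,
 * mirroring the ((word ^ val) & mask) test in u32_classify(). */
static int demo_match(const uint8_t *pkt, const struct demo_key *keys, int nkeys)
{
	uint32_t word;
	int i;

	for (i = 0; i < nkeys; i++) {
		memcpy(&word, pkt + keys[i].off, sizeof(word));
		if ((word ^ keys[i].val) & keys[i].mask)
			return 0;       /* some selected bit differs */
	}
	return 1;
}

int main(void)
{
	/* Hypothetical 8-byte "header"; values chosen only for the demo. */
	uint8_t pkt[8] = { 0x45, 0x00, 0x00, 0x54, 0xc0, 0xa8, 0x01, 0x02 };
	struct demo_key key;
	uint32_t word;

	/* Build a key that matches the first 32-bit word of this buffer.
	 * (The kernel keeps val/mask in network byte order; reusing the
	 * host-order word here sidesteps endianness for the demo.) */
	memcpy(&word, pkt, sizeof(word));
	key.mask = 0xffffffffu;
	key.val  = word;
	key.off  = 0;

	printf("match: %d\n", demo_match(pkt, &key, 1));    /* prints 1 */

	key.val ^= 1;                                       /* perturb one bit */
	printf("match: %d\n", demo_match(pkt, &key, 1));    /* prints 0 */

	/* Fold part of the word into a bucket index, as a node linking to a
	 * lower hash table would. */
	printf("bucket: %u\n", demo_hash_fold(word, 0x0000ff00, 8));
	return 0;
}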