path: root/net/sched/cls_flow.c
author     Patrick McHardy <kaber@trash.net>        2008-01-31 21:37:42 -0500
committer  David S. Miller <davem@davemloft.net>    2008-01-31 22:28:36 -0500
commit     e5dfb815181fcb186d6080ac3a091eadff2d98fe (patch)
tree       25ec6cc5b3c75536dc45a14089ca14fc8bd67938 /net/sched/cls_flow.c
parent     94de78d19580143c407ff2492edf2410d0e7d48c (diff)
[NET_SCHED]: Add flow classifier
Add new "flow" classifier, which is meant to extend the SFQ hashing
capabilities without hard-coding new hash functions and also allows
deterministic mappings of keys to classes, replacing some out of tree
iptables patches like IPCLASSIFY (maps IPs to classes), IPMARK (maps
IPs to marks, with fw filters to classes), ...

Some examples:

- Classic SFQ hash:

  tc filter add ... flow hash \
        keys src,dst,proto,proto-src,proto-dst divisor 1024

- Classic SFQ hash, but using information from conntrack to work properly
  in combination with NAT:

  tc filter add ... flow hash \
        keys nfct-src,nfct-dst,proto,nfct-proto-src,nfct-proto-dst divisor 1024

- Map destination IPs of 192.168.0.0/24 to classids 1-257:

  tc filter add ... flow map \
        key dst addend -192.168.0.0 divisor 256

- alternatively:

  tc filter add ... flow map \
        key dst and 0xff

- similar, but reverse ordered:

  tc filter add ... flow map \
        key dst and 0xff xor 0xff

Perturbation is currently not supported because we can't reliably kill the
timer on destruction.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
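As a worked example (not part of the patch), here is a minimal userspace sketch of the map-mode arithmetic implemented in flow_classify() below, applied to the "key dst addend -192.168.0.0 divisor 256" filter above. The sample address 192.168.0.5 and the variable names are illustrative; the default mask/xor/rshift values and the baseclass minor of 1 are taken from flow_change():

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          /* "tc filter add ... flow map key dst addend -192.168.0.0 divisor 256" */
          uint32_t key       = 0xc0a80005;    /* dst 192.168.0.5 in host byte order */
          uint32_t mask      = ~0U;           /* default: keep all bits */
          uint32_t xor_val   = 0;             /* no xor configured */
          uint32_t rshift    = 0;             /* no shift configured */
          uint32_t addend    = -0xc0a80000U;  /* addend -192.168.0.0 */
          uint32_t divisor   = 256;
          uint32_t baseminor = 1;             /* flow_change() defaults TC_H_MIN(baseclass) to 1 */
          uint32_t classid;

          classid = (key & mask) ^ xor_val;
          classid = (classid >> rshift) + addend;   /* 5 */
          if (divisor)
                  classid %= divisor;               /* 5 */

          /* flow_classify() reports TC_H_MAKE(baseclass, baseclass + classid) */
          printf("minor class id: %u\n", (unsigned int)(baseminor + classid));  /* prints 6 */
          return 0;
  }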
Diffstat (limited to 'net/sched/cls_flow.c')
-rw-r--r--  net/sched/cls_flow.c  660
1 file changed, 660 insertions(+), 0 deletions(-)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
new file mode 100644
index 000000000000..5a7f6a3060fc
--- /dev/null
+++ b/net/sched/cls_flow.c
@@ -0,0 +1,660 @@
/*
 * net/sched/cls_flow.c         Generic flow classifier
 *
 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/pkt_cls.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif

struct flow_head {
        struct list_head        filters;
};

struct flow_filter {
        struct list_head        list;
        struct tcf_exts         exts;
        struct tcf_ematch_tree  ematches;
        u32                     handle;

        u32                     nkeys;
        u32                     keymask;
        u32                     mode;
        u32                     mask;
        u32                     xor;
        u32                     rshift;
        u32                     addend;
        u32                     divisor;
        u32                     baseclass;
};

static u32 flow_hashrnd __read_mostly;
static int flow_hashrnd_initted __read_mostly;

static const struct tcf_ext_map flow_ext_map = {
        .action = TCA_FLOW_ACT,
        .police = TCA_FLOW_POLICE,
};

static inline u32 addr_fold(void *addr)
{
        unsigned long a = (unsigned long)addr;

        return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
}

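/*
 * Per-packet key extractors: each returns the selected key as a host-order
 * u32. IPv6 addresses are reduced to their last 32 bits; non-IP packets
 * fall back to folding kernel pointers (socket/dst) into the key.
 */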
static u32 flow_get_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->saddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
        default:
                return addr_fold(skb->sk);
        }
}

static u32 flow_get_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(ip_hdr(skb)->daddr);
        case __constant_htons(ETH_P_IPV6):
                return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
        default:
                return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }
}

static u32 flow_get_proto(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ip_hdr(skb)->protocol;
        case __constant_htons(ETH_P_IPV6):
                return ipv6_hdr(skb)->nexthdr;
        default:
                return 0;
        }
}

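/* Protocols carrying 16 bit port numbers (or, for ESP, an SPI) right after the IP header */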
static int has_ports(u8 protocol)
{
        switch (protocol) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_UDPLITE:
        case IPPROTO_SCTP:
        case IPPROTO_DCCP:
        case IPPROTO_ESP:
                return 1;
        default:
                return 0;
        }
}

static u32 flow_get_proto_src(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)&iph[1]);
                break;
        }
        default:
                res = addr_fold(skb->sk);
        }

        return res;
}

static u32 flow_get_proto_dst(const struct sk_buff *skb)
{
        u32 res = 0;

        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP): {
                struct iphdr *iph = ip_hdr(skb);

                if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
                    has_ports(iph->protocol))
                        res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
                break;
        }
        case __constant_htons(ETH_P_IPV6): {
                struct ipv6hdr *iph = ipv6_hdr(skb);

                if (has_ports(iph->nexthdr))
                        res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
                break;
        }
        default:
                res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
        }

        return res;
}

static u32 flow_get_iif(const struct sk_buff *skb)
{
        return skb->iif;
}

static u32 flow_get_priority(const struct sk_buff *skb)
{
        return skb->priority;
}

static u32 flow_get_mark(const struct sk_buff *skb)
{
        return skb->mark;
}

static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        return addr_fold(skb->nfct);
#else
        return 0;
#endif
}

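/*
 * Look up the conntrack tuple for the packet's direction and evaluate to
 * the requested tuple member. When there is no conntrack entry (or
 * conntrack is not compiled in), jump to the caller's "fallback" label so
 * the plain, non-conntrack key extractor is used instead.
 */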
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        enum ip_conntrack_info ctinfo;                                  \
        struct nf_conn *ct = nf_ct_get(skb, &ctinfo);                   \
        if (ct == NULL)                                                 \
                goto fallback;                                          \
        ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;                 \
})
#else
#define CTTUPLE(skb, member)                                            \
({                                                                      \
        goto fallback;                                                  \
        0;                                                              \
})
#endif

static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, src.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
        }
fallback:
        return flow_get_src(skb);
}

static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
        switch (skb->protocol) {
        case __constant_htons(ETH_P_IP):
                return ntohl(CTTUPLE(skb, dst.u3.ip));
        case __constant_htons(ETH_P_IPV6):
                return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
        }
fallback:
        return flow_get_dst(skb);
}

static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, src.u.all));
fallback:
        return flow_get_proto_src(skb);
}

static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
{
        return ntohs(CTTUPLE(skb, dst.u.all));
fallback:
        return flow_get_proto_dst(skb);
}

static u32 flow_get_rtclassid(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ROUTE
        if (skb->dst)
                return skb->dst->tclassid;
#endif
        return 0;
}

static u32 flow_get_skuid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_uid;
        return 0;
}

static u32 flow_get_skgid(const struct sk_buff *skb)
{
        if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
                return skb->sk->sk_socket->file->f_gid;
        return 0;
}

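/* Dispatch a FLOW_KEY_* identifier to the matching extractor */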
static u32 flow_key_get(const struct sk_buff *skb, int key)
{
        switch (key) {
        case FLOW_KEY_SRC:
                return flow_get_src(skb);
        case FLOW_KEY_DST:
                return flow_get_dst(skb);
        case FLOW_KEY_PROTO:
                return flow_get_proto(skb);
        case FLOW_KEY_PROTO_SRC:
                return flow_get_proto_src(skb);
        case FLOW_KEY_PROTO_DST:
                return flow_get_proto_dst(skb);
        case FLOW_KEY_IIF:
                return flow_get_iif(skb);
        case FLOW_KEY_PRIORITY:
                return flow_get_priority(skb);
        case FLOW_KEY_MARK:
                return flow_get_mark(skb);
        case FLOW_KEY_NFCT:
                return flow_get_nfct(skb);
        case FLOW_KEY_NFCT_SRC:
                return flow_get_nfct_src(skb);
        case FLOW_KEY_NFCT_DST:
                return flow_get_nfct_dst(skb);
        case FLOW_KEY_NFCT_PROTO_SRC:
                return flow_get_nfct_proto_src(skb);
        case FLOW_KEY_NFCT_PROTO_DST:
                return flow_get_nfct_proto_dst(skb);
        case FLOW_KEY_RTCLASSID:
                return flow_get_rtclassid(skb);
        case FLOW_KEY_SKUID:
                return flow_get_skuid(skb);
        case FLOW_KEY_SKGID:
                return flow_get_skgid(skb);
        default:
                WARN_ON(1);
                return 0;
        }
}

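/*
 * Classify a packet: for each filter whose ematches accept the packet,
 * collect the configured keys. In hash mode the keys are jhashed with a
 * random seed; in map mode the first key is transformed by mask, xor,
 * rshift and addend. The result is optionally reduced modulo the divisor
 * and added to the minor id of the base class.
 */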
static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
                         struct tcf_result *res)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        u32 keymask;
        u32 classid;
        unsigned int n, key;
        int r;

        list_for_each_entry(f, &head->filters, list) {
                u32 keys[f->nkeys];

                if (!tcf_em_tree_match(skb, &f->ematches, NULL))
                        continue;

                keymask = f->keymask;

                for (n = 0; n < f->nkeys; n++) {
                        key = ffs(keymask) - 1;
                        keymask &= ~(1 << key);
                        keys[n] = flow_key_get(skb, key);
                }

                if (f->mode == FLOW_MODE_HASH)
                        classid = jhash2(keys, f->nkeys, flow_hashrnd);
                else {
                        classid = keys[0];
                        classid = (classid & f->mask) ^ f->xor;
                        classid = (classid >> f->rshift) + f->addend;
                }

                if (f->divisor)
                        classid %= f->divisor;

                res->class = 0;
                res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);

                r = tcf_exts_exec(skb, &f->exts, res);
                if (r < 0)
                        continue;
                return r;
        }
        return -1;
}

static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
        [TCA_FLOW_KEYS]         = { .type = NLA_U32 },
        [TCA_FLOW_MODE]         = { .type = NLA_U32 },
        [TCA_FLOW_BASECLASS]    = { .type = NLA_U32 },
        [TCA_FLOW_RSHIFT]       = { .type = NLA_U32 },
        [TCA_FLOW_ADDEND]       = { .type = NLA_U32 },
        [TCA_FLOW_MASK]         = { .type = NLA_U32 },
        [TCA_FLOW_XOR]          = { .type = NLA_U32 },
        [TCA_FLOW_DIVISOR]      = { .type = NLA_U32 },
        [TCA_FLOW_ACT]          = { .type = NLA_NESTED },
        [TCA_FLOW_POLICE]       = { .type = NLA_NESTED },
        [TCA_FLOW_EMATCHES]     = { .type = NLA_NESTED },
};

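/*
 * Create or update a filter from netlink attributes. New filters must
 * specify a handle and at least one key; map mode accepts only a single
 * key. Missing baseclass major/minor parts are filled in from the
 * attached qdisc and default to minor 1.
 */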
static int flow_change(struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
                       unsigned long *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_FLOW_MAX + 1];
        struct tcf_exts e;
        struct tcf_ematch_tree t;
        unsigned int nkeys = 0;
        u32 baseclass = 0;
        u32 keymask = 0;
        u32 mode;
        int err;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
        if (err < 0)
                return err;

        if (tb[TCA_FLOW_BASECLASS]) {
                baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
                if (TC_H_MIN(baseclass) == 0)
                        return -EINVAL;
        }

        if (tb[TCA_FLOW_KEYS]) {
                keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
                if (fls(keymask) - 1 > FLOW_KEY_MAX)
                        return -EOPNOTSUPP;

                nkeys = hweight32(keymask);
                if (nkeys == 0)
                        return -EINVAL;
        }

        err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
        if (err < 0)
                return err;

        err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
        if (err < 0)
                goto err1;

        f = (struct flow_filter *)*arg;
        if (f != NULL) {
                err = -EINVAL;
                if (f->handle != handle && handle)
                        goto err2;

                mode = f->mode;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;
        } else {
                err = -EINVAL;
                if (!handle)
                        goto err2;
                if (!tb[TCA_FLOW_KEYS])
                        goto err2;

                mode = FLOW_MODE_MAP;
                if (tb[TCA_FLOW_MODE])
                        mode = nla_get_u32(tb[TCA_FLOW_MODE]);
                if (mode != FLOW_MODE_HASH && nkeys > 1)
                        goto err2;

                if (TC_H_MAJ(baseclass) == 0)
                        baseclass = TC_H_MAKE(tp->q->handle, baseclass);
                if (TC_H_MIN(baseclass) == 0)
                        baseclass = TC_H_MAKE(baseclass, 1);

                err = -ENOBUFS;
                f = kzalloc(sizeof(*f), GFP_KERNEL);
                if (f == NULL)
                        goto err2;

                f->handle = handle;
                f->mask = ~0U;
        }

        tcf_exts_change(tp, &f->exts, &e);
        tcf_em_tree_change(tp, &f->ematches, &t);

        tcf_tree_lock(tp);

        if (tb[TCA_FLOW_KEYS]) {
                f->keymask = keymask;
                f->nkeys = nkeys;
        }

        f->mode = mode;

        if (tb[TCA_FLOW_MASK])
                f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
        if (tb[TCA_FLOW_XOR])
                f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
        if (tb[TCA_FLOW_RSHIFT])
                f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
        if (tb[TCA_FLOW_ADDEND])
                f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);

        if (tb[TCA_FLOW_DIVISOR])
                f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
        if (baseclass)
                f->baseclass = baseclass;

        if (*arg == 0)
                list_add_tail(&f->list, &head->filters);

        tcf_tree_unlock(tp);

        *arg = (unsigned long)f;
        return 0;

err2:
        tcf_em_tree_destroy(tp, &t);
err1:
        tcf_exts_destroy(tp, &e);
        return err;
}

static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
        tcf_exts_destroy(tp, &f->exts);
        tcf_em_tree_destroy(tp, &f->ematches);
        kfree(f);
}

static int flow_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct flow_filter *f = (struct flow_filter *)arg;

        tcf_tree_lock(tp);
        list_del(&f->list);
        tcf_tree_unlock(tp);
        flow_destroy_filter(tp, f);
        return 0;
}

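/* Seed the shared jhash random value once and allocate the filter list head */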
static int flow_init(struct tcf_proto *tp)
{
        struct flow_head *head;

        if (!flow_hashrnd_initted) {
                get_random_bytes(&flow_hashrnd, 4);
                flow_hashrnd_initted = 1;
        }

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;
        INIT_LIST_HEAD(&head->filters);
        tp->root = head;
        return 0;
}

static void flow_destroy(struct tcf_proto *tp)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f, *next;

        list_for_each_entry_safe(f, next, &head->filters, list) {
                list_del(&f->list);
                flow_destroy_filter(tp, f);
        }
        kfree(head);
}

static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list)
                if (f->handle == handle)
                        return (unsigned long)f;
        return 0;
}

static void flow_put(struct tcf_proto *tp, unsigned long f)
{
        return;
}

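/*
 * Dump the filter configuration to netlink; optional parameters are only
 * included when they differ from their defaults.
 */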
static int flow_dump(struct tcf_proto *tp, unsigned long fh,
                     struct sk_buff *skb, struct tcmsg *t)
{
        struct flow_filter *f = (struct flow_filter *)fh;
        struct nlattr *nest;

        if (f == NULL)
                return skb->len;

        t->tcm_handle = f->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
        NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);

        if (f->mask != ~0 || f->xor != 0) {
                NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
                NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
        }
        if (f->rshift)
                NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
        if (f->addend)
                NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);

        if (f->divisor)
                NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
        if (f->baseclass)
                NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);

        if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;

        if (f->ematches.hdr.nmatches &&
            tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, nest);
        return -1;
}

static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct flow_head *head = tp->root;
        struct flow_filter *f;

        list_for_each_entry(f, &head->filters, list) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long)f, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_flow_ops __read_mostly = {
        .kind           = "flow",
        .classify       = flow_classify,
        .init           = flow_init,
        .destroy        = flow_destroy,
        .change         = flow_change,
        .delete         = flow_delete,
        .get            = flow_get,
        .put            = flow_put,
        .dump           = flow_dump,
        .walk           = flow_walk,
        .owner          = THIS_MODULE,
};

static int __init cls_flow_init(void)
{
        return register_tcf_proto_ops(&cls_flow_ops);
}

static void __exit cls_flow_exit(void)
{
        unregister_tcf_proto_ops(&cls_flow_ops);
}

module_init(cls_flow_init);
module_exit(cls_flow_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("TC flow classifier");