aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Graf <tgraf@suug.ch>2008-11-08 01:56:00 -0500
committerDavid S. Miller <davem@davemloft.net>2008-11-08 01:56:00 -0500
commitf400923735ecbb67cbe4a3606c9479f694754f51 (patch)
treebfe96ecb2860837bf858ceb180c489c931ed74d9
parent505d4f73dda9e20d59da05008f1f5eb432613e71 (diff)
pkt_sched: Control group classifier
The classifier should cover the most common use case and will work without any special configuration. The principle of the classifier is to directly access the task_struct via get_current(). In order for this to work, classification requests from softirqs must be ignored. This is not a problem because the vast majority of packets in softirq context are not assigned to a task anyway. For this to work, a mechanism is needed to trace softirq context. This repost goes back to the method of relying on the number of nested bh disable calls for the sake of not adding too much complexity and the option to come up with something more reliable if actually needed. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/cgroup_subsys.h6
-rw-r--r--include/linux/pkt_cls.h14
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/cls_cgroup.c290
5 files changed, 322 insertions, 0 deletions
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 9c22396e8b50..9c8d31bacf46 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -54,3 +54,9 @@ SUBSYS(freezer)
54#endif 54#endif
55 55
56/* */ 56/* */
57
58#ifdef CONFIG_NET_CLS_CGROUP
59SUBSYS(net_cls)
60#endif
61
62/* */
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 7cf7824df778..e6aa8482ad7a 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -394,6 +394,20 @@ enum
394 394
395#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) 395#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
396 396
397
398/* Cgroup classifier */
399
/*
 * Netlink attribute types understood by the cgroup classifier.
 * __TCA_CGROUP_MAX is a sentinel marking the end of the list and must
 * stay last.
 */
enum {
	TCA_CGROUP_UNSPEC,	/* unused placeholder */
	TCA_CGROUP_ACT,		/* actions attached to the filter */
	TCA_CGROUP_POLICE,	/* policer attached to the filter */
	TCA_CGROUP_EMATCHES,	/* nested extended-match tree */
	__TCA_CGROUP_MAX,
};
408
409#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
410
397/* Extended Matches */ 411/* Extended Matches */
398 412
399struct tcf_ematch_tree_hdr 413struct tcf_ematch_tree_hdr
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155db..36543b6fcef3 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -316,6 +316,17 @@ config NET_CLS_FLOW
316 To compile this code as a module, choose M here: the 316 To compile this code as a module, choose M here: the
317 module will be called cls_flow. 317 module will be called cls_flow.
318 318
319config NET_CLS_CGROUP
320 bool "Control Group Classifier"
321 select NET_CLS
322 depends on CGROUPS
323 ---help---
324 Say Y here if you want to classify packets based on the control
325 cgroup of their process.
326
327 This option is a bool, so the classifier can only be built into
328 the kernel; it cannot be compiled as a module.
329
319config NET_EMATCH 330config NET_EMATCH
320 bool "Extended Matches" 331 bool "Extended Matches"
321 select NET_CLS 332 select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b269..70b35f8708c3 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
38obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o 38obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
39obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o 39obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
40obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o 40obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
41obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
41obj-$(CONFIG_NET_EMATCH) += ematch.o 42obj-$(CONFIG_NET_EMATCH) += ematch.o
42obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o 43obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
43obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o 44obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 000000000000..53ada2c0e41c
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,290 @@
1/*
2 * net/sched/cls_cgroup.c Control Group Classifier
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <linux/cgroup.h>
18#include <net/rtnetlink.h>
19#include <net/pkt_cls.h>
20
21struct cgroup_cls_state
22{
23 struct cgroup_subsys_state css;
24 u32 classid;
25};
26
27static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
28{
29 return (struct cgroup_cls_state *)
30 cgroup_subsys_state(cgrp, net_cls_subsys_id);
31}
32
33static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
34 struct cgroup *cgrp)
35{
36 struct cgroup_cls_state *cs;
37
38 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
39 return ERR_PTR(-ENOMEM);
40
41 if (cgrp->parent)
42 cs->classid = net_cls_state(cgrp->parent)->classid;
43
44 return &cs->css;
45}
46
/*
 * Release the per-cgroup classifier state allocated by cgrp_create().
 *
 * The object to free is the cgroup_cls_state attached to @cgrp, not the
 * subsystem descriptor @ss, which is a statically defined object and
 * must never be passed to kfree().
 */
static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	kfree(net_cls_state(cgrp));
}
51
52static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
53{
54 return net_cls_state(cgrp)->classid;
55}
56
57static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
58{
59 if (!cgroup_lock_live_group(cgrp))
60 return -ENODEV;
61
62 net_cls_state(cgrp)->classid = (u32) value;
63
64 cgroup_unlock();
65
66 return 0;
67}
68
69static struct cftype ss_files[] = {
70 {
71 .name = "classid",
72 .read_u64 = read_classid,
73 .write_u64 = write_classid,
74 },
75};
76
77static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
78{
79 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
80}
81
82struct cgroup_subsys net_cls_subsys = {
83 .name = "net_cls",
84 .create = cgrp_create,
85 .destroy = cgrp_destroy,
86 .populate = cgrp_populate,
87 .subsys_id = net_cls_subsys_id,
88};
89
90struct cls_cgroup_head
91{
92 u32 handle;
93 struct tcf_exts exts;
94 struct tcf_ematch_tree ematches;
95};
96
97static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
98 struct tcf_result *res)
99{
100 struct cls_cgroup_head *head = tp->root;
101 struct cgroup_cls_state *cs;
102 int ret = 0;
103
104 /*
105 * Due to the nature of the classifier it is required to ignore all
106 * packets originating from softirq context as accessing `current'
107 * would lead to false results.
108 *
109 * This test assumes that all callers of dev_queue_xmit() explicitely
110 * disable bh. Knowing this, it is possible to detect softirq based
111 * calls by looking at the number of nested bh disable calls because
112 * softirqs always disables bh.
113 */
114 if (softirq_count() != SOFTIRQ_OFFSET)
115 return -1;
116
117 rcu_read_lock();
118 cs = (struct cgroup_cls_state *) task_subsys_state(current,
119 net_cls_subsys_id);
120 if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
121 res->classid = cs->classid;
122 res->class = 0;
123 ret = tcf_exts_exec(skb, &head->exts, res);
124 } else
125 ret = -1;
126
127 rcu_read_unlock();
128
129 return ret;
130}
131
132static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
133{
134 return 0UL;
135}
136
/* Counterpart of cls_cgroup_get(); there is nothing to release. */
static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
{
	return;
}
140
/*
 * Classifier instance setup: nothing to do here, the head is allocated
 * by cls_cgroup_change() when tp->root is still NULL.
 */
static int cls_cgroup_init(struct tcf_proto *tp)
{
	const int ok = 0;

	return ok;
}
145
146static const struct tcf_ext_map cgroup_ext_map = {
147 .action = TCA_CGROUP_ACT,
148 .police = TCA_CGROUP_POLICE,
149};
150
151static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
152 [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
153};
154
155static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
156 u32 handle, struct nlattr **tca,
157 unsigned long *arg)
158{
159 struct nlattr *tb[TCA_CGROUP_MAX+1];
160 struct cls_cgroup_head *head = tp->root;
161 struct tcf_ematch_tree t;
162 struct tcf_exts e;
163 int err;
164
165 if (head == NULL) {
166 if (!handle)
167 return -EINVAL;
168
169 head = kzalloc(sizeof(*head), GFP_KERNEL);
170 if (head == NULL)
171 return -ENOBUFS;
172
173 head->handle = handle;
174
175 tcf_tree_lock(tp);
176 tp->root = head;
177 tcf_tree_unlock(tp);
178 }
179
180 if (handle != head->handle)
181 return -ENOENT;
182
183 err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
184 cgroup_policy);
185 if (err < 0)
186 return err;
187
188 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
189 if (err < 0)
190 return err;
191
192 err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
193 if (err < 0)
194 return err;
195
196 tcf_exts_change(tp, &head->exts, &e);
197 tcf_em_tree_change(tp, &head->ematches, &t);
198
199 return 0;
200}
201
202static void cls_cgroup_destroy(struct tcf_proto *tp)
203{
204 struct cls_cgroup_head *head;
205
206 head = (struct cls_cgroup_head *)xchg(&tp->root, NULL);
207
208 if (head) {
209 tcf_exts_destroy(tp, &head->exts);
210 tcf_em_tree_destroy(tp, &head->ematches);
211 kfree(head);
212 }
213}
214
215static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
216{
217 return -EOPNOTSUPP;
218}
219
220static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
221{
222 struct cls_cgroup_head *head = tp->root;
223
224 if (arg->count < arg->skip)
225 goto skip;
226
227 if (arg->fn(tp, (unsigned long) head, arg) < 0) {
228 arg->stop = 1;
229 return;
230 }
231skip:
232 arg->count++;
233}
234
235static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
236 struct sk_buff *skb, struct tcmsg *t)
237{
238 struct cls_cgroup_head *head = tp->root;
239 unsigned char *b = skb_tail_pointer(skb);
240 struct nlattr *nest;
241
242 t->tcm_handle = head->handle;
243
244 nest = nla_nest_start(skb, TCA_OPTIONS);
245 if (nest == NULL)
246 goto nla_put_failure;
247
248 if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
249 tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
250 goto nla_put_failure;
251
252 nla_nest_end(skb, nest);
253
254 if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
255 goto nla_put_failure;
256
257 return skb->len;
258
259nla_put_failure:
260 nlmsg_trim(skb, b);
261 return -1;
262}
263
264static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
265 .kind = "cgroup",
266 .init = cls_cgroup_init,
267 .change = cls_cgroup_change,
268 .classify = cls_cgroup_classify,
269 .destroy = cls_cgroup_destroy,
270 .get = cls_cgroup_get,
271 .put = cls_cgroup_put,
272 .delete = cls_cgroup_delete,
273 .walk = cls_cgroup_walk,
274 .dump = cls_cgroup_dump,
275 .owner = THIS_MODULE,
276};
277
278static int __init init_cgroup_cls(void)
279{
280 return register_tcf_proto_ops(&cls_cgroup_ops);
281}
282
283static void __exit exit_cgroup_cls(void)
284{
285 unregister_tcf_proto_ops(&cls_cgroup_ops);
286}
287
288module_init(init_cgroup_cls);
289module_exit(exit_cgroup_cls);
290MODULE_LICENSE("GPL");