aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2008-11-20 07:10:00 -0500
committerDavid S. Miller <davem@davemloft.net>2008-11-20 07:10:00 -0500
commit13d2a1d2b032de08d7dcab6a1edcd47802681f96 (patch)
treea60915e015f1dc7a9b5681ef5c5135c59167edb3
parent0c19b0adb8dd33dbd10ff48e41971231c486855c (diff)
pkt_sched: add DRR scheduler
Add classful DRR scheduler as a more flexible replacement for SFQ. The main difference to the algorithm described in "Efficient Fair Queueing using Deficit Round Robin" is that this implementation doesn't drop packets from the longest queue on overrun because its classful and limits are handled by each individual child qdisc. Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/pkt_sched.h16
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/sch_drr.c505
4 files changed, 533 insertions, 0 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 5d921fa91a5b..e3f133adba78 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -500,4 +500,20 @@ struct tc_netem_corrupt
500 500
501#define NETEM_DIST_SCALE 8192 501#define NETEM_DIST_SCALE 8192
502 502
503/* DRR */
504
505enum
506{
507 TCA_DRR_UNSPEC,
508 TCA_DRR_QUANTUM,
509 __TCA_DRR_MAX
510};
511
512#define TCA_DRR_MAX (__TCA_DRR_MAX - 1)
513
514struct tc_drr_stats
515{
516 u32 deficit;
517};
518
503#endif 519#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 36543b6fcef3..4f7ef0db302b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
194 194
195 If unsure, say N. 195 If unsure, say N.
196 196
197config NET_SCH_DRR
198 tristate "Deficit Round Robin scheduler (DRR)"
199 help
200 Say Y here if you want to use the Deficit Round Robin (DRR) packet
201 scheduling algorithm.
202
203 To compile this driver as a module, choose M here: the module
204 will be called sch_drr.
205
206 If unsure, say N.
207
197config NET_SCH_INGRESS 208config NET_SCH_INGRESS
198 tristate "Ingress Qdisc" 209 tristate "Ingress Qdisc"
199 depends on NET_CLS_ACT 210 depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 70b35f8708c3..54d950cd4b8d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
30obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o 30obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
33obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
33obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 34obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
34obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 35obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
35obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 36obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
new file mode 100644
index 000000000000..e71a5de23e23
--- /dev/null
+++ b/net/sched/sch_drr.c
@@ -0,0 +1,505 @@
1/*
2 * net/sched/sch_drr.c Deficit Round Robin scheduler
3 *
4 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/errno.h>
14#include <linux/netdevice.h>
15#include <linux/pkt_sched.h>
16#include <net/sch_generic.h>
17#include <net/pkt_sched.h>
18#include <net/pkt_cls.h>
19
/* One DRR class: a round-robin participant with its own child qdisc. */
struct drr_class {
	struct Qdisc_class_common	common;		/* classid + hash linkage */
	unsigned int			refcnt;		/* usage count (get/put) */
	unsigned int			filter_cnt;	/* filters bound to this class */

	struct gnet_stats_basic		bstats;		/* bytes/packets counters */
	struct gnet_stats_queue		qstats;		/* drop/backlog counters */
	struct gnet_stats_rate_est	rate_est;	/* optional rate estimator */
	struct list_head		alist;		/* linkage on drr_sched.active */
	struct Qdisc			*qdisc;		/* child qdisc holding packets */

	u32				quantum;	/* bytes credited per round */
	u32				deficit;	/* bytes still sendable this round */
};
34
/* Qdisc-private state for one DRR instance. */
struct drr_sched {
	struct list_head		active;		/* backlogged classes, RR order */
	struct tcf_proto		*filter_list;	/* classification filter chain */
	struct Qdisc_class_hash		clhash;		/* classid -> drr_class lookup */
};
40
41static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
42{
43 struct drr_sched *q = qdisc_priv(sch);
44 struct Qdisc_class_common *clc;
45
46 clc = qdisc_class_find(&q->clhash, classid);
47 if (clc == NULL)
48 return NULL;
49 return container_of(clc, struct drr_class, common);
50}
51
/* Flush a class' child qdisc and propagate the qlen decrease to ancestor
 * qdiscs so their counters stay consistent. */
static void drr_purge_queue(struct drr_class *cl)
{
	unsigned int len = cl->qdisc->q.qlen;

	qdisc_reset(cl->qdisc);
	qdisc_tree_decrease_qlen(cl->qdisc, len);
}
59
/* Netlink validation policy for the TCA_OPTIONS nest of a DRR class. */
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
};
63
64static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
65 struct nlattr **tca, unsigned long *arg)
66{
67 struct drr_sched *q = qdisc_priv(sch);
68 struct drr_class *cl = (struct drr_class *)*arg;
69 struct nlattr *tb[TCA_DRR_MAX + 1];
70 u32 quantum;
71 int err;
72
73 err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
74 if (err < 0)
75 return err;
76
77 if (tb[TCA_DRR_QUANTUM]) {
78 quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
79 if (quantum == 0)
80 return -EINVAL;
81 } else
82 quantum = psched_mtu(qdisc_dev(sch));
83
84 if (cl != NULL) {
85 sch_tree_lock(sch);
86 if (tb[TCA_DRR_QUANTUM])
87 cl->quantum = quantum;
88 sch_tree_unlock(sch);
89
90 if (tca[TCA_RATE])
91 gen_replace_estimator(&cl->bstats, &cl->rate_est,
92 qdisc_root_sleeping_lock(sch),
93 tca[TCA_RATE]);
94 return 0;
95 }
96
97 cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
98 if (cl == NULL)
99 return -ENOBUFS;
100
101 cl->refcnt = 1;
102 cl->common.classid = classid;
103 cl->quantum = quantum;
104 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
105 &pfifo_qdisc_ops, classid);
106 if (cl->qdisc == NULL)
107 cl->qdisc = &noop_qdisc;
108
109 if (tca[TCA_RATE])
110 gen_replace_estimator(&cl->bstats, &cl->rate_est,
111 qdisc_root_sleeping_lock(sch),
112 tca[TCA_RATE]);
113
114 sch_tree_lock(sch);
115 qdisc_class_hash_insert(&q->clhash, &cl->common);
116 sch_tree_unlock(sch);
117
118 qdisc_class_hash_grow(sch, &q->clhash);
119
120 *arg = (unsigned long)cl;
121 return 0;
122}
123
/* Free a class: kill its rate estimator, destroy the child qdisc and
 * release the memory.  Callers must have dropped all other references. */
static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
{
	gen_kill_estimator(&cl->bstats, &cl->rate_est);
	qdisc_destroy(cl->qdisc);
	kfree(cl);
}
130
/* Delete a class.  Fails with -EBUSY while filters still reference it. */
static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl = (struct drr_class *)arg;

	if (cl->filter_cnt > 0)
		return -EBUSY;

	sch_tree_lock(sch);

	/* Purge queued packets and unhash under the tree lock so the
	 * class can no longer be found by enqueue/dequeue. */
	drr_purge_queue(cl);
	qdisc_class_hash_remove(&q->clhash, &cl->common);

	/* Drop the creation reference; an outstanding get() may keep the
	 * class alive until the matching put(). */
	if (--cl->refcnt == 0)
		drr_destroy_class(sch, cl);

	sch_tree_unlock(sch);
	return 0;
}
150
151static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
152{
153 struct drr_class *cl = drr_find_class(sch, classid);
154
155 if (cl != NULL)
156 cl->refcnt++;
157
158 return (unsigned long)cl;
159}
160
/* Release a reference taken by drr_get_class(); destroys the class when
 * the last reference is dropped. */
static void drr_put_class(struct Qdisc *sch, unsigned long arg)
{
	struct drr_class *cl = (struct drr_class *)arg;

	if (--cl->refcnt == 0)
		drr_destroy_class(sch, cl);
}
168
169static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
170{
171 struct drr_sched *q = qdisc_priv(sch);
172
173 if (cl)
174 return NULL;
175
176 return &q->filter_list;
177}
178
179static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
180 u32 classid)
181{
182 struct drr_class *cl = drr_find_class(sch, classid);
183
184 if (cl != NULL)
185 cl->filter_cnt++;
186
187 return (unsigned long)cl;
188}
189
/* A filter binding made by drr_bind_tcf() is being released. */
static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	struct drr_class *cl = (struct drr_class *)arg;

	cl->filter_cnt--;
}
196
197static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
198 struct Qdisc *new, struct Qdisc **old)
199{
200 struct drr_class *cl = (struct drr_class *)arg;
201
202 if (new == NULL) {
203 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
204 &pfifo_qdisc_ops, cl->common.classid);
205 if (new == NULL)
206 new = &noop_qdisc;
207 }
208
209 sch_tree_lock(sch);
210 drr_purge_queue(cl);
211 *old = xchg(&cl->qdisc, new);
212 sch_tree_unlock(sch);
213 return 0;
214}
215
/* Return the class' child (leaf) qdisc. */
static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct drr_class *cl = (struct drr_class *)arg;

	return cl->qdisc;
}
222
223static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
224{
225 struct drr_class *cl = (struct drr_class *)arg;
226
227 if (cl->qdisc->q.qlen == 0)
228 list_del(&cl->alist);
229}
230
/* Dump class configuration (quantum) to a netlink message. */
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct drr_class *cl = (struct drr_class *)arg;
	struct nlattr *nest;

	tcm->tcm_parent	= TC_H_ROOT;	/* DRR classes have no class parents */
	tcm->tcm_handle	= cl->common.classid;
	tcm->tcm_info	= cl->qdisc->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	/* NLA_PUT_U32 jumps to nla_put_failure on lack of skb space. */
	NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
251
/* Dump class statistics, including the DRR-specific deficit counter.
 * The deficit is only meaningful while the class is backlogged. */
static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
				struct gnet_dump *d)
{
	struct drr_class *cl = (struct drr_class *)arg;
	struct tc_drr_stats xstats;

	memset(&xstats, 0, sizeof(xstats));
	if (cl->qdisc->q.qlen)
		xstats.deficit = cl->deficit;

	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
		return -1;

	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
269
/* Iterate over all classes, invoking arg->fn on each.  Honors the usual
 * qdisc_walker skip/count/stop protocol used by class dumps. */
static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	struct hlist_node *n;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			/* Skip the first arg->skip entries. */
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}
294
/*
 * Map an skb to a class: first try skb->priority as a direct classid
 * (when its major number matches this qdisc), then fall back to the
 * attached filter chain.  Returns NULL when no class matched; *qerr
 * tells the caller whether the miss counts as a drop.
 */
static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
		cl = drr_find_class(sch, skb->priority);
		if (cl != NULL)
			return cl;
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tc_classify(skb, q->filter_list, &res);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (struct drr_class *)res.class;
		if (cl == NULL)
			cl = drr_find_class(sch, res.classid);
		return cl;
	}
	return NULL;
}
328
/*
 * Enqueue: classify the skb, hand it to the class' child qdisc and, if
 * the class just became backlogged, append it to the active round-robin
 * list with a fresh deficit of one quantum.
 */
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	unsigned int len;
	int err;

	cl = drr_classify(skb, sch, &err);
	if (cl == NULL) {
		/* Only count a drop if a classifier action didn't already
		 * steal or re-queue the packet. */
		if (err & __NET_XMIT_BYPASS)
			sch->qstats.drops++;
		kfree_skb(skb);
		return err;
	}

	/* Record the length before the child enqueue. */
	len = qdisc_pkt_len(skb);
	err = qdisc_enqueue(skb, cl->qdisc);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		if (net_xmit_drop_count(err)) {
			cl->qstats.drops++;
			sch->qstats.drops++;
		}
		return err;
	}

	if (cl->qdisc->q.qlen == 1) {
		/* Idle -> backlogged: join the round-robin at the tail
		 * with a full quantum of credit. */
		list_add_tail(&cl->alist, &q->active);
		cl->deficit = cl->quantum;
	}

	cl->bstats.packets++;
	cl->bstats.bytes += len;
	sch->bstats.packets++;
	sch->bstats.bytes += len;

	sch->q.qlen++;
	return err;
}
367
/*
 * Deficit Round Robin dequeue: the head-of-list class may send as long
 * as its deficit covers the head packet; otherwise it is credited one
 * quantum and rotated to the tail of the active list.
 */
static struct sk_buff *drr_dequeue(struct Qdisc *sch)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	struct sk_buff *skb;
	unsigned int len;

	while (!list_empty(&q->active)) {
		cl = list_first_entry(&q->active, struct drr_class, alist);
		skb = cl->qdisc->ops->peek(cl->qdisc);
		if (skb == NULL)
			/* Child reported backlog but yields nothing
			 * (e.g. a rate-limited inner qdisc): rotate it. */
			goto skip;

		len = qdisc_pkt_len(skb);
		if (len <= cl->deficit) {
			cl->deficit -= len;
			skb = qdisc_dequeue_peeked(cl->qdisc);
			if (cl->qdisc->q.qlen == 0)
				list_del(&cl->alist);
			sch->q.qlen--;
			return skb;
		}

		/* Deficit exhausted: credit one quantum and move on. */
		cl->deficit += cl->quantum;
skip:
		list_move_tail(&cl->alist, &q->active);
	}
	return NULL;
}
397
/* Drop one packet from the first active class whose child supports
 * ->drop().  Returns the dropped packet's length, or 0 if none.
 * Safe despite list_del() inside the iteration because we return
 * immediately afterwards. */
static unsigned int drr_drop(struct Qdisc *sch)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	unsigned int len;

	list_for_each_entry(cl, &q->active, alist) {
		if (cl->qdisc->ops->drop) {
			len = cl->qdisc->ops->drop(cl->qdisc);
			if (len > 0) {
				if (cl->qdisc->q.qlen == 0)
					list_del(&cl->alist);
				return len;
			}
		}
	}
	return 0;
}
416
417static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
418{
419 struct drr_sched *q = qdisc_priv(sch);
420 int err;
421
422 err = qdisc_class_hash_init(&q->clhash);
423 if (err < 0)
424 return err;
425 INIT_LIST_HEAD(&q->active);
426 return 0;
427}
428
/* Reset: flush every class' child qdisc and empty the active list. */
static void drr_reset_qdisc(struct Qdisc *sch)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	struct hlist_node *n;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
			/* Only backlogged classes are linked on ->active. */
			if (cl->qdisc->q.qlen)
				list_del(&cl->alist);
			qdisc_reset(cl->qdisc);
		}
	}
	sch->q.qlen = 0;
}
445
/* Destroy: tear down filters, every class, and the class hash. */
static void drr_destroy_qdisc(struct Qdisc *sch)
{
	struct drr_sched *q = qdisc_priv(sch);
	struct drr_class *cl;
	struct hlist_node *n, *next;
	unsigned int i;

	tcf_destroy_chain(&q->filter_list);

	for (i = 0; i < q->clhash.hashsize; i++) {
		/* _safe variant: drr_destroy_class() frees the entry. */
		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
					  common.hnode)
			drr_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
}
462
/* Class operations exposed to the traffic-control core. */
static const struct Qdisc_class_ops drr_class_ops = {
	.change		= drr_change_class,
	.delete		= drr_delete_class,
	.get		= drr_get_class,
	.put		= drr_put_class,
	.tcf_chain	= drr_tcf_chain,
	.bind_tcf	= drr_bind_tcf,
	.unbind_tcf	= drr_unbind_tcf,
	.graft		= drr_graft_class,
	.leaf		= drr_class_leaf,
	.qlen_notify	= drr_qlen_notify,
	.dump		= drr_dump_class,
	.dump_stats	= drr_dump_class_stats,
	.walk		= drr_walk,
};
478
/* Qdisc operations; "drr" is the name used by tc to select this qdisc. */
static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
	.cl_ops		= &drr_class_ops,
	.id		= "drr",
	.priv_size	= sizeof(struct drr_sched),
	.enqueue	= drr_enqueue,
	.dequeue	= drr_dequeue,
	.peek		= qdisc_peek_dequeued,	/* generic peek via dequeue+requeue */
	.drop		= drr_drop,
	.init		= drr_init_qdisc,
	.reset		= drr_reset_qdisc,
	.destroy	= drr_destroy_qdisc,
	.owner		= THIS_MODULE,
};
492
/* Module init: register the DRR qdisc with the TC core. */
static int __init drr_init(void)
{
	return register_qdisc(&drr_qdisc_ops);
}
497
/* Module exit: unregister the DRR qdisc. */
static void __exit drr_exit(void)
{
	unregister_qdisc(&drr_qdisc_ops);
}
502
503module_init(drr_init);
504module_exit(drr_exit);
505MODULE_LICENSE("GPL");