aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-05-10 03:51:25 -0400
committerDavid S. Miller <davem@davemloft.net>2012-05-10 23:35:02 -0400
commit76e3cc126bb223013a6b9a0e2a51238d1ef2e409 (patch)
tree37d1c2a3c4f4ebf68e9849262c7d75115652313f
parent2dd875ff31ac7ff42d6fc7d7f78ac6c0635439f5 (diff)
codel: Controlled Delay AQM
An implementation of CoDel AQM, from Kathleen Nichols and Van Jacobson. http://queue.acm.org/detail.cfm?id=2209336 This AQM's main input is no longer the queue size in bytes or packets, but the delay packets spend in the (FIFO) queue. As we don't have infinite memory, we can still drop packets in enqueue() in case of massive load, but the aim of CoDel is to drop packets in dequeue(), using a control law based on two simple parameters : target : target sojourn time (default 5ms) interval : width of moving time window (default 100ms) Based on initial work from Dave Taht. Refactored to help future codel inclusion as a plugin for other linux qdisc (FQ_CODEL, ...), like RED. include/net/codel.h contains the codel algorithm, kept as close as possible to Kathleen's reference. net/sched/sch_codel.c contains the linux qdisc specific glue. Separate structures permit a memory efficient implementation of fq_codel (to be sent as a separate work) : Each flow has its own struct codel_vars. Timestamps are taken at enqueue() time with 1024 ns precision, allowing a range of 2199 seconds in queue, and 100Gb links support. iproute2 uses usec as base unit. Selected packets are dropped, unless ECN is enabled and packets can get an ECN mark instead. Tested from 2Mb to 10Gb speeds with no particular problems, on ixgbe and tg3 drivers (BQL enabled). Usage: tc qdisc ... codel [ limit PACKETS ] [ target TIME ] [ interval TIME ] [ ecn ] qdisc codel 10: parent 1:1 limit 2000p target 3.0ms interval 60.0ms ecn Sent 13347099587 bytes 8815805 pkt (dropped 0, overlimits 0 requeues 0) rate 202365Kbit 16708pps backlog 113550b 75p requeues 0 count 116 lastcount 98 ldelay 4.3ms dropping drop_next 816us maxpacket 1514 ecn_mark 84399 drop_overlimit 0 CoDel must be seen as a base module, and should be used keeping in mind there is still a FIFO queue. So a typical setup will probably need a hierarchy of several qdiscs and packet classifiers to be able to meet whatever constraints a user might have. 
One possible example would be to use fq_codel, which combines Fair Queueing and CoDel, in replacement of sfq / sfq_red. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Dave Taht <dave.taht@bufferbloat.net> Cc: Kathleen Nichols <nichols@pollere.com> Cc: Van Jacobson <van@pollere.net> Cc: Tom Herbert <therbert@google.com> Cc: Matt Mathis <mattmathis@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/pkt_sched.h26
-rw-r--r--include/net/codel.h332
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/sch_codel.c275
5 files changed, 645 insertions, 0 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index ffe975c3f1d8..cde56c22bdab 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -655,4 +655,30 @@ struct tc_qfq_stats {
655 __u32 lmax; 655 __u32 lmax;
656}; 656};
657 657
/* CODEL */

/* Attribute identifiers for the options of the CoDel qdisc. */
enum {
	TCA_CODEL_UNSPEC,
	TCA_CODEL_TARGET,
	TCA_CODEL_LIMIT,
	TCA_CODEL_INTERVAL,
	TCA_CODEL_ECN,
	__TCA_CODEL_MAX		/* sentinel: one past the last attribute */
};

/* Highest valid TCA_CODEL_* identifier. */
#define TCA_CODEL_MAX	(__TCA_CODEL_MAX - 1)
670
671struct tc_codel_xstats {
672 __u32 maxpacket; /* largest packet we've seen so far */
673 __u32 count; /* how many drops we've done since the last time we
674 * entered dropping state
675 */
676 __u32 lastcount; /* count at entry to dropping state */
677 __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */
678 __s32 drop_next; /* time to drop next packet */
679 __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */
680 __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */
681 __u32 dropping; /* are we in dropping state ? */
682};
683
658#endif 684#endif
diff --git a/include/net/codel.h b/include/net/codel.h
new file mode 100644
index 000000000000..bce2cefa8c94
--- /dev/null
+++ b/include/net/codel.h
@@ -0,0 +1,332 @@
1#ifndef __NET_SCHED_CODEL_H
2#define __NET_SCHED_CODEL_H
3
4/*
5 * Codel - The Controlled-Delay Active Queue Management algorithm
6 *
7 * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
8 * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
9 * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
10 * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions, and the following disclaimer,
17 * without modification.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. The names of the authors may not be used to endorse or promote products
22 * derived from this software without specific prior written permission.
23 *
24 * Alternatively, provided that this notice is retained in full, this
25 * software may be distributed under the terms of the GNU General
26 * Public License ("GPL") version 2, in which case the provisions of the
27 * GPL apply INSTEAD OF those given above.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
40 * DAMAGE.
41 *
42 */
43
44#include <linux/types.h>
45#include <linux/ktime.h>
46#include <linux/skbuff.h>
47#include <net/pkt_sched.h>
48#include <net/inet_ecn.h>
49
50/* Controlling Queue Delay (CoDel) algorithm
51 * =========================================
52 * Source : Kathleen Nichols and Van Jacobson
53 * http://queue.acm.org/detail.cfm?id=2209336
54 *
55 * Implemented on linux by Dave Taht and Eric Dumazet
56 */
57
58
59/* CoDel uses a 1024 nsec clock, encoded in u32
60 * This gives a range of 2199 seconds, because of signed compares
61 */
62typedef u32 codel_time_t;
63typedef s32 codel_tdiff_t;
/* Number of low-order nanosecond bits discarded by the CoDel clock:
 * one tick is 1 << CODEL_SHIFT = 1024 nsec.
 */
#define CODEL_SHIFT 10

/* Convert a duration in milliseconds to CoDel ticks.
 * The argument is parenthesized so that a compound expression such as
 * MS2TIME(a + b) is scaled as a whole.
 */
#define MS2TIME(a) (((a) * NSEC_PER_MSEC) >> CODEL_SHIFT)
66
67static inline codel_time_t codel_get_time(void)
68{
69 u64 ns = ktime_to_ns(ktime_get());
70
71 return ns >> CODEL_SHIFT;
72}
73
/* Wrap-safe ordering tests for codel_time_t values.
 * The difference is computed in unsigned 32-bit arithmetic (which wraps
 * by definition) and only then reinterpreted as signed, so no signed
 * overflow can occur when the two timestamps straddle the wrap point.
 */
#define codel_time_after(a, b)		((s32)((u32)(a) - (u32)(b)) > 0)
#define codel_time_after_eq(a, b)	((s32)((u32)(a) - (u32)(b)) >= 0)
#define codel_time_before(a, b)		((s32)((u32)(a) - (u32)(b)) < 0)
#define codel_time_before_eq(a, b)	((s32)((u32)(a) - (u32)(b)) <= 0)
78
79/* Qdiscs using codel plugin must use codel_skb_cb in their own cb[] */
80struct codel_skb_cb {
81 codel_time_t enqueue_time;
82};
83
84static struct codel_skb_cb *get_codel_cb(const struct sk_buff *skb)
85{
86 qdisc_cb_private_validate(skb, sizeof(struct codel_skb_cb));
87 return (struct codel_skb_cb *)qdisc_skb_cb(skb)->data;
88}
89
90static codel_time_t codel_get_enqueue_time(const struct sk_buff *skb)
91{
92 return get_codel_cb(skb)->enqueue_time;
93}
94
95static void codel_set_enqueue_time(struct sk_buff *skb)
96{
97 get_codel_cb(skb)->enqueue_time = codel_get_time();
98}
99
100static inline u32 codel_time_to_us(codel_time_t val)
101{
102 u64 valns = ((u64)val << CODEL_SHIFT);
103
104 do_div(valns, NSEC_PER_USEC);
105 return (u32)valns;
106}
107
108/**
109 * struct codel_params - contains codel parameters
110 * @target: target queue size (in time units)
111 * @interval: width of moving time window
112 * @ecn: is Explicit Congestion Notification enabled
113 */
114struct codel_params {
115 codel_time_t target;
116 codel_time_t interval;
117 bool ecn;
118};
119
120/**
121 * struct codel_vars - contains codel variables
122 * @count: how many drops we've done since the last time we
123 * entered dropping state
124 * @lastcount: count at entry to dropping state
125 * @dropping: set to true if in dropping state
126 * @first_above_time: when we went (or will go) continuously above target
127 * for interval
128 * @drop_next: time to drop next packet, or when we dropped last
129 * @ldelay: sojourn time of last dequeued packet
130 */
131struct codel_vars {
132 u32 count;
133 u32 lastcount;
134 bool dropping;
135 codel_time_t first_above_time;
136 codel_time_t drop_next;
137 codel_time_t ldelay;
138};
139
140/**
141 * struct codel_stats - contains codel shared variables and stats
142 * @maxpacket: largest packet we've seen so far
143 * @drop_count: temp count of dropped packets in dequeue()
144 * ecn_mark: number of packets we ECN marked instead of dropping
145 */
146struct codel_stats {
147 u32 maxpacket;
148 u32 drop_count;
149 u32 ecn_mark;
150};
151
152static void codel_params_init(struct codel_params *params)
153{
154 params->interval = MS2TIME(100);
155 params->target = MS2TIME(5);
156 params->ecn = false;
157}
158
159static void codel_vars_init(struct codel_vars *vars)
160{
161 vars->drop_next = 0;
162 vars->first_above_time = 0;
163 vars->dropping = false; /* exit dropping state */
164 vars->count = 0;
165 vars->lastcount = 0;
166}
167
168static void codel_stats_init(struct codel_stats *stats)
169{
170 stats->maxpacket = 256;
171}
172
173/* return interval/sqrt(x) with good precision
174 * relies on int_sqrt(unsigned long x) kernel implementation
175 */
176static u32 codel_inv_sqrt(u32 _interval, u32 _x)
177{
178 u64 interval = _interval;
179 unsigned long x = _x;
180
181 /* Scale operands for max precision */
182
183#if BITS_PER_LONG == 64
184 x <<= 32; /* On 64bit arches, we can prescale x by 32bits */
185 interval <<= 16;
186#endif
187
188 while (x < (1UL << (BITS_PER_LONG - 2))) {
189 x <<= 2;
190 interval <<= 1;
191 }
192 do_div(interval, int_sqrt(x));
193 return (u32)interval;
194}
195
196static codel_time_t codel_control_law(codel_time_t t,
197 codel_time_t interval,
198 u32 count)
199{
200 return t + codel_inv_sqrt(interval, count);
201}
202
203
204static bool codel_should_drop(struct sk_buff *skb,
205 unsigned int *backlog,
206 struct codel_vars *vars,
207 struct codel_params *params,
208 struct codel_stats *stats,
209 codel_time_t now)
210{
211 bool ok_to_drop;
212
213 if (!skb) {
214 vars->first_above_time = 0;
215 return false;
216 }
217
218 vars->ldelay = now - codel_get_enqueue_time(skb);
219 *backlog -= qdisc_pkt_len(skb);
220
221 if (unlikely(qdisc_pkt_len(skb) > stats->maxpacket))
222 stats->maxpacket = qdisc_pkt_len(skb);
223
224 if (codel_time_before(vars->ldelay, params->target) ||
225 *backlog <= stats->maxpacket) {
226 /* went below - stay below for at least interval */
227 vars->first_above_time = 0;
228 return false;
229 }
230 ok_to_drop = false;
231 if (vars->first_above_time == 0) {
232 /* just went above from below. If we stay above
233 * for at least interval we'll say it's ok to drop
234 */
235 vars->first_above_time = now + params->interval;
236 } else if (codel_time_after(now, vars->first_above_time)) {
237 ok_to_drop = true;
238 }
239 return ok_to_drop;
240}
241
/* Callback through which codel_dequeue() pulls the next packet from the
 * queue owned by the calling qdisc; returns NULL when the queue is empty.
 */
typedef struct sk_buff *(*codel_skb_dequeue_t)(struct codel_vars *vars,
					       struct Qdisc *sch);
244
245static struct sk_buff *codel_dequeue(struct Qdisc *sch,
246 struct codel_params *params,
247 struct codel_vars *vars,
248 struct codel_stats *stats,
249 codel_skb_dequeue_t dequeue_func,
250 u32 *backlog)
251{
252 struct sk_buff *skb = dequeue_func(vars, sch);
253 codel_time_t now;
254 bool drop;
255
256 if (!skb) {
257 vars->dropping = false;
258 return skb;
259 }
260 now = codel_get_time();
261 drop = codel_should_drop(skb, backlog, vars, params, stats, now);
262 if (vars->dropping) {
263 if (!drop) {
264 /* sojourn time below target - leave dropping state */
265 vars->dropping = false;
266 } else if (codel_time_after_eq(now, vars->drop_next)) {
267 /* It's time for the next drop. Drop the current
268 * packet and dequeue the next. The dequeue might
269 * take us out of dropping state.
270 * If not, schedule the next drop.
271 * A large backlog might result in drop rates so high
272 * that the next drop should happen now,
273 * hence the while loop.
274 */
275 while (vars->dropping &&
276 codel_time_after_eq(now, vars->drop_next)) {
277 if (++vars->count == 0) /* avoid zero divides */
278 vars->count = ~0U;
279 if (params->ecn && INET_ECN_set_ce(skb)) {
280 stats->ecn_mark++;
281 vars->drop_next =
282 codel_control_law(vars->drop_next,
283 params->interval,
284 vars->count);
285 goto end;
286 }
287 qdisc_drop(skb, sch);
288 stats->drop_count++;
289 skb = dequeue_func(vars, sch);
290 if (!codel_should_drop(skb, backlog,
291 vars, params, stats, now)) {
292 /* leave dropping state */
293 vars->dropping = false;
294 } else {
295 /* and schedule the next drop */
296 vars->drop_next =
297 codel_control_law(vars->drop_next,
298 params->interval,
299 vars->count);
300 }
301 }
302 }
303 } else if (drop) {
304 if (params->ecn && INET_ECN_set_ce(skb)) {
305 stats->ecn_mark++;
306 } else {
307 qdisc_drop(skb, sch);
308 stats->drop_count++;
309
310 skb = dequeue_func(vars, sch);
311 drop = codel_should_drop(skb, backlog, vars, params,
312 stats, now);
313 }
314 vars->dropping = true;
315 /* if min went above target close to when we last went below it
316 * assume that the drop rate that controlled the queue on the
317 * last cycle is a good starting point to control it now.
318 */
319 if (codel_time_before(now - vars->drop_next,
320 16 * params->interval)) {
321 vars->count = (vars->count - vars->lastcount) | 1;
322 } else {
323 vars->count = 1;
324 }
325 vars->lastcount = vars->count;
326 vars->drop_next = codel_control_law(now, params->interval,
327 vars->count);
328 }
329end:
330 return skb;
331}
332#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 75b58f81d53d..fadd2522053d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -250,6 +250,17 @@ config NET_SCH_QFQ
250 250
251 If unsure, say N. 251 If unsure, say N.
252 252
# CoDel (Controlled Delay) AQM packet scheduler, built as sch_codel.
config NET_SCH_CODEL
	tristate "Controlled Delay AQM (CODEL)"
	help
	  Say Y here if you want to use the Controlled Delay (CODEL)
	  packet scheduling algorithm.

	  To compile this driver as a module, choose M here: the module
	  will be called sch_codel.

	  If unsure, say N.
263
253config NET_SCH_INGRESS 264config NET_SCH_INGRESS
254 tristate "Ingress Qdisc" 265 tristate "Ingress Qdisc"
255 depends on NET_CLS_ACT 266 depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8cdf4e2b51d3..30fab03b8516 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
37obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o 37obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
38obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o 38obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o 39obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
40obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
40 41
41obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 42obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
42obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 43obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
new file mode 100644
index 000000000000..b4a1a81e757e
--- /dev/null
+++ b/net/sched/sch_codel.c
@@ -0,0 +1,275 @@
1/*
2 * Codel - The Controlled-Delay Active Queue Management algorithm
3 *
4 * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
5 * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
6 *
7 * Implemented on linux by :
8 * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
9 * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions, and the following disclaimer,
16 * without modification.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. The names of the authors may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * Alternatively, provided that this notice is retained in full, this
24 * software may be distributed under the terms of the GNU General
25 * Public License ("GPL") version 2, in which case the provisions of the
26 * GPL apply INSTEAD OF those given above.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
39 * DAMAGE.
40 *
41 */
42
43#include <linux/module.h>
44#include <linux/slab.h>
45#include <linux/types.h>
46#include <linux/kernel.h>
47#include <linux/errno.h>
48#include <linux/skbuff.h>
49#include <net/pkt_sched.h>
50#include <net/codel.h>
51
52
53#define DEFAULT_CODEL_LIMIT 1000
54
55struct codel_sched_data {
56 struct codel_params params;
57 struct codel_vars vars;
58 struct codel_stats stats;
59 u32 drop_overlimit;
60};
61
62/* This is the specific function called from codel_dequeue()
63 * to dequeue a packet from queue. Note: backlog is handled in
64 * codel, we dont need to reduce it here.
65 */
66static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
67{
68 struct sk_buff *skb = __skb_dequeue(&sch->q);
69
70 prefetch(&skb->end); /* we'll need skb_shinfo() */
71 return skb;
72}
73
74static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
75{
76 struct codel_sched_data *q = qdisc_priv(sch);
77 struct sk_buff *skb;
78
79 skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats,
80 dequeue, &sch->qstats.backlog);
81 /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
82 * or HTB crashes. Defer it for next round.
83 */
84 if (q->stats.drop_count && sch->q.qlen) {
85 qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
86 q->stats.drop_count = 0;
87 }
88 if (skb)
89 qdisc_bstats_update(sch, skb);
90 return skb;
91}
92
93static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
94{
95 struct codel_sched_data *q;
96
97 if (likely(qdisc_qlen(sch) < sch->limit)) {
98 codel_set_enqueue_time(skb);
99 return qdisc_enqueue_tail(skb, sch);
100 }
101 q = qdisc_priv(sch);
102 q->drop_overlimit++;
103 return qdisc_drop(skb, sch);
104}
105
106static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
107 [TCA_CODEL_TARGET] = { .type = NLA_U32 },
108 [TCA_CODEL_LIMIT] = { .type = NLA_U32 },
109 [TCA_CODEL_INTERVAL] = { .type = NLA_U32 },
110 [TCA_CODEL_ECN] = { .type = NLA_U32 },
111};
112
113static int codel_change(struct Qdisc *sch, struct nlattr *opt)
114{
115 struct codel_sched_data *q = qdisc_priv(sch);
116 struct nlattr *tb[TCA_CODEL_MAX + 1];
117 unsigned int qlen;
118 int err;
119
120 if (!opt)
121 return -EINVAL;
122
123 err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy);
124 if (err < 0)
125 return err;
126
127 sch_tree_lock(sch);
128
129 if (tb[TCA_CODEL_TARGET]) {
130 u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
131
132 q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
133 }
134
135 if (tb[TCA_CODEL_INTERVAL]) {
136 u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
137
138 q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
139 }
140
141 if (tb[TCA_CODEL_LIMIT])
142 sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
143
144 if (tb[TCA_CODEL_ECN])
145 q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
146
147 qlen = sch->q.qlen;
148 while (sch->q.qlen > sch->limit) {
149 struct sk_buff *skb = __skb_dequeue(&sch->q);
150
151 sch->qstats.backlog -= qdisc_pkt_len(skb);
152 qdisc_drop(skb, sch);
153 }
154 qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
155
156 sch_tree_unlock(sch);
157 return 0;
158}
159
160static int codel_init(struct Qdisc *sch, struct nlattr *opt)
161{
162 struct codel_sched_data *q = qdisc_priv(sch);
163
164 sch->limit = DEFAULT_CODEL_LIMIT;
165
166 codel_params_init(&q->params);
167 codel_vars_init(&q->vars);
168 codel_stats_init(&q->stats);
169
170 if (opt) {
171 int err = codel_change(sch, opt);
172
173 if (err)
174 return err;
175 }
176
177 if (sch->limit >= 1)
178 sch->flags |= TCQ_F_CAN_BYPASS;
179 else
180 sch->flags &= ~TCQ_F_CAN_BYPASS;
181
182 return 0;
183}
184
185static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
186{
187 struct codel_sched_data *q = qdisc_priv(sch);
188 struct nlattr *opts;
189
190 opts = nla_nest_start(skb, TCA_OPTIONS);
191 if (opts == NULL)
192 goto nla_put_failure;
193
194 if (nla_put_u32(skb, TCA_CODEL_TARGET,
195 codel_time_to_us(q->params.target)) ||
196 nla_put_u32(skb, TCA_CODEL_LIMIT,
197 sch->limit) ||
198 nla_put_u32(skb, TCA_CODEL_INTERVAL,
199 codel_time_to_us(q->params.interval)) ||
200 nla_put_u32(skb, TCA_CODEL_ECN,
201 q->params.ecn))
202 goto nla_put_failure;
203
204 return nla_nest_end(skb, opts);
205
206nla_put_failure:
207 nla_nest_cancel(skb, opts);
208 return -1;
209}
210
211static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
212{
213 const struct codel_sched_data *q = qdisc_priv(sch);
214 struct tc_codel_xstats st = {
215 .maxpacket = q->stats.maxpacket,
216 .count = q->vars.count,
217 .lastcount = q->vars.lastcount,
218 .drop_overlimit = q->drop_overlimit,
219 .ldelay = codel_time_to_us(q->vars.ldelay),
220 .dropping = q->vars.dropping,
221 .ecn_mark = q->stats.ecn_mark,
222 };
223
224 if (q->vars.dropping) {
225 codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
226
227 if (delta >= 0)
228 st.drop_next = codel_time_to_us(delta);
229 else
230 st.drop_next = -codel_time_to_us(-delta);
231 }
232
233 return gnet_stats_copy_app(d, &st, sizeof(st));
234}
235
236static void codel_reset(struct Qdisc *sch)
237{
238 struct codel_sched_data *q = qdisc_priv(sch);
239
240 qdisc_reset_queue(sch);
241 codel_vars_init(&q->vars);
242}
243
244static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
245 .id = "codel",
246 .priv_size = sizeof(struct codel_sched_data),
247
248 .enqueue = codel_qdisc_enqueue,
249 .dequeue = codel_qdisc_dequeue,
250 .peek = qdisc_peek_dequeued,
251 .init = codel_init,
252 .reset = codel_reset,
253 .change = codel_change,
254 .dump = codel_dump,
255 .dump_stats = codel_dump_stats,
256 .owner = THIS_MODULE,
257};
258
259static int __init codel_module_init(void)
260{
261 return register_qdisc(&codel_qdisc_ops);
262}
263
264static void __exit codel_module_exit(void)
265{
266 unregister_qdisc(&codel_qdisc_ops);
267}
268
269module_init(codel_module_init)
270module_exit(codel_module_exit)
271
272MODULE_DESCRIPTION("Controlled Delay queue discipline");
273MODULE_AUTHOR("Dave Taht");
274MODULE_AUTHOR("Eric Dumazet");
275MODULE_LICENSE("Dual BSD/GPL");