author		Eric Dumazet <edumazet@google.com>	2012-05-10 03:51:25 -0400
committer	David S. Miller <davem@davemloft.net>	2012-05-10 23:35:02 -0400
commit		76e3cc126bb223013a6b9a0e2a51238d1ef2e409
tree		37d1c2a3c4f4ebf68e9849262c7d75115652313f /net/sched
parent		2dd875ff31ac7ff42d6fc7d7f78ac6c0635439f5
codel: Controlled Delay AQM
An implementation of CoDel AQM, from Kathleen Nichols and Van Jacobson.

http://queue.acm.org/detail.cfm?id=2209336

This AQM's main input is no longer the queue size in bytes or packets, but
the time packets spend in the (FIFO) queue.

As we don't have infinite memory, we still can drop packets in enqueue()
under massive load, but the point of CoDel is to drop packets in dequeue(),
using a control law based on two simple parameters:

target   : target sojourn time (default 5 ms)
interval : width of the moving time window (default 100 ms)

Based on initial work from Dave Taht.

Refactored to allow future inclusion of codel as a plugin for other Linux
qdiscs (FQ_CODEL, ...), much like RED.

include/net/codel.h contains the codel algorithm, kept as close as possible
to Kathleen's reference; net/sched/sch_codel.c contains the Linux
qdisc-specific glue.

Separate structures permit a memory-efficient implementation of fq_codel
(to be sent as separate work): each flow has its own struct codel_vars.

Timestamps are taken at enqueue() time with 1024 ns precision, allowing a
range of 2199 seconds in queue (2^31 * 1024 ns) and support for 100Gb
links. iproute2 uses usec as its base unit.

Selected packets are dropped, unless ECN is enabled, in which case they can
be ECN-marked instead.

Tested from 2Mb to 10Gb speeds with no particular problems, on ixgbe and
tg3 drivers (BQL enabled).

Usage:

 tc qdisc ... codel [ limit PACKETS ] [ target TIME ] [ interval TIME ] [ ecn ]

 qdisc codel 10: parent 1:1 limit 2000p target 3.0ms interval 60.0ms ecn
  Sent 13347099587 bytes 8815805 pkt (dropped 0, overlimits 0 requeues 0)
  rate 202365Kbit 16708pps backlog 113550b 75p requeues 0
   count 116 lastcount 98 ldelay 4.3ms dropping drop_next 816us
   maxpacket 1514 ecn_mark 84399 drop_overlimit 0

CoDel must be seen as a base module, and should be used keeping in mind
that there is still a FIFO queue underneath. A typical setup will therefore
probably need a hierarchy of several qdiscs and packet classifiers to meet
whatever constraints a user might have. One possible example would be to
use fq_codel, which combines Fair Queueing and CoDel, as a replacement for
sfq / sfq_red.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Dave Taht <dave.taht@bufferbloat.net>
Cc: Kathleen Nichols <nichols@pollere.com>
Cc: Van Jacobson <van@pollere.net>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
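The control law described above can be summarized in a short,
self-contained sketch. This is not the kernel code: the in-tree
implementation lives in include/net/codel.h, keeps timestamps as 32-bit
values in 1024 ns units (CODEL_SHIFT) and replaces the square root with a
fixed-point Newton iteration. The names below (cdl_*, CDL_*) are invented
for this illustration, and refinements such as reusing the drop count
between dropping episodes are omitted.

/*
 * Simplified CoDel drop decision -- illustration only (userspace C,
 * link with -lm). Real code: include/net/codel.h, net/sched/sch_codel.c.
 */
#include <stdbool.h>
#include <stdint.h>
#include <math.h>

#define CDL_TARGET_NS   (5ULL * 1000 * 1000)    /* target sojourn time: 5 ms  */
#define CDL_INTERVAL_NS (100ULL * 1000 * 1000)  /* moving time window: 100 ms */

struct cdl_vars {
	bool     dropping;      /* currently in the dropping state?         */
	uint32_t count;         /* drops since entering the dropping state  */
	uint64_t first_above;   /* when sojourn time first stayed > target  */
	uint64_t drop_next;     /* time of the next scheduled drop          */
};

/* Control law: the longer we have been dropping, the closer the drops get. */
static uint64_t cdl_next_drop(uint64_t now_ns, uint32_t count)
{
	return now_ns + (uint64_t)(CDL_INTERVAL_NS / sqrt((double)count));
}

/*
 * Called for each dequeued packet with its sojourn time
 * (now - enqueue timestamp). Returns true if the packet should be
 * dropped (or ECN-marked when ECN is enabled).
 */
static bool cdl_should_drop(struct cdl_vars *v, uint64_t now_ns,
			    uint64_t sojourn_ns)
{
	if (sojourn_ns < CDL_TARGET_NS) {
		/* Below target: queue is fine, leave the dropping state. */
		v->first_above = 0;
		v->dropping = false;
		return false;
	}
	if (!v->first_above) {
		/* Above target: arm a grace period of one interval. */
		v->first_above = now_ns + CDL_INTERVAL_NS;
		return false;
	}
	if (!v->dropping && now_ns >= v->first_above) {
		/* Above target for a full interval: start dropping. */
		v->dropping = true;
		v->count = 1;
		v->drop_next = cdl_next_drop(now_ns, v->count);
		return true;
	}
	if (v->dropping && now_ns >= v->drop_next) {
		/* Still too slow: drop again, sooner each time. */
		v->count++;
		v->drop_next = cdl_next_drop(now_ns, v->count);
		return true;
	}
	return false;
}

int main(void)
{
	struct cdl_vars v = { 0 };

	/* A packet that waited 6 ms: above target, but still inside the
	 * initial grace interval, so it is not dropped yet. */
	return cdl_should_drop(&v, 150000000ULL, 6000000ULL) ? 1 : 0;
}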
Diffstat (limited to 'net/sched')
-rw-r--r--	net/sched/Kconfig	 11
-rw-r--r--	net/sched/Makefile	  1
-rw-r--r--	net/sched/sch_codel.c	275
3 files changed, 287 insertions, 0 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 75b58f81d53d..fadd2522053d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -250,6 +250,17 @@ config NET_SCH_QFQ
 
 	  If unsure, say N.
 
+config NET_SCH_CODEL
+	tristate "Controlled Delay AQM (CODEL)"
+	help
+	  Say Y here if you want to use the Controlled Delay (CODEL)
+	  packet scheduling algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_codel.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
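When this option is set to m (CONFIG_NET_SCH_CODEL=m), the scheduler is
built as the sch_codel module named in the help text; building it in with
=y also works. Either way, the qdisc is then attached with the
"tc qdisc ... codel" command shown in the commit message above.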
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8cdf4e2b51d3..30fab03b8516 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
+obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
new file mode 100644
index 000000000000..b4a1a81e757e
--- /dev/null
+++ b/net/sched/sch_codel.c
@@ -0,0 +1,275 @@
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ *
+ * Implemented on linux by :
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/codel.h>
+
+
+#define DEFAULT_CODEL_LIMIT 1000
+
+struct codel_sched_data {
+	struct codel_params	params;
+	struct codel_vars	vars;
+	struct codel_stats	stats;
+	u32			drop_overlimit;
+};
+
+/* This is the specific function called from codel_dequeue()
+ * to dequeue a packet from queue. Note: backlog is handled in
+ * codel, we don't need to reduce it here.
+ */
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+{
+	struct sk_buff *skb = __skb_dequeue(&sch->q);
+
+	prefetch(&skb->end); /* we'll need skb_shinfo() */
+	return skb;
+}
+
+static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
+{
+	struct codel_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats,
+			    dequeue, &sch->qstats.backlog);
+	/* We can't call qdisc_tree_decrease_qlen() if our qlen is 0,
+	 * or HTB crashes. Defer it for next round.
+	 */
+	if (q->stats.drop_count && sch->q.qlen) {
+		qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+		q->stats.drop_count = 0;
+	}
+	if (skb)
+		qdisc_bstats_update(sch, skb);
+	return skb;
+}
+
+static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct codel_sched_data *q;
+
+	if (likely(qdisc_qlen(sch) < sch->limit)) {
+		codel_set_enqueue_time(skb);
+		return qdisc_enqueue_tail(skb, sch);
+	}
+	q = qdisc_priv(sch);
+	q->drop_overlimit++;
+	return qdisc_drop(skb, sch);
+}
+
+static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
+	[TCA_CODEL_TARGET]	= { .type = NLA_U32 },
+	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 },
+	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 },
+	[TCA_CODEL_ECN]		= { .type = NLA_U32 },
+};
+
+static int codel_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct codel_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CODEL_MAX + 1];
+	unsigned int qlen;
+	int err;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy);
+	if (err < 0)
+		return err;
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_CODEL_TARGET]) {
+		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
+
+		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_CODEL_INTERVAL]) {
+		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
+
+		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_CODEL_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+
+	if (tb[TCA_CODEL_ECN])
+		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+
+	qlen = sch->q.qlen;
+	while (sch->q.qlen > sch->limit) {
+		struct sk_buff *skb = __skb_dequeue(&sch->q);
+
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		qdisc_drop(skb, sch);
+	}
+	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static int codel_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct codel_sched_data *q = qdisc_priv(sch);
+
+	sch->limit = DEFAULT_CODEL_LIMIT;
+
+	codel_params_init(&q->params);
+	codel_vars_init(&q->vars);
+	codel_stats_init(&q->stats);
+
+	if (opt) {
+		int err = codel_change(sch, opt);
+
+		if (err)
+			return err;
+	}
+
+	if (sch->limit >= 1)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	return 0;
+}
+
+static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct codel_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CODEL_TARGET,
+			codel_time_to_us(q->params.target)) ||
+	    nla_put_u32(skb, TCA_CODEL_LIMIT,
+			sch->limit) ||
+	    nla_put_u32(skb, TCA_CODEL_INTERVAL,
+			codel_time_to_us(q->params.interval)) ||
+	    nla_put_u32(skb, TCA_CODEL_ECN,
+			q->params.ecn))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -1;
+}
+
+static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	const struct codel_sched_data *q = qdisc_priv(sch);
+	struct tc_codel_xstats st = {
+		.maxpacket	= q->stats.maxpacket,
+		.count		= q->vars.count,
+		.lastcount	= q->vars.lastcount,
+		.drop_overlimit = q->drop_overlimit,
+		.ldelay		= codel_time_to_us(q->vars.ldelay),
+		.dropping	= q->vars.dropping,
+		.ecn_mark	= q->stats.ecn_mark,
+	};
+
+	if (q->vars.dropping) {
+		codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
+
+		if (delta >= 0)
+			st.drop_next = codel_time_to_us(delta);
+		else
+			st.drop_next = -codel_time_to_us(-delta);
+	}
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static void codel_reset(struct Qdisc *sch)
+{
+	struct codel_sched_data *q = qdisc_priv(sch);
+
+	qdisc_reset_queue(sch);
+	codel_vars_init(&q->vars);
+}
+
+static struct Qdisc_ops codel_qdisc_ops __read_mostly = {
+	.id		=	"codel",
+	.priv_size	=	sizeof(struct codel_sched_data),
+
+	.enqueue	=	codel_qdisc_enqueue,
+	.dequeue	=	codel_qdisc_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	codel_init,
+	.reset		=	codel_reset,
+	.change		=	codel_change,
+	.dump		=	codel_dump,
+	.dump_stats	=	codel_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init codel_module_init(void)
+{
+	return register_qdisc(&codel_qdisc_ops);
+}
+
+static void __exit codel_module_exit(void)
+{
+	unregister_qdisc(&codel_qdisc_ops);
+}
+
+module_init(codel_module_init)
+module_exit(codel_module_exit)
+
+MODULE_DESCRIPTION("Controlled Delay queue discipline");
+MODULE_AUTHOR("Dave Taht");
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("Dual BSD/GPL");