[PKT_SCHED]: RED: Use new generic red interface

Simplifies code a lot by separating the RED algorithm and the queueing logic. We now differentiate between probability marks and forced marks but sum them together again to not break backwards compatibility.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
author: Thomas Graf <tgraf@suug.ch> 2005-11-05 15:14:05 -0500
committer: Thomas Graf <tgr@axs.localdomain> 2005-11-05 16:02:25 -0500
commit: 6b31b28a441c9ba33889f88ac1d9451ed9532ada (patch)
tree: d1a74aa7d1a78fdfb2674c7ee56a5be714a8fe83
parent: 2566a509cacc8b8eaea2e5b54068816c9cfb41c2 (diff)
1 files changed, 74 insertions, 247 deletions
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7845d045eec4..0dabcc9091be 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -41,44 +41,10 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/dsfield.h>
+#include <net/red.h>
-/*      Random Early Detection (RED) algorithm.
+/*      Parameters, settable by user:
-        =======================================
-        Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
-        for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
-        This file codes a "divisionless" version of RED algorithm
-        as written down in Fig.17 of the paper.
-Short description.
------------------
-        When a new packet arrives we calculate the average queue length:
-        avg = (1-W)*avg + W*current_queue_len,
-        W is the filter time constant (chosen as 2^(-Wlog)), it controls
-        the inertia of the algorithm. To allow larger bursts, W should be
-        decreased.
-        if (avg > th_max) -> packet marked (dropped).
-        if (avg < th_min) -> packet passes.
-        if (th_min < avg < th_max) we calculate probability:
-        Pb = max_P * (avg - th_min)/(th_max-th_min)
-        and mark (drop) packet with this probability.
-        Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
-        max_P should be small (not 1), usually 0.01..0.02 is good value.
-        max_P is chosen as a number, so that max_P/(th_max-th_min)
-        is a negative power of two in order arithmetics to contain
-        only shifts.
-        Parameters, settable by user:
        -----------------------------
        limit           - bytes (must be > qth_max + burst)
@@ -89,92 +55,19 @@ Short description.
        arbitrarily high (well, less than ram size)
        Really, this limit will never be reached
        if RED works correctly.
-        qth_min         - bytes (should be < qth_max/2)
-        qth_max         - bytes (should be at least 2*qth_min and less limit)
-        Wlog            - bits (<32) log(1/W).
-        Plog            - bits (<32)
-        Plog is related to max_P by formula:
-        max_P = (qth_max-qth_min)/2^Plog;
-        F.e. if qth_max=128K and qth_min=32K, then Plog=22
-        corresponds to max_P=0.02
-        Scell_log
-        Stab
-        Lookup table for log((1-W)^(t/t_ave).
-NOTES:
-Upper bound on W.
-----------------
-        If you want to allow bursts of L packets of size S,
-        you should choose W:
-        L + 1 - th_min/S < (1-(1-W)^L)/W
-        th_min/S = 32         th_min/S = 4
-                                               
-        log(W)  L
-        -1      33
-        -2      35
-        -3      39
-        -4      46
-        -5      57
-        -6      75
-        -7      101
-        -8      135
-        -9      190
-        etc.
 */
 struct red_sched_data
 {
-/* Parameters */
+        u32                     limit;          /* HARD maximal queue length */
-        u32             limit;          /* HARD maximal queue length    */
+        unsigned char           flags;
-        u32             qth_min;        /* Min average length threshold: A scaled */
+        struct red_parms        parms;
-        u32             qth_max;        /* Max average length threshold: A scaled */
+        struct red_stats        stats;
-        u32             Rmask;
-        u32             Scell_max;
-        unsigned char   flags;
-        char            Wlog;           /* log(W)               */
-        char            Plog;           /* random number bits   */
-        char            Scell_log;
-        u8              Stab[256];
-/* Variables */
-        unsigned long   qave;           /* Average queue length: A scaled */
-        int             qcount;         /* Packets since last random number generation */
-        u32             qR;             /* Cached random number */
-        psched_time_t   qidlestart;     /* Start of idle period         */
-        struct tc_red_xstats st;
 };
-static int red_ecn_mark(struct sk_buff *skb)
+static inline int red_use_ecn(struct red_sched_data *q)
 {
-        if (skb->nh.raw + 20 > skb->tail)
+        return q->flags & TC_RED_ECN;
-                return 0;
-        switch (skb->protocol) {
-        case __constant_htons(ETH_P_IP):
-                if (INET_ECN_is_not_ect(skb->nh.iph->tos))
-                        return 0;
-                IP_ECN_set_ce(skb->nh.iph);
-                return 1;
-        case __constant_htons(ETH_P_IPV6):
-                if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
-                        return 0;
-                IP6_ECN_set_ce(skb->nh.ipv6h);
-                return 1;
-        default:
-                return 0;
-        }
 }
 static int
@@ -182,119 +75,50 @@ red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
-        psched_time_t now;
+        q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog);
-        if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+        if (red_is_idling(&q->parms))
-                long us_idle;
+                red_end_of_idle_period(&q->parms);
-                int  shift;
-                PSCHED_GET_TIME(now);
+        switch (red_action(&q->parms, q->parms.qavg)) {
-                us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
+                case RED_DONT_MARK:
-                PSCHED_SET_PASTPERFECT(q->qidlestart);
+                        break;
-/*
+                case RED_PROB_MARK:
-   The problem: ideally, average length queue recalcultion should
+                        sch->qstats.overlimits++;
-   be done over constant clock intervals. This is too expensive, so that
+                        if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-   the calculation is driven by outgoing packets.
+                                q->stats.prob_drop++;
-   When the queue is idle we have to model this clock by hand.
+                                goto congestion_drop;
+                        }
-   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
-   dummy packets as a burst after idle time, i.e.
-          q->qave *= (1-W)^m
-   This is an apparently overcomplicated solution (f.e. we have to precompute
-   a table to make this calculation in reasonable time)
-   I believe that a simpler model may be used here,
-   but it is field for experiments.
-*/
-                shift = q->Stab[us_idle>>q->Scell_log];
-                if (shift) {
-                        q->qave >>= shift;
-                } else {
-                        /* Approximate initial part of exponent
-                           with linear function:
-                           (1-W)^m ~= 1-mW + ...
-                           Seems, it is the best solution to
-                           problem of too coarce exponent tabulation.
-                         */
-                        us_idle = (q->qave * us_idle)>>q->Scell_log;
-                        if (us_idle < q->qave/2)
-                                q->qave -= us_idle;
-                        else
-                                q->qave >>= 1;
-                }
-        } else {
-                q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
-                /* NOTE:
-                   q->qave is fixed point number with point at Wlog.
-                   The formulae above is equvalent to floating point
-                   version:
-                   qave = qave*(1-W) + sch->qstats.backlog*W;
-                                                           --ANK (980924)
-                 */
-        }
-        if (q->qave < q->qth_min) {
+                        q->stats.prob_mark++;
-                q->qcount = -1;
+                        break;
-enqueue:
-                if (sch->qstats.backlog + skb->len <= q->limit) {
+                case RED_HARD_MARK:
-                        __skb_queue_tail(&sch->q, skb);
+                        sch->qstats.overlimits++;
-                        sch->qstats.backlog += skb->len;
+                        if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-                        sch->bstats.bytes += skb->len;
+                                q->stats.forced_drop++;
-                        sch->bstats.packets++;
+                                goto congestion_drop;
-                        return NET_XMIT_SUCCESS;
+                        }
-                } else {
-                        q->st.pdrop++;
+                        q->stats.forced_mark++;
-                }
+                        break;
-                kfree_skb(skb);
-                sch->qstats.drops++;
-                return NET_XMIT_DROP;
-        }
-        if (q->qave >= q->qth_max) {
-                q->qcount = -1;
-                sch->qstats.overlimits++;
-mark:
-                if  (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
-                        q->st.early++;
-                        goto drop;
-                }
-                q->st.marked++;
-                goto enqueue;
        }
-        if (++q->qcount) {
+        if (sch->qstats.backlog + skb->len <= q->limit) {
-                /* The formula used below causes questions.
+                __skb_queue_tail(&sch->q, skb);
+                sch->qstats.backlog += skb->len;
-                   OK. qR is random number in the interval 0..Rmask
+                sch->bstats.bytes += skb->len;
-                   i.e. 0..(2^Plog). If we used floating point
+                sch->bstats.packets++;
-                   arithmetics, it would be: (2^Plog)*rnd_num,
+                return NET_XMIT_SUCCESS;
-                   where rnd_num is less 1.
-                   Taking into account, that qave have fixed
-                   point at Wlog, and Plog is related to max_P by
-                   max_P = (qth_max-qth_min)/2^Plog; two lines
-                   below have the following floating point equivalent:
-                   
-                   max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
-                   Any questions? --ANK (980924)
-                 */
-                if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
-                        goto enqueue;
-                q->qcount = 0;
-                q->qR = net_random()&q->Rmask;
-                sch->qstats.overlimits++;
-                goto mark;
        }
-        q->qR = net_random()&q->Rmask;
-        goto enqueue;
-drop:
+        q->stats.pdrop++;
+        kfree_skb(skb);
+        sch->qstats.drops++;
+        return NET_XMIT_DROP;
+congestion_drop:
        kfree_skb(skb);
        sch->qstats.drops++;
        return NET_XMIT_CN;
@@ -305,7 +129,8 @@ red_requeue(struct sk_buff *skb, struct Qdisc* sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
-        PSCHED_SET_PASTPERFECT(q->qidlestart);
+        if (red_is_idling(&q->parms))
+                red_end_of_idle_period(&q->parms);
        __skb_queue_head(&sch->q, skb);
        sch->qstats.backlog += skb->len;
@@ -324,7 +149,8 @@ red_dequeue(struct Qdisc* sch)
                sch->qstats.backlog -= skb->len;
                return skb;
        }
-        PSCHED_GET_TIME(q->qidlestart);
+        red_start_of_idle_period(&q->parms);
        return NULL;
 }
@@ -338,11 +164,12 @@ static unsigned int red_drop(struct Qdisc* sch)
                unsigned int len = skb->len;
                sch->qstats.backlog -= len;
                sch->qstats.drops++;
-                q->st.other++;
+                q->stats.other++;
                kfree_skb(skb);
                return len;
        }
-        PSCHED_GET_TIME(q->qidlestart);
+        red_start_of_idle_period(&q->parms);
        return 0;
 }
@@ -352,9 +179,7 @@ static void red_reset(struct Qdisc* sch)
        __skb_queue_purge(&sch->q);
        sch->qstats.backlog = 0;
-        PSCHED_SET_PASTPERFECT(q->qidlestart);
+        red_restart(&q->parms);
-        q->qave = 0;
-        q->qcount = -1;
 }
 static int red_change(struct Qdisc *sch, struct rtattr *opt)
@@ -374,19 +199,14 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
        sch_tree_lock(sch);
        q->flags = ctl->flags;
-        q->Wlog = ctl->Wlog;
-        q->Plog = ctl->Plog;
-        q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
-        q->Scell_log = ctl->Scell_log;
-        q->Scell_max = (255<<q->Scell_log);
-        q->qth_min = ctl->qth_min<<ctl->Wlog;
-        q->qth_max = ctl->qth_max<<ctl->Wlog;
        q->limit = ctl->limit;
-        memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
-        q->qcount = -1;
+        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
+                                 ctl->Plog, ctl->Scell_log,
+                                 RTA_DATA(tb[TCA_RED_STAB-1]));
        if (skb_queue_empty(&sch->q))
-                PSCHED_SET_PASTPERFECT(q->qidlestart);
+                red_end_of_idle_period(&q->parms);
        sch_tree_unlock(sch);
        return 0;
 }
@@ -401,17 +221,18 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
        struct red_sched_data *q = qdisc_priv(sch);
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
-        struct tc_red_qopt opt;
+        struct tc_red_qopt opt = {
+                .limit          = q->limit,
+                .flags          = q->flags,
+                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
+                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
+                .Wlog           = q->parms.Wlog,
+                .Plog           = q->parms.Plog,
+                .Scell_log      = q->parms.Scell_log,
+        };
        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-        opt.limit = q->limit;
-        opt.qth_min = q->qth_min>>q->Wlog;
-        opt.qth_max = q->qth_max>>q->Wlog;
-        opt.Wlog = q->Wlog;
-        opt.Plog = q->Plog;
-        opt.Scell_log = q->Scell_log;
-        opt.flags = q->flags;
        RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
        rta->rta_len = skb->tail - b;
@@ -425,8 +246,14 @@ rtattr_failure:
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
        struct red_sched_data *q = qdisc_priv(sch);
+        struct tc_red_xstats st = {
-        return gnet_stats_copy_app(d, &q->st, sizeof(q->st));
+                .early  = q->stats.prob_drop + q->stats.forced_drop,
+                .pdrop  = q->stats.pdrop,
+                .other  = q->stats.other,
+                .marked = q->stats.prob_mark + q->stats.forced_mark,
+        };
+        return gnet_stats_copy_app(d, &st, sizeof(st));
 }
 static struct Qdisc_ops red_qdisc_ops = {
author	Thomas Graf <tgraf@suug.ch>	2005-11-05 15:14:05 -0500
committer	Thomas Graf <tgr@axs.localdomain>	2005-11-05 16:02:25 -0500
commit	6b31b28a441c9ba33889f88ac1d9451ed9532ada (patch)
tree	d1a74aa7d1a78fdfb2674c7ee56a5be714a8fe83
parent	2566a509cacc8b8eaea2e5b54068816c9cfb41c2 (diff)

diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7845d045eec4..0dabcc9091be 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -41,44 +41,10 @@
41	#include <net/pkt_sched.h>	41	#include <net/pkt_sched.h>
42	#include <net/inet_ecn.h>	42	#include <net/inet_ecn.h>
43	#include <net/dsfield.h>	43	#include <net/dsfield.h>
		44	#include <net/red.h>
44		45
45		46
46	/* Random Early Detection (RED) algorithm.	47	/* Parameters, settable by user:
47	=======================================
48
49	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
50	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
51
52	This file codes a "divisionless" version of RED algorithm
53	as written down in Fig.17 of the paper.
54
55	Short description.
56	------------------
57
58	When a new packet arrives we calculate the average queue length:
59
60	avg = (1-W)*avg + W*current_queue_len,
61
62	W is the filter time constant (chosen as 2^(-Wlog)), it controls
63	the inertia of the algorithm. To allow larger bursts, W should be
64	decreased.
65
66	if (avg > th_max) -> packet marked (dropped).
67	if (avg < th_min) -> packet passes.
68	if (th_min < avg < th_max) we calculate probability:
69
70	Pb = max_P * (avg - th_min)/(th_max-th_min)
71
72	and mark (drop) packet with this probability.
73	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
74	max_P should be small (not 1), usually 0.01..0.02 is good value.
75
76	max_P is chosen as a number, so that max_P/(th_max-th_min)
77	is a negative power of two in order arithmetics to contain
78	only shifts.
79
80
81	Parameters, settable by user:
82	-----------------------------	48	-----------------------------
83		49
84	limit - bytes (must be > qth_max + burst)	50	limit - bytes (must be > qth_max + burst)
@@ -89,92 +55,19 @@ Short description.
89	arbitrarily high (well, less than ram size)	55	arbitrarily high (well, less than ram size)
90	Really, this limit will never be reached	56	Really, this limit will never be reached
91	if RED works correctly.	57	if RED works correctly.
92
93	qth_min - bytes (should be < qth_max/2)
94	qth_max - bytes (should be at least 2*qth_min and less limit)
95	Wlog - bits (<32) log(1/W).
96	Plog - bits (<32)
97
98	Plog is related to max_P by formula:
99
100	max_P = (qth_max-qth_min)/2^Plog;
101
102	F.e. if qth_max=128K and qth_min=32K, then Plog=22
103	corresponds to max_P=0.02
104
105	Scell_log
106	Stab
107
108	Lookup table for log((1-W)^(t/t_ave).
109
110
111	NOTES:
112
113	Upper bound on W.
114	-----------------
115
116	If you want to allow bursts of L packets of size S,
117	you should choose W:
118
119	L + 1 - th_min/S < (1-(1-W)^L)/W
120
121	th_min/S = 32 th_min/S = 4
122
123	log(W) L
124	-1 33
125	-2 35
126	-3 39
127	-4 46
128	-5 57
129	-6 75
130	-7 101
131	-8 135
132	-9 190
133	etc.
134	*/	58	*/
135		59
136	struct red_sched_data	60	struct red_sched_data
137	{	61	{
138	/* Parameters */	62	u32 limit; /* HARD maximal queue length */
139	u32 limit; /* HARD maximal queue length */	63	unsigned char flags;
140	u32 qth_min; /* Min average length threshold: A scaled */	64	struct red_parms parms;
141	u32 qth_max; /* Max average length threshold: A scaled */	65	struct red_stats stats;
142	u32 Rmask;
143	u32 Scell_max;
144	unsigned char flags;
145	char Wlog; /* log(W) */
146	char Plog; /* random number bits */
147	char Scell_log;
148	u8 Stab[256];
149
150	/* Variables */
151	unsigned long qave; /* Average queue length: A scaled */
152	int qcount; /* Packets since last random number generation */
153	u32 qR; /* Cached random number */
154
155	psched_time_t qidlestart; /* Start of idle period */
156	struct tc_red_xstats st;
157	};	66	};
158		67
159	static int red_ecn_mark(struct sk_buff *skb)	68	static inline int red_use_ecn(struct red_sched_data *q)
160	{	69	{
161	if (skb->nh.raw + 20 > skb->tail)	70	return q->flags & TC_RED_ECN;
162	return 0;
163
164	switch (skb->protocol) {
165	case __constant_htons(ETH_P_IP):
166	if (INET_ECN_is_not_ect(skb->nh.iph->tos))
167	return 0;
168	IP_ECN_set_ce(skb->nh.iph);
169	return 1;
170	case __constant_htons(ETH_P_IPV6):
171	if (INET_ECN_is_not_ect(ipv6_get_dsfield(skb->nh.ipv6h)))
172	return 0;
173	IP6_ECN_set_ce(skb->nh.ipv6h);
174	return 1;
175	default:
176	return 0;
177	}
178	}	71	}
179		72
180	static int	73	static int
@@ -182,119 +75,50 @@ red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
182	{	75	{
183	struct red_sched_data *q = qdisc_priv(sch);	76	struct red_sched_data *q = qdisc_priv(sch);
184		77
185	psched_time_t now;	78	q->parms.qavg = red_calc_qavg(&q->parms, sch->qstats.backlog);
186		79
187	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {	80	if (red_is_idling(&q->parms))
188	long us_idle;	81	red_end_of_idle_period(&q->parms);
189	int shift;
190		82
191	PSCHED_GET_TIME(now);	83	switch (red_action(&q->parms, q->parms.qavg)) {
192	us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);	84	case RED_DONT_MARK:
193	PSCHED_SET_PASTPERFECT(q->qidlestart);	85	break;
194		86
195	/*	87	case RED_PROB_MARK:
196	The problem: ideally, average length queue recalcultion should	88	sch->qstats.overlimits++;
197	be done over constant clock intervals. This is too expensive, so that	89	if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
198	the calculation is driven by outgoing packets.	90	q->stats.prob_drop++;
199	When the queue is idle we have to model this clock by hand.	91	goto congestion_drop;
200		92	}
201	SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
202	dummy packets as a burst after idle time, i.e.
203
204	q->qave *= (1-W)^m
205
206	This is an apparently overcomplicated solution (f.e. we have to precompute
207	a table to make this calculation in reasonable time)
208	I believe that a simpler model may be used here,
209	but it is field for experiments.
210	*/
211	shift = q->Stab[us_idle>>q->Scell_log];
212
213	if (shift) {
214	q->qave >>= shift;
215	} else {
216	/* Approximate initial part of exponent
217	with linear function:
218	(1-W)^m ~= 1-mW + ...
219
220	Seems, it is the best solution to
221	problem of too coarce exponent tabulation.
222	*/
223
224	us_idle = (q->qave * us_idle)>>q->Scell_log;
225	if (us_idle < q->qave/2)
226	q->qave -= us_idle;
227	else
228	q->qave >>= 1;
229	}
230	} else {
231	q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
232	/* NOTE:
233	q->qave is fixed point number with point at Wlog.
234	The formulae above is equvalent to floating point
235	version:
236
237	qave = qave*(1-W) + sch->qstats.backlog*W;
238	--ANK (980924)
239	*/
240	}
241		93
242	if (q->qave < q->qth_min) {	94	q->stats.prob_mark++;
243	q->qcount = -1;	95	break;
244	enqueue:	96
245	if (sch->qstats.backlog + skb->len <= q->limit) {	97	case RED_HARD_MARK:
246	__skb_queue_tail(&sch->q, skb);	98	sch->qstats.overlimits++;
247	sch->qstats.backlog += skb->len;	99	if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
248	sch->bstats.bytes += skb->len;	100	q->stats.forced_drop++;
249	sch->bstats.packets++;	101	goto congestion_drop;
250	return NET_XMIT_SUCCESS;	102	}
251	} else {	103
252	q->st.pdrop++;	104	q->stats.forced_mark++;
253	}	105	break;
254	kfree_skb(skb);
255	sch->qstats.drops++;
256	return NET_XMIT_DROP;
257	}
258	if (q->qave >= q->qth_max) {
259	q->qcount = -1;
260	sch->qstats.overlimits++;
261	mark:
262	if (!(q->flags&TC_RED_ECN) || !red_ecn_mark(skb)) {
263	q->st.early++;
264	goto drop;
265	}
266	q->st.marked++;
267	goto enqueue;
268	}	106	}
269		107
270	if (++q->qcount) {	108	if (sch->qstats.backlog + skb->len <= q->limit) {
271	/* The formula used below causes questions.	109	__skb_queue_tail(&sch->q, skb);
272		110	sch->qstats.backlog += skb->len;
273	OK. qR is random number in the interval 0..Rmask	111	sch->bstats.bytes += skb->len;
274	i.e. 0..(2^Plog). If we used floating point	112	sch->bstats.packets++;
275	arithmetics, it would be: (2^Plog)*rnd_num,	113	return NET_XMIT_SUCCESS;
276	where rnd_num is less 1.
277
278	Taking into account, that qave have fixed
279	point at Wlog, and Plog is related to max_P by
280	max_P = (qth_max-qth_min)/2^Plog; two lines
281	below have the following floating point equivalent:
282
283	max_P*(qave - qth_min)/(qth_max-qth_min) < rnd/qcount
284
285	Any questions? --ANK (980924)
286	*/
287	if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
288	goto enqueue;
289	q->qcount = 0;
290	q->qR = net_random()&q->Rmask;
291	sch->qstats.overlimits++;
292	goto mark;
293	}	114	}
294	q->qR = net_random()&q->Rmask;
295	goto enqueue;
296		115
297	drop:	116	q->stats.pdrop++;
		117	kfree_skb(skb);
		118	sch->qstats.drops++;
		119	return NET_XMIT_DROP;
		120
		121	congestion_drop:
298	kfree_skb(skb);	122	kfree_skb(skb);
299	sch->qstats.drops++;	123	sch->qstats.drops++;
300	return NET_XMIT_CN;	124	return NET_XMIT_CN;
@@ -305,7 +129,8 @@ red_requeue(struct sk_buff *skb, struct Qdisc* sch)
305	{	129	{
306	struct red_sched_data *q = qdisc_priv(sch);	130	struct red_sched_data *q = qdisc_priv(sch);
307		131
308	PSCHED_SET_PASTPERFECT(q->qidlestart);	132	if (red_is_idling(&q->parms))
		133	red_end_of_idle_period(&q->parms);
309		134
310	__skb_queue_head(&sch->q, skb);	135	__skb_queue_head(&sch->q, skb);
311	sch->qstats.backlog += skb->len;	136	sch->qstats.backlog += skb->len;
@@ -324,7 +149,8 @@ red_dequeue(struct Qdisc* sch)
324	sch->qstats.backlog -= skb->len;	149	sch->qstats.backlog -= skb->len;
325	return skb;	150	return skb;
326	}	151	}
327	PSCHED_GET_TIME(q->qidlestart);	152
		153	red_start_of_idle_period(&q->parms);
328	return NULL;	154	return NULL;
329	}	155	}
330		156
@@ -338,11 +164,12 @@ static unsigned int red_drop(struct Qdisc* sch)
338	unsigned int len = skb->len;	164	unsigned int len = skb->len;
339	sch->qstats.backlog -= len;	165	sch->qstats.backlog -= len;
340	sch->qstats.drops++;	166	sch->qstats.drops++;
341	q->st.other++;	167	q->stats.other++;
342	kfree_skb(skb);	168	kfree_skb(skb);
343	return len;	169	return len;
344	}	170	}
345	PSCHED_GET_TIME(q->qidlestart);	171
		172	red_start_of_idle_period(&q->parms);
346	return 0;	173	return 0;
347	}	174	}
348		175
@@ -352,9 +179,7 @@ static void red_reset(struct Qdisc* sch)
352		179
353	__skb_queue_purge(&sch->q);	180	__skb_queue_purge(&sch->q);
354	sch->qstats.backlog = 0;	181	sch->qstats.backlog = 0;
355	PSCHED_SET_PASTPERFECT(q->qidlestart);	182	red_restart(&q->parms);
356	q->qave = 0;
357	q->qcount = -1;
358	}	183	}
359		184
360	static int red_change(struct Qdisc *sch, struct rtattr *opt)	185	static int red_change(struct Qdisc *sch, struct rtattr *opt)
@@ -374,19 +199,14 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
374		199
375	sch_tree_lock(sch);	200	sch_tree_lock(sch);
376	q->flags = ctl->flags;	201	q->flags = ctl->flags;
377	q->Wlog = ctl->Wlog;
378	q->Plog = ctl->Plog;
379	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
380	q->Scell_log = ctl->Scell_log;
381	q->Scell_max = (255<<q->Scell_log);
382	q->qth_min = ctl->qth_min<<ctl->Wlog;
383	q->qth_max = ctl->qth_max<<ctl->Wlog;
384	q->limit = ctl->limit;	202	q->limit = ctl->limit;
385	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
386		203
387	q->qcount = -1;	204	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
		205	ctl->Plog, ctl->Scell_log,
		206	RTA_DATA(tb[TCA_RED_STAB-1]));
		207
388	if (skb_queue_empty(&sch->q))	208	if (skb_queue_empty(&sch->q))
389	PSCHED_SET_PASTPERFECT(q->qidlestart);	209	red_end_of_idle_period(&q->parms);
390	sch_tree_unlock(sch);	210	sch_tree_unlock(sch);
391	return 0;	211	return 0;
392	}	212	}
@@ -401,17 +221,18 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
401	struct red_sched_data *q = qdisc_priv(sch);	221	struct red_sched_data *q = qdisc_priv(sch);
402	unsigned char *b = skb->tail;	222	unsigned char *b = skb->tail;
403	struct rtattr *rta;	223	struct rtattr *rta;
404	struct tc_red_qopt opt;	224	struct tc_red_qopt opt = {
		225	.limit = q->limit,
		226	.flags = q->flags,
		227	.qth_min = q->parms.qth_min >> q->parms.Wlog,
		228	.qth_max = q->parms.qth_max >> q->parms.Wlog,
		229	.Wlog = q->parms.Wlog,
		230	.Plog = q->parms.Plog,
		231	.Scell_log = q->parms.Scell_log,
		232	};
405		233
406	rta = (struct rtattr*)b;	234	rta = (struct rtattr*)b;
407	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);	235	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
408	opt.limit = q->limit;
409	opt.qth_min = q->qth_min>>q->Wlog;
410	opt.qth_max = q->qth_max>>q->Wlog;
411	opt.Wlog = q->Wlog;
412	opt.Plog = q->Plog;
413	opt.Scell_log = q->Scell_log;
414	opt.flags = q->flags;
415	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);	236	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
416	rta->rta_len = skb->tail - b;	237	rta->rta_len = skb->tail - b;
417		238
@@ -425,8 +246,14 @@ rtattr_failure:
425	static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)	246	static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
426	{	247	{
427	struct red_sched_data *q = qdisc_priv(sch);	248	struct red_sched_data *q = qdisc_priv(sch);
428		249	struct tc_red_xstats st = {
429	return gnet_stats_copy_app(d, &q->st, sizeof(q->st));	250	.early = q->stats.prob_drop + q->stats.forced_drop,
		251	.pdrop = q->stats.pdrop,
		252	.other = q->stats.other,
		253	.marked = q->stats.prob_mark + q->stats.forced_mark,
		254	};
		255
		256	return gnet_stats_copy_app(d, &st, sizeof(st));
430	}	257	}
431		258
432	static struct Qdisc_ops red_qdisc_ops = {	259	static struct Qdisc_ops red_qdisc_ops = {