author    Eric Dumazet <eric.dumazet@gmail.com>  2011-12-08 01:06:03 -0500
committer David S. Miller <davem@davemloft.net>  2011-12-08 19:52:43 -0500
commit    8af2a218de38f51ea4b4fa48cac1273319ae260c (patch)
tree      07a4557322b79878096172355fb02ab2bae3f432
parent    57459185a19b0246866479522b77cbb9732201d1 (diff)
sch_red: Adaptative RED AQM
Adaptative RED AQM for linux, based on the paper from Sally Floyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001:
http://icir.org/floyd/papers/adaptiveRed.pdf

The goal of Adaptative RED is to make max_p a dynamic value between 1%
and 50%, so that the average queue length converges to the target
min_th + (max_th - min_th) / 2.

Every 500 ms:
 if (avg > target and max_p <= 0.5)
  increase max_p : max_p += alpha;
 else if (avg < target and max_p >= 0.01)
  decrease max_p : max_p *= beta;

target : [min_th + 0.4*(max_th - min_th),
          min_th + 0.6*(max_th - min_th)]
alpha : min(0.01, max_p / 4)
beta : 0.9

max_P is a Q0.32 fixed point number (unsigned, with a 32 bit mantissa).

Changes against our RED implementation:

max_p is no longer a negative power of two (1/(2^Plog)) but a Q0.32
fixed point number, to allow the full range described in the Adaptative
RED paper.

To deliver a random number we now use a reciprocal divide (which is
really a multiply), but this operation is done once per marked/dropped
packet while in the RED_BETWEEN_TRESH window, so the added cost
(compared to the previous AND operation) is near zero.

The dump operation reports the current max_p value in a new
TCA_RED_MAX_P attribute.

Example on a 10Mbit link:

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
   limit 400000 min 30000 max 90000 avpkt 1000 \
   burst 55 ecn adaptative bandwidth 10Mbit

# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn adaptative ewma 5 max_p=0.113335 Scell_log 15
 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
 rate 9749Kbit 831pps backlog 72056b 16p requeues 0
  marked 1357 early 35 pdrop 0 other 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
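For illustration only, the 500 ms rule above can be condensed into a small
standalone C sketch (userspace, hypothetical helper names; the in-kernel
version is red_adaptative_algo() in include/net/red.h). It uses the same
Q0.32 convention, where 2^32 represents probability 1.0, and the target
window that min 30000 / max 90000 from the tc example above would yield:

/*
 * Illustrative userspace sketch of the 500 ms adaptation rule
 * (hypothetical names, not the kernel code itself).
 * max_p is a Q0.32 fixed point number: 2^32 ~ probability 1.0.
 */
#include <stdio.h>
#include <stdint.h>

#define ONE_PERCENT  ((uint32_t)((1ULL << 32) / 100))  /* ~0.01 in Q0.32 */
#define MAX_P_MIN    (1 * ONE_PERCENT)                 /* 1%  */
#define MAX_P_MAX    (50 * ONE_PERCENT)                /* 50% */

static uint32_t adapt_max_p(uint32_t max_p, uint32_t qavg,
                            uint32_t target_min, uint32_t target_max)
{
        if (qavg > target_max && max_p <= MAX_P_MAX) {
                uint32_t alpha = max_p / 4;     /* alpha = min(0.01, max_p/4) */

                if (alpha > MAX_P_MIN)
                        alpha = MAX_P_MIN;
                max_p += alpha;
        } else if (qavg < target_min && max_p >= MAX_P_MIN) {
                max_p = (max_p / 10) * 9;       /* beta = 0.9 */
        }
        return max_p;
}

int main(void)
{
        /* min 30000, max 90000 -> delta/5 = 12000 -> target [54000, 66000] */
        uint32_t max_p = 10 * ONE_PERCENT;
        uint32_t qavg[] = { 70000, 70000, 50000, 60000 };

        for (unsigned int i = 0; i < sizeof(qavg) / sizeof(qavg[0]); i++) {
                max_p = adapt_max_p(max_p, qavg[i], 54000, 66000);
                printf("qavg=%u -> max_p=%.6f\n", qavg[i],
                       max_p / 4294967296.0);
        }
        return 0;
}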
-rw-r--r--  include/linux/pkt_sched.h    6
-rw-r--r--  include/net/red.h          101
-rw-r--r--  lib/reciprocal_div.c         2
-rw-r--r--  net/sched/sch_red.c         21
4 files changed, 111 insertions, 19 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index fb556dc594d3..e41e0d4de24b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -181,6 +181,7 @@ enum {
 	TCA_RED_UNSPEC,
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
+	TCA_RED_MAX_P,
 	__TCA_RED_MAX,
 };
 
@@ -194,8 +195,9 @@ struct tc_red_qopt {
 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min)) */
 	unsigned char   Scell_log;	/* cell size for idle damping */
 	unsigned char	flags;
 #define TC_RED_ECN		1
 #define TC_RED_HARDDROP	2
+#define TC_RED_ADAPTATIVE	4
 };
 
 struct tc_red_xstats {
diff --git a/include/net/red.h b/include/net/red.h
index b72a3b833936..24606b22d01e 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -5,6 +5,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/dsfield.h>
+#include <linux/reciprocal_div.h>
 
 /* Random Early Detection (RED) algorithm.
    =======================================
@@ -87,6 +88,29 @@
    etc.
  */
 
+/*
+ * Adaptative RED : An Algorithm for Increasing the Robustness of RED's AQM
+ * (Sally FLoyd, Ramakrishna Gummadi, and Scott Shenker) August 2001
+ *
+ * Every 500 ms:
+ *  if (avg > target and max_p <= 0.5)
+ *   increase max_p : max_p += alpha;
+ *  else if (avg < target and max_p >= 0.01)
+ *   decrease max_p : max_p *= beta;
+ *
+ * target :[qth_min + 0.4*(qth_min - qth_max),
+ *          qth_min + 0.6*(qth_min - qth_max)].
+ * alpha : min(0.01, max_p / 4)
+ * beta : 0.9
+ * max_P is a Q0.32 fixed point number (with 32 bits mantissa)
+ * max_P between 0.01 and 0.5 (1% - 50%) [ Its no longer a negative power of two ]
+ */
+#define RED_ONE_PERCENT ((u32)DIV_ROUND_CLOSEST(1ULL<<32, 100))
+
+#define MAX_P_MIN (1 * RED_ONE_PERCENT)
+#define MAX_P_MAX (50 * RED_ONE_PERCENT)
+#define MAX_P_ALPHA(val)	min(MAX_P_MIN, val / 4)
+
 #define RED_STAB_SIZE	256
 #define RED_STAB_MASK	(RED_STAB_SIZE - 1)
 
@@ -101,10 +125,14 @@ struct red_stats {
 
 struct red_parms {
 	/* Parameters */
-	u32		qth_min;	/* Min avg length threshold: A scaled */
-	u32		qth_max;	/* Max avg length threshold: A scaled */
+	u32		qth_min;	/* Min avg length threshold: Wlog scaled */
+	u32		qth_max;	/* Max avg length threshold: Wlog scaled */
 	u32		Scell_max;
-	u32		Rmask;		/* Cached random mask, see red_rmask */
+	u32		max_P;		/* probability, [0 .. 1.0] 32 scaled */
+	u32		max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */
+	u32		qth_delta;	/* max_th - min_th */
+	u32		target_min;	/* min_th + 0.4*(max_th - min_th) */
+	u32		target_max;	/* min_th + 0.6*(max_th - min_th) */
 	u8		Scell_log;
 	u8		Wlog;		/* log(W) */
 	u8		Plog;		/* random number bits */
@@ -115,19 +143,22 @@ struct red_parms {
 					   number generation */
 	u32		qR;		/* Cached random number */
 
-	unsigned long	qavg;		/* Average queue length: A scaled */
+	unsigned long	qavg;		/* Average queue length: Wlog scaled */
 	ktime_t		qidlestart;	/* Start of current idle period */
 };
 
-static inline u32 red_rmask(u8 Plog)
+static inline u32 red_maxp(u8 Plog)
 {
-	return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
+	return Plog < 32 ? (~0U >> Plog) : ~0U;
 }
 
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab)
 {
+	int delta = qth_max - qth_min;
+
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
 	 * it might result in an unreasonable qavg for a while. --TGR
@@ -139,14 +170,29 @@ static inline void red_set_parms(struct red_parms *p,
 	p->qth_max	= qth_max << Wlog;
 	p->Wlog		= Wlog;
 	p->Plog		= Plog;
-	p->Rmask	= red_rmask(Plog);
+	if (delta < 0)
+		delta = 1;
+	p->qth_delta	= delta;
+	p->max_P	= red_maxp(Plog);
+	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
+
+	p->max_P_reciprocal = reciprocal_value(p->max_P / delta);
+
+	/* RED Adaptative target :
+	 * [min_th + 0.4*(min_th - max_th),
+	 *  min_th + 0.6*(min_th - max_th)].
+	 */
+	delta /= 5;
+	p->target_min = qth_min + 2*delta;
+	p->target_max = qth_min + 3*delta;
+
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
 	memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
-static inline int red_is_idling(struct red_parms *p)
+static inline int red_is_idling(const struct red_parms *p)
 {
 	return p->qidlestart.tv64 != 0;
 }
@@ -168,7 +214,7 @@ static inline void red_restart(struct red_parms *p)
 	p->qcount	= -1;
 }
 
-static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
+static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms *p)
 {
 	s64 delta = ktime_us_delta(ktime_get(), p->qidlestart);
 	long us_idle = min_t(s64, delta, p->Scell_max);
@@ -215,7 +261,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	}
 }
 
-static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
+static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p,
 							unsigned int backlog)
 {
 	/*
@@ -230,7 +276,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
 	return p->qavg + (backlog - (p->qavg >> p->Wlog));
 }
 
-static inline unsigned long red_calc_qavg(struct red_parms *p,
+static inline unsigned long red_calc_qavg(const struct red_parms *p,
 					  unsigned int backlog)
 {
 	if (!red_is_idling(p))
@@ -239,23 +285,24 @@ static inline unsigned long red_calc_qavg(struct red_parms *p,
 		return red_calc_qavg_from_idle_time(p);
 }
 
-static inline u32 red_random(struct red_parms *p)
+
+static inline u32 red_random(const struct red_parms *p)
 {
-	return net_random() & p->Rmask;
+	return reciprocal_divide(net_random(), p->max_P_reciprocal);
 }
 
-static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
+static inline int red_mark_probability(const struct red_parms *p, unsigned long qavg)
 {
 	/* The formula used below causes questions.
 
-	   OK. qR is random number in the interval 0..Rmask
+	   OK. qR is random number in the interval
+		(0..1/max_P)*(qth_max-qth_min)
 	   i.e. 0..(2^Plog). If we used floating point
 	   arithmetics, it would be: (2^Plog)*rnd_num,
 	   where rnd_num is less 1.
 
 	   Taking into account, that qavg have fixed
-	   point at Wlog, and Plog is related to max_P by
-	   max_P = (qth_max-qth_min)/2^Plog; two lines
+	   point at Wlog, two lines
 	   below have the following floating point equivalent:
 
 	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
@@ -315,4 +362,24 @@ static inline int red_action(struct red_parms *p, unsigned long qavg)
 	return RED_DONT_MARK;
 }
 
+static inline void red_adaptative_algo(struct red_parms *p)
+{
+	unsigned long qavg;
+	u32 max_p_delta;
+
+	qavg = p->qavg;
+	if (red_is_idling(p))
+		qavg = red_calc_qavg_from_idle_time(p);
+
+	/* p->qavg is fixed point number with point at Wlog */
+	qavg >>= p->Wlog;
+
+	if (qavg > p->target_max && p->max_P <= MAX_P_MAX)
+		p->max_P += MAX_P_ALPHA(p->max_P); /* maxp = maxp + alpha */
+	else if (qavg < p->target_min && p->max_P >= MAX_P_MIN)
+		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
+
+	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	p->max_P_reciprocal = reciprocal_value(max_p_delta);
+}
 #endif
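A rough userspace rendering of the new random draw in red_random(): a 32 bit
random number is scaled into the (0..1/max_P)*(qth_max-qth_min) interval with
one multiply and shift, instead of being AND-ed with Rmask. The two helpers
are reimplemented here purely for illustration; in the kernel they live in
lib/reciprocal_div.c and include/linux/reciprocal_div.h, and the sample
qth_delta/max_P values are assumptions, not taken from the patch.

#include <stdio.h>
#include <stdint.h>

/* same idea as lib/reciprocal_div.c: precompute ~(2^32 / k) once */
static uint32_t reciprocal_value(uint32_t k)
{
        uint64_t val = (1ULL << 32) + (k - 1);

        return (uint32_t)(val / k);
}

/* a / k approximated as (a * reciprocal) >> 32, i.e. a multiply at runtime */
static uint32_t reciprocal_divide(uint32_t a, uint32_t R)
{
        return (uint32_t)(((uint64_t)a * R) >> 32);
}

int main(void)
{
        uint32_t qth_delta = 60000;                        /* max_th - min_th */
        uint32_t max_P = (uint32_t)(0.10 * 4294967296.0);  /* 10% in Q0.32 */
        uint32_t R = reciprocal_value(max_P / qth_delta);
        uint32_t rnd[] = { 0x00000000u, 0x40000000u, 0x80000000u, 0xffffffffu };

        /* qR ends up roughly uniform in [0, qth_delta / 0.10) = [0, 600000) */
        for (unsigned int i = 0; i < sizeof(rnd) / sizeof(rnd[0]); i++)
                printf("rnd=%#010x -> qR=%u\n", rnd[i],
                       reciprocal_divide(rnd[i], R));
        return 0;
}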
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 6a3bd48fa2a0..75510e94f7d0 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,5 +1,6 @@
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
+#include <linux/export.h>
 
 u32 reciprocal_value(u32 k)
 {
@@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k)
 	do_div(val, k);
 	return (u32)val;
 }
+EXPORT_SYMBOL(reciprocal_value);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd3..8f5a85bf9d10 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@
 struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
+	struct timer_list	adapt_timer;
 	struct red_parms	parms;
 	struct red_stats	stats;
 	struct Qdisc		*qdisc;
@@ -161,6 +162,8 @@ static void red_reset(struct Qdisc *sch)
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->adapt_timer);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -209,6 +212,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 			      ctl->Plog, ctl->Scell_log,
 			      nla_data(tb[TCA_RED_STAB]));
 
+	del_timer(&q->adapt_timer);
+	if (ctl->flags & TC_RED_ADAPTATIVE)
+		mod_timer(&q->adapt_timer, jiffies + HZ/2);
+
 	if (!q->qdisc->q.qlen)
 		red_start_of_idle_period(&q->parms);
 
@@ -216,11 +223,24 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
+static inline void red_adaptative_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct red_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+	spin_lock(root_lock);
+	red_adaptative_algo(&q->parms);
+	mod_timer(&q->adapt_timer, jiffies + HZ/2);
+	spin_unlock(root_lock);
+}
+
 static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
+	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
 	return red_change(sch, opt);
 }
 
@@ -243,6 +263,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
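Since max_P is exported verbatim as a Q0.32 value in the new TCA_RED_MAX_P
attribute, a userspace dumper (such as the tc output quoted in the changelog)
only has to divide by 2^32 to obtain the printable fraction. A hypothetical
helper, with an illustrative attribute value chosen to land near the
max_p=0.113335 shown above:

#include <stdio.h>
#include <stdint.h>

/* Convert a Q0.32 TCA_RED_MAX_P value into the fraction tc prints. */
static double red_max_p_to_double(uint32_t max_p)
{
        return max_p / 4294967296.0;    /* value / 2^32 */
}

int main(void)
{
        printf("max_p=%.6f\n", red_max_p_to_double(486770118u));
        return 0;
}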