path: root/net/sched/sch_tbf.c
author	Jiri Pirko <jiri@resnulli.us>	2013-02-11 19:12:05 -0500
committer	David S. Miller <davem@davemloft.net>	2013-02-12 18:59:45 -0500
commit	b757c9336d63f94c6b57532bb4e8651d8b28786f (patch)
tree	c269cb8abddd6d563e466436c8abf6684a3da5e7 /net/sched/sch_tbf.c
parent	34c5d292ce05d2bf52e692c44292b0ababba2853 (diff)
tbf: improved accuracy at high rates
Current TBF uses a rate table computed by the "tc" userspace program,
which has the following issue: the rate table has 256 entries to map
packet lengths to tokens (time units). With TSO-sized packets, the
256-entry granularity leads to loss/gain of rate, making the token
bucket inaccurate.

Thus, instead of relying on the rate table, this patch explicitly
computes the time and accounts for packet transmission times with
nanosecond granularity.

This is a followup to 56b765b79e9a78dc7d3f8850ba5e5567205a3ecd
("htb: improved accuracy at high rates").

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
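To make the granularity problem concrete, here is a minimal userspace sketch — illustrative only, not kernel code; the names l2t_table, l2t_exact, and CELL_LOG are hypothetical — contrasting the old table lookup with a direct length-to-time computation. tc picks a cell size of 2^cell_log bytes so that the largest packet fits in 256 slots, so every length within a cell is charged the same transmission time:

/*
 * Illustrative sketch only -- not kernel code.  Compares the old
 * 256-entry rate-table lookup with an exact length-to-time mapping.
 * Names (l2t_table, l2t_exact, CELL_LOG) are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC	1000000000ULL
#define CELL_LOG	8	/* 256-byte cells: 64 KB spans 256 slots */

/* Old scheme: all lengths within one 2^CELL_LOG-byte cell are charged
 * the transmission time of the cell's upper bound. */
static uint64_t l2t_table(uint64_t rate_bytes_ps, unsigned int len)
{
	unsigned int slot = len >> CELL_LOG;		/* 0..255 */
	uint64_t cell_bytes = (uint64_t)(slot + 1) << CELL_LOG;

	return cell_bytes * NSEC_PER_SEC / rate_bytes_ps;
}

/* New scheme: compute the time directly from the exact length, which
 * is what psched_l2t_ns() achieves via a precomputed multiply/shift. */
static uint64_t l2t_exact(uint64_t rate_bytes_ps, unsigned int len)
{
	return (uint64_t)len * NSEC_PER_SEC / rate_bytes_ps;
}

int main(void)
{
	uint64_t rate = 1250000000ULL;	/* 10 Gbit/s, in bytes/sec */
	unsigned int len = 65226;	/* a TSO-sized packet */

	printf("table: %llu ns  exact: %llu ns\n",
	       (unsigned long long)l2t_table(rate, len),
	       (unsigned long long)l2t_exact(rate, len));
	return 0;
}

At 10 Gbit/s the two results differ by roughly 44 ns for this one 64 KB packet, and that rounding error is repaid (or pocketed) on every dequeue, which is why the table-based bucket drifts at high rates.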
Diffstat (limited to 'net/sched/sch_tbf.c')
-rw-r--r--	net/sched/sch_tbf.c	76
1 file changed, 37 insertions(+), 39 deletions(-)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 4b056c15e90c..c8388f3c3426 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <net/netlink.h>
+#include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 
 
@@ -100,23 +101,21 @@
 struct tbf_sched_data {
 /* Parameters */
 	u32		limit;		/* Maximal length of backlog: bytes */
-	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
-	u32		mtu;
+	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
+	s64		mtu;
 	u32		max_size;
-	struct qdisc_rate_table	*R_tab;
-	struct qdisc_rate_table	*P_tab;
+	struct psched_ratecfg rate;
+	struct psched_ratecfg peak;
+	bool peak_present;
 
 /* Variables */
-	long	tokens;			/* Current number of B tokens */
-	long	ptokens;		/* Current number of P tokens */
-	psched_time_t	t_c;		/* Time check-point */
+	s64	tokens;			/* Current number of B tokens */
+	s64	ptokens;		/* Current number of P tokens */
+	s64	t_c;			/* Time check-point */
 	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
 	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
-#define L2T(q, L)	qdisc_l2t((q)->R_tab, L)
-#define L2T_P(q, L)	qdisc_l2t((q)->P_tab, L)
-
 static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -156,24 +155,24 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 	skb = q->qdisc->ops->peek(q->qdisc);
 
 	if (skb) {
-		psched_time_t now;
-		long toks;
-		long ptoks = 0;
+		s64 now;
+		s64 toks;
+		s64 ptoks = 0;
 		unsigned int len = qdisc_pkt_len(skb);
 
-		now = psched_get_time();
-		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
+		now = ktime_to_ns(ktime_get());
+		toks = min_t(s64, now - q->t_c, q->buffer);
 
-		if (q->P_tab) {
+		if (q->peak_present) {
 			ptoks = toks + q->ptokens;
-			if (ptoks > (long)q->mtu)
+			if (ptoks > q->mtu)
 				ptoks = q->mtu;
-			ptoks -= L2T_P(q, len);
+			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
 		}
 		toks += q->tokens;
-		if (toks > (long)q->buffer)
+		if (toks > q->buffer)
 			toks = q->buffer;
-		toks -= L2T(q, len);
+		toks -= (s64) psched_l2t_ns(&q->rate, len);
 
 		if ((toks|ptoks) >= 0) {
 			skb = qdisc_dequeue_peeked(q->qdisc);
@@ -189,8 +188,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 			return skb;
 		}
 
-		qdisc_watchdog_schedule(&q->watchdog,
-					now + max_t(long, -toks, -ptoks));
+		qdisc_watchdog_schedule_ns(&q->watchdog,
+					   now + max_t(long, -toks, -ptoks));
 
 		/* Maybe we have a shorter packet in the queue,
 		   which can be sent now. It sounds cool,
@@ -214,7 +213,7 @@ static void tbf_reset(struct Qdisc *sch)
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	q->t_c = psched_get_time();
+	q->t_c = ktime_to_ns(ktime_get());
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -293,14 +292,19 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
-	q->mtu = qopt->mtu;
+	q->mtu = PSCHED_TICKS2NS(qopt->mtu);
 	q->max_size = max_size;
-	q->buffer = qopt->buffer;
+	q->buffer = PSCHED_TICKS2NS(qopt->buffer);
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 
-	swap(q->R_tab, rtab);
-	swap(q->P_tab, ptab);
+	psched_ratecfg_precompute(&q->rate, rtab->rate.rate);
+	if (ptab) {
+		psched_ratecfg_precompute(&q->peak, ptab->rate.rate);
+		q->peak_present = true;
+	} else {
+		q->peak_present = false;
+	}
 
 	sch_tree_unlock(sch);
 	err = 0;
@@ -319,7 +323,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	q->t_c = psched_get_time();
+	q->t_c = ktime_to_ns(ktime_get());
 	qdisc_watchdog_init(&q->watchdog, sch);
 	q->qdisc = &noop_qdisc;
 
@@ -331,12 +335,6 @@ static void tbf_destroy(struct Qdisc *sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-
-	if (q->P_tab)
-		qdisc_put_rtab(q->P_tab);
-	if (q->R_tab)
-		qdisc_put_rtab(q->R_tab);
-
 	qdisc_destroy(q->qdisc);
 }
 
@@ -352,13 +350,13 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	opt.limit = q->limit;
-	opt.rate = q->R_tab->rate;
-	if (q->P_tab)
-		opt.peakrate = q->P_tab->rate;
+	opt.rate.rate = psched_ratecfg_getrate(&q->rate);
+	if (q->peak_present)
+		opt.peakrate.rate = psched_ratecfg_getrate(&q->peak);
 	else
 		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
-	opt.mtu = q->mtu;
-	opt.buffer = q->buffer;
+	opt.mtu = PSCHED_NS2TICKS(q->mtu);
+	opt.buffer = PSCHED_NS2TICKS(q->buffer);
 	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
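For reference, a simplified userspace model of the patched dequeue decision, with both buckets measured in nanoseconds of transmission time. This is a sketch under stated assumptions, not the kernel implementation: the names are hypothetical, the plain division stands in for psched_l2t_ns(), and skb handling and the watchdog are elided.

/*
 * Simplified model of the new token-bucket check -- not kernel code.
 * All identifiers here (tbf_state, tbf_may_send, l2t_ns) are
 * hypothetical.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct tbf_state {
	int64_t buffer;		/* rate-bucket depth, ns */
	int64_t mtu;		/* peak-bucket depth, ns */
	int64_t tokens;		/* current rate-bucket tokens, ns */
	int64_t ptokens;	/* current peak-bucket tokens, ns */
	int64_t t_c;		/* last checkpoint, ns */
	uint64_t rate_Bps;	/* committed rate, bytes/sec */
	uint64_t peak_Bps;	/* peak rate, bytes/sec */
	bool peak_present;
};

/* Transmission time of 'len' bytes at 'rate_Bps', in nanoseconds. */
static int64_t l2t_ns(uint64_t rate_Bps, unsigned int len)
{
	return (int64_t)((uint64_t)len * 1000000000ULL / rate_Bps);
}

/* May a packet of 'len' bytes be sent at time 'now' (ns)? */
static bool tbf_may_send(struct tbf_state *q, unsigned int len, int64_t now)
{
	/* Credit elapsed time, capped at the bucket depth. */
	int64_t toks = now - q->t_c;
	int64_t ptoks = 0;

	if (toks > q->buffer)
		toks = q->buffer;

	if (q->peak_present) {
		ptoks = toks + q->ptokens;
		if (ptoks > q->mtu)
			ptoks = q->mtu;
		ptoks -= l2t_ns(q->peak_Bps, len);
	}
	toks += q->tokens;
	if (toks > q->buffer)
		toks = q->buffer;
	toks -= l2t_ns(q->rate_Bps, len);

	/* Both buckets must stay non-negative: the (toks | ptoks) >= 0
	 * trick tests both sign bits at once. */
	if ((toks | ptoks) >= 0) {
		q->t_c = now;
		q->tokens = toks;
		q->ptokens = ptoks;
		return true;
	}
	/* Otherwise the kernel arms its watchdog for
	 * now + max(-toks, -ptoks) and retries then. */
	return false;
}

int main(void)
{
	struct tbf_state q = {
		.buffer = 100000, .tokens = 100000,	/* 100 us burst */
		.rate_Bps = 1250000000ULL,		/* 10 Gbit/s */
		.peak_present = false,
	};

	/* A 64 KB burst fits; an immediate second one must wait. */
	printf("%d\n", tbf_may_send(&q, 65226, 0));	/* 1 */
	printf("%d\n", tbf_may_send(&q, 65226, 0));	/* 0 */
	return 0;
}

Note how moving to s64 nanoseconds lets the same arithmetic serve both buckets with no table, which is exactly what the switch from psched ticks to ktime_to_ns() and PSCHED_TICKS2NS() at the netlink boundary enables.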