aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYang Yingliang <yangyingliang@huawei.com>2013-12-10 01:59:27 -0500
committerDavid S. Miller <davem@davemloft.net>2013-12-11 15:08:41 -0500
commitcc106e441a63bec3b1cb72948df82ea15945c449 (patch)
treebe4ee6109410b1613b0f465b94cca6dbb87a1561
parent4bd7b5127bd02c12c1cc837a7a0b6ce295eb2505 (diff)
net: sched: tbf: fix the calculation of max_size
Current max_size is caluated from rate table. Now, the rate table has been replaced and it's wrong to caculate max_size based on this rate table. It can lead wrong calculation of max_size. The burst in kernel may be lower than user asked, because burst may gets some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s") and it seems we cannot avoid this loss. Burst's value(max_size) based on rate table may be equal user asked. If a packet's length is max_size, this packet will be stalled in tbf_dequeue() because its length is above the burst in kernel so that it cannot get enough tokens. The max_size guards against enqueuing packet sizes above q->buffer "time" in tbf_enqueue(). To make consistent with the calculation of tokens, this patch add a helper psched_ns_t2l() to calculate burst(max_size) directly to fix this problem. After this fix, we can support to using 64bit rates to calculate burst as well. Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sched/sch_tbf.c115
1 files changed, 70 insertions, 45 deletions
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a6090051c5db..a44928c6ba24 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -118,6 +118,30 @@ struct tbf_sched_data {
118}; 118};
119 119
120 120
121/* Time to Length, convert time in ns to length in bytes
122 * to determine how many bytes can be sent in given time.
123 */
124static u64 psched_ns_t2l(const struct psched_ratecfg *r,
125 u64 time_in_ns)
126{
127 /* The formula is :
128 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
129 */
130 u64 len = time_in_ns * r->rate_bytes_ps;
131
132 do_div(len, NSEC_PER_SEC);
133
134 if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
135 len = (len / 53) * 48;
136
137 if (len > r->overhead)
138 len -= r->overhead;
139 else
140 len = 0;
141
142 return len;
143}
144
121/* 145/*
122 * Return length of individual segments of a gso packet, 146 * Return length of individual segments of a gso packet,
123 * including all headers (MAC, IP, TCP/UDP) 147 * including all headers (MAC, IP, TCP/UDP)
@@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
289 struct tbf_sched_data *q = qdisc_priv(sch); 313 struct tbf_sched_data *q = qdisc_priv(sch);
290 struct nlattr *tb[TCA_TBF_MAX + 1]; 314 struct nlattr *tb[TCA_TBF_MAX + 1];
291 struct tc_tbf_qopt *qopt; 315 struct tc_tbf_qopt *qopt;
292 struct qdisc_rate_table *rtab = NULL;
293 struct qdisc_rate_table *ptab = NULL;
294 struct Qdisc *child = NULL; 316 struct Qdisc *child = NULL;
295 int max_size, n; 317 struct psched_ratecfg rate;
318 struct psched_ratecfg peak;
319 u64 max_size;
320 s64 buffer, mtu;
296 u64 rate64 = 0, prate64 = 0; 321 u64 rate64 = 0, prate64 = 0;
297 322
298 err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); 323 err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy);
@@ -304,38 +329,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
304 goto done; 329 goto done;
305 330
306 qopt = nla_data(tb[TCA_TBF_PARMS]); 331 qopt = nla_data(tb[TCA_TBF_PARMS]);
307 rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); 332 if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
308 if (rtab == NULL) 333 qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
309 goto done; 334 tb[TCA_TBF_RTAB]));
310
311 if (qopt->peakrate.rate) {
312 if (qopt->peakrate.rate > qopt->rate.rate)
313 ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
314 if (ptab == NULL)
315 goto done;
316 }
317
318 for (n = 0; n < 256; n++)
319 if (rtab->data[n] > qopt->buffer)
320 break;
321 max_size = (n << qopt->rate.cell_log) - 1;
322 if (ptab) {
323 int size;
324
325 for (n = 0; n < 256; n++)
326 if (ptab->data[n] > qopt->mtu)
327 break;
328 size = (n << qopt->peakrate.cell_log) - 1;
329 if (size < max_size)
330 max_size = size;
331 }
332 if (max_size < 0)
333 goto done;
334 335
335 if (max_size < psched_mtu(qdisc_dev(sch))) 336 if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
336 pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", 337 qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
337 max_size, qdisc_dev(sch)->name, 338 tb[TCA_TBF_PTAB]));
338 psched_mtu(qdisc_dev(sch)));
339 339
340 if (q->qdisc != &noop_qdisc) { 340 if (q->qdisc != &noop_qdisc) {
341 err = fifo_set_limit(q->qdisc, qopt->limit); 341 err = fifo_set_limit(q->qdisc, qopt->limit);
@@ -349,6 +349,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
349 } 349 }
350 } 350 }
351 351
352 buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
353 mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
354
355 if (tb[TCA_TBF_RATE64])
356 rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
357 psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
358
359 max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
360
361 if (qopt->peakrate.rate) {
362 if (tb[TCA_TBF_PRATE64])
363 prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
364 psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
365 if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
366 pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
367 peak.rate_bytes_ps, rate.rate_bytes_ps);
368 err = -EINVAL;
369 goto done;
370 }
371
372 max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
373 }
374
375 if (max_size < psched_mtu(qdisc_dev(sch)))
376 pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
377 max_size, qdisc_dev(sch)->name,
378 psched_mtu(qdisc_dev(sch)));
379
380 if (!max_size) {
381 err = -EINVAL;
382 goto done;
383 }
384
352 sch_tree_lock(sch); 385 sch_tree_lock(sch);
353 if (child) { 386 if (child) {
354 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 387 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
@@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
362 q->tokens = q->buffer; 395 q->tokens = q->buffer;
363 q->ptokens = q->mtu; 396 q->ptokens = q->mtu;
364 397
365 if (tb[TCA_TBF_RATE64]) 398 memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
366 rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); 399 if (qopt->peakrate.rate) {
367 psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); 400 memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
368 if (ptab) {
369 if (tb[TCA_TBF_PRATE64])
370 prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
371 psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64);
372 q->peak_present = true; 401 q->peak_present = true;
373 } else { 402 } else {
374 q->peak_present = false; 403 q->peak_present = false;
@@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
377 sch_tree_unlock(sch); 406 sch_tree_unlock(sch);
378 err = 0; 407 err = 0;
379done: 408done:
380 if (rtab)
381 qdisc_put_rtab(rtab);
382 if (ptab)
383 qdisc_put_rtab(ptab);
384 return err; 409 return err;
385} 410}
386 411