diff options
author | Yang Yingliang <yangyingliang@huawei.com> | 2013-12-10 01:59:27 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-12-11 15:08:41 -0500 |
commit | cc106e441a63bec3b1cb72948df82ea15945c449 (patch) | |
tree | be4ee6109410b1613b0f465b94cca6dbb87a1561 | |
parent | 4bd7b5127bd02c12c1cc837a7a0b6ce295eb2505 (diff) |
net: sched: tbf: fix the calculation of max_size
Currently, max_size is calculated from the rate table. Now that the rate
table has been replaced, it is wrong to calculate max_size based on this
rate table. It can lead to a wrong calculation of max_size.
The burst in the kernel may be lower than what the user asked for, because
the burst may lose some precision when it is transformed to buffer (e.g.
"burst 40kb rate 30mbit/s"), and it seems we cannot avoid this loss. The
burst's value (max_size) based on the rate table may be equal to what the
user asked for. If a packet's length is max_size, this packet will be stalled
in tbf_dequeue() because its length is above the burst in the kernel, so it
cannot get enough tokens. The max_size guards against enqueuing packet sizes
above q->buffer "time" in tbf_enqueue().
To be consistent with the calculation of tokens, this patch adds a helper
psched_ns_t2l() to calculate burst (max_size) directly to fix this problem.
After this fix, we can support using 64-bit rates to calculate burst as well.
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/sched/sch_tbf.c | 115 |
1 files changed, 70 insertions, 45 deletions
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a6090051c5db..a44928c6ba24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c | |||
@@ -118,6 +118,30 @@ struct tbf_sched_data { | |||
118 | }; | 118 | }; |
119 | 119 | ||
120 | 120 | ||
121 | /* Time to Length, convert time in ns to length in bytes | ||
122 | * to determine how many bytes can be sent in a given time. | ||
123 | */ | ||
124 | static u64 psched_ns_t2l(const struct psched_ratecfg *r, | ||
125 | u64 time_in_ns) | ||
126 | { | ||
127 | /* The formula is : | ||
128 | * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC | ||
129 | */ | ||
130 | u64 len = time_in_ns * r->rate_bytes_ps; | ||
131 | |||
132 | do_div(len, NSEC_PER_SEC); | ||
133 | |||
134 | if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) | ||
135 | len = (len / 53) * 48; | ||
136 | |||
137 | if (len > r->overhead) | ||
138 | len -= r->overhead; | ||
139 | else | ||
140 | len = 0; | ||
141 | |||
142 | return len; | ||
143 | } | ||
144 | |||
121 | /* | 145 | /* |
122 | * Return length of individual segments of a gso packet, | 146 | * Return length of individual segments of a gso packet, |
123 | * including all headers (MAC, IP, TCP/UDP) | 147 | * including all headers (MAC, IP, TCP/UDP) |
@@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
289 | struct tbf_sched_data *q = qdisc_priv(sch); | 313 | struct tbf_sched_data *q = qdisc_priv(sch); |
290 | struct nlattr *tb[TCA_TBF_MAX + 1]; | 314 | struct nlattr *tb[TCA_TBF_MAX + 1]; |
291 | struct tc_tbf_qopt *qopt; | 315 | struct tc_tbf_qopt *qopt; |
292 | struct qdisc_rate_table *rtab = NULL; | ||
293 | struct qdisc_rate_table *ptab = NULL; | ||
294 | struct Qdisc *child = NULL; | 316 | struct Qdisc *child = NULL; |
295 | int max_size, n; | 317 | struct psched_ratecfg rate; |
318 | struct psched_ratecfg peak; | ||
319 | u64 max_size; | ||
320 | s64 buffer, mtu; | ||
296 | u64 rate64 = 0, prate64 = 0; | 321 | u64 rate64 = 0, prate64 = 0; |
297 | 322 | ||
298 | err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); | 323 | err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); |
@@ -304,38 +329,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
304 | goto done; | 329 | goto done; |
305 | 330 | ||
306 | qopt = nla_data(tb[TCA_TBF_PARMS]); | 331 | qopt = nla_data(tb[TCA_TBF_PARMS]); |
307 | rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); | 332 | if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) |
308 | if (rtab == NULL) | 333 | qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, |
309 | goto done; | 334 | tb[TCA_TBF_RTAB])); |
310 | |||
311 | if (qopt->peakrate.rate) { | ||
312 | if (qopt->peakrate.rate > qopt->rate.rate) | ||
313 | ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); | ||
314 | if (ptab == NULL) | ||
315 | goto done; | ||
316 | } | ||
317 | |||
318 | for (n = 0; n < 256; n++) | ||
319 | if (rtab->data[n] > qopt->buffer) | ||
320 | break; | ||
321 | max_size = (n << qopt->rate.cell_log) - 1; | ||
322 | if (ptab) { | ||
323 | int size; | ||
324 | |||
325 | for (n = 0; n < 256; n++) | ||
326 | if (ptab->data[n] > qopt->mtu) | ||
327 | break; | ||
328 | size = (n << qopt->peakrate.cell_log) - 1; | ||
329 | if (size < max_size) | ||
330 | max_size = size; | ||
331 | } | ||
332 | if (max_size < 0) | ||
333 | goto done; | ||
334 | 335 | ||
335 | if (max_size < psched_mtu(qdisc_dev(sch))) | 336 | if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) |
336 | pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", | 337 | qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, |
337 | max_size, qdisc_dev(sch)->name, | 338 | tb[TCA_TBF_PTAB])); |
338 | psched_mtu(qdisc_dev(sch))); | ||
339 | 339 | ||
340 | if (q->qdisc != &noop_qdisc) { | 340 | if (q->qdisc != &noop_qdisc) { |
341 | err = fifo_set_limit(q->qdisc, qopt->limit); | 341 | err = fifo_set_limit(q->qdisc, qopt->limit); |
@@ -349,6 +349,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
349 | } | 349 | } |
350 | } | 350 | } |
351 | 351 | ||
352 | buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); | ||
353 | mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); | ||
354 | |||
355 | if (tb[TCA_TBF_RATE64]) | ||
356 | rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); | ||
357 | psched_ratecfg_precompute(&rate, &qopt->rate, rate64); | ||
358 | |||
359 | max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); | ||
360 | |||
361 | if (qopt->peakrate.rate) { | ||
362 | if (tb[TCA_TBF_PRATE64]) | ||
363 | prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); | ||
364 | psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); | ||
365 | if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { | ||
366 | pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", | ||
367 | peak.rate_bytes_ps, rate.rate_bytes_ps); | ||
368 | err = -EINVAL; | ||
369 | goto done; | ||
370 | } | ||
371 | |||
372 | max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); | ||
373 | } | ||
374 | |||
375 | if (max_size < psched_mtu(qdisc_dev(sch))) | ||
376 | pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", | ||
377 | max_size, qdisc_dev(sch)->name, | ||
378 | psched_mtu(qdisc_dev(sch))); | ||
379 | |||
380 | if (!max_size) { | ||
381 | err = -EINVAL; | ||
382 | goto done; | ||
383 | } | ||
384 | |||
352 | sch_tree_lock(sch); | 385 | sch_tree_lock(sch); |
353 | if (child) { | 386 | if (child) { |
354 | qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); | 387 | qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); |
@@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
362 | q->tokens = q->buffer; | 395 | q->tokens = q->buffer; |
363 | q->ptokens = q->mtu; | 396 | q->ptokens = q->mtu; |
364 | 397 | ||
365 | if (tb[TCA_TBF_RATE64]) | 398 | memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); |
366 | rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); | 399 | if (qopt->peakrate.rate) { |
367 | psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); | 400 | memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); |
368 | if (ptab) { | ||
369 | if (tb[TCA_TBF_PRATE64]) | ||
370 | prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); | ||
371 | psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); | ||
372 | q->peak_present = true; | 401 | q->peak_present = true; |
373 | } else { | 402 | } else { |
374 | q->peak_present = false; | 403 | q->peak_present = false; |
@@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
377 | sch_tree_unlock(sch); | 406 | sch_tree_unlock(sch); |
378 | err = 0; | 407 | err = 0; |
379 | done: | 408 | done: |
380 | if (rtab) | ||
381 | qdisc_put_rtab(rtab); | ||
382 | if (ptab) | ||
383 | qdisc_put_rtab(ptab); | ||
384 | return err; | 409 | return err; |
385 | } | 410 | } |
386 | 411 | ||