diff options
Diffstat (limited to 'net/sched/sch_tbf.c')
-rw-r--r-- | net/sched/sch_tbf.c | 162 |
1 files changed, 113 insertions, 49 deletions
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f98595819c..fbba5b0ec121 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <net/netlink.h> | 21 | #include <net/netlink.h> |
22 | #include <net/sch_generic.h> | 22 | #include <net/sch_generic.h> |
23 | #include <net/pkt_sched.h> | 23 | #include <net/pkt_sched.h> |
24 | #include <net/tcp.h> | ||
24 | 25 | ||
25 | 26 | ||
26 | /* Simple Token Bucket Filter. | 27 | /* Simple Token Bucket Filter. |
@@ -117,6 +118,48 @@ struct tbf_sched_data { | |||
117 | }; | 118 | }; |
118 | 119 | ||
119 | 120 | ||
121 | /* Time to Length: convert a time in ns to a length in bytes, | |
122 | * to determine how many bytes can be sent at rate r in the given time. Returns 0 when the computed length does not exceed r->overhead. | |
123 | */ | |
124 | static u64 psched_ns_t2l(const struct psched_ratecfg *r, | |
125 | u64 time_in_ns) | |
126 | { | |
127 | /* The formula is : | |
128 | * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC | |
129 | * NOTE(review): the product below is not checked for 64-bit overflow; confirm callers bound time_in_ns and the rate. */ | |
130 | u64 len = time_in_ns * r->rate_bytes_ps; | |
131 | |
132 | do_div(len, NSEC_PER_SEC); | |
133 | |
134 | if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { /* scale by 48/53 (integer division first); presumably ATM cell payload vs. cell size - TODO confirm */ | |
135 | do_div(len, 53); | |
136 | len = len * 48; | |
137 | } | |
138 | |
139 | if (len > r->overhead) /* subtract r->overhead, clamping at zero instead of letting the subtraction wrap */ | |
140 | len -= r->overhead; | |
141 | else | |
142 | len = 0; | |
143 | |
144 | return len; | |
145 | } | |
146 | |||
147 | /* | |
148 | * Return the length of an individual segment of a gso packet, | |
149 | * including all headers (MAC, IP, TCP/UDP): the bytes from the MAC header up to the transport header, plus the transport header, plus shinfo->gso_size. Any gso_type other than SKB_GSO_TCPV4/SKB_GSO_TCPV6 is sized with sizeof(struct udphdr). | |
150 | */ | |
151 | static unsigned int skb_gso_seglen(const struct sk_buff *skb) | |
152 | { | |
153 | unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); | |
154 | const struct skb_shared_info *shinfo = skb_shinfo(skb); | |
155 | |
156 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) /* TCP: use the header length reported by tcp_hdrlen() */ | |
157 | hdr_len += tcp_hdrlen(skb); | |
158 | else /* everything else is treated as UDP with a fixed-size header */ | |
159 | hdr_len += sizeof(struct udphdr); | |
160 | return hdr_len + shinfo->gso_size; | |
161 | } | |
162 | |||
120 | /* GSO packet is too big, segment it so that tbf can transmit | 163 | /* GSO packet is too big, segment it so that tbf can transmit |
121 | * each segment in time | 164 | * each segment in time |
122 | */ | 165 | */ |
@@ -136,12 +179,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) | |||
136 | while (segs) { | 179 | while (segs) { |
137 | nskb = segs->next; | 180 | nskb = segs->next; |
138 | segs->next = NULL; | 181 | segs->next = NULL; |
139 | if (likely(segs->len <= q->max_size)) { | 182 | qdisc_skb_cb(segs)->pkt_len = segs->len; |
140 | qdisc_skb_cb(segs)->pkt_len = segs->len; | 183 | ret = qdisc_enqueue(segs, q->qdisc); |
141 | ret = qdisc_enqueue(segs, q->qdisc); | ||
142 | } else { | ||
143 | ret = qdisc_reshape_fail(skb, sch); | ||
144 | } | ||
145 | if (ret != NET_XMIT_SUCCESS) { | 184 | if (ret != NET_XMIT_SUCCESS) { |
146 | if (net_xmit_drop_count(ret)) | 185 | if (net_xmit_drop_count(ret)) |
147 | sch->qstats.drops++; | 186 | sch->qstats.drops++; |
@@ -163,7 +202,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
163 | int ret; | 202 | int ret; |
164 | 203 | ||
165 | if (qdisc_pkt_len(skb) > q->max_size) { | 204 | if (qdisc_pkt_len(skb) > q->max_size) { |
166 | if (skb_is_gso(skb)) | 205 | if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) |
167 | return tbf_segment(skb, sch); | 206 | return tbf_segment(skb, sch); |
168 | return qdisc_reshape_fail(skb, sch); | 207 | return qdisc_reshape_fail(skb, sch); |
169 | } | 208 | } |
@@ -268,6 +307,8 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { | |||
268 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, | 307 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, |
269 | [TCA_TBF_RATE64] = { .type = NLA_U64 }, | 308 | [TCA_TBF_RATE64] = { .type = NLA_U64 }, |
270 | [TCA_TBF_PRATE64] = { .type = NLA_U64 }, | 309 | [TCA_TBF_PRATE64] = { .type = NLA_U64 }, |
310 | [TCA_TBF_BURST] = { .type = NLA_U32 }, | ||
311 | [TCA_TBF_PBURST] = { .type = NLA_U32 }, | ||
271 | }; | 312 | }; |
272 | 313 | ||
273 | static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | 314 | static int tbf_change(struct Qdisc *sch, struct nlattr *opt) |
@@ -276,10 +317,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
276 | struct tbf_sched_data *q = qdisc_priv(sch); | 317 | struct tbf_sched_data *q = qdisc_priv(sch); |
277 | struct nlattr *tb[TCA_TBF_MAX + 1]; | 318 | struct nlattr *tb[TCA_TBF_MAX + 1]; |
278 | struct tc_tbf_qopt *qopt; | 319 | struct tc_tbf_qopt *qopt; |
279 | struct qdisc_rate_table *rtab = NULL; | ||
280 | struct qdisc_rate_table *ptab = NULL; | ||
281 | struct Qdisc *child = NULL; | 320 | struct Qdisc *child = NULL; |
282 | int max_size, n; | 321 | struct psched_ratecfg rate; |
322 | struct psched_ratecfg peak; | ||
323 | u64 max_size; | ||
324 | s64 buffer, mtu; | ||
283 | u64 rate64 = 0, prate64 = 0; | 325 | u64 rate64 = 0, prate64 = 0; |
284 | 326 | ||
285 | err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); | 327 | err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); |
@@ -291,33 +333,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
291 | goto done; | 333 | goto done; |
292 | 334 | ||
293 | qopt = nla_data(tb[TCA_TBF_PARMS]); | 335 | qopt = nla_data(tb[TCA_TBF_PARMS]); |
294 | rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); | 336 | if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) |
295 | if (rtab == NULL) | 337 | qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, |
296 | goto done; | 338 | tb[TCA_TBF_RTAB])); |
297 | 339 | ||
298 | if (qopt->peakrate.rate) { | 340 | if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) |
299 | if (qopt->peakrate.rate > qopt->rate.rate) | 341 | qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, |
300 | ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); | 342 | tb[TCA_TBF_PTAB])); |
301 | if (ptab == NULL) | ||
302 | goto done; | ||
303 | } | ||
304 | |||
305 | for (n = 0; n < 256; n++) | ||
306 | if (rtab->data[n] > qopt->buffer) | ||
307 | break; | ||
308 | max_size = (n << qopt->rate.cell_log) - 1; | ||
309 | if (ptab) { | ||
310 | int size; | ||
311 | |||
312 | for (n = 0; n < 256; n++) | ||
313 | if (ptab->data[n] > qopt->mtu) | ||
314 | break; | ||
315 | size = (n << qopt->peakrate.cell_log) - 1; | ||
316 | if (size < max_size) | ||
317 | max_size = size; | ||
318 | } | ||
319 | if (max_size < 0) | ||
320 | goto done; | ||
321 | 343 | ||
322 | if (q->qdisc != &noop_qdisc) { | 344 | if (q->qdisc != &noop_qdisc) { |
323 | err = fifo_set_limit(q->qdisc, qopt->limit); | 345 | err = fifo_set_limit(q->qdisc, qopt->limit); |
@@ -331,6 +353,50 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
331 | } | 353 | } |
332 | } | 354 | } |
333 | 355 | ||
356 | buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); | ||
357 | mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); | ||
358 | |||
359 | if (tb[TCA_TBF_RATE64]) | ||
360 | rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); | ||
361 | psched_ratecfg_precompute(&rate, &qopt->rate, rate64); | ||
362 | |||
363 | if (tb[TCA_TBF_BURST]) { | ||
364 | max_size = nla_get_u32(tb[TCA_TBF_BURST]); | ||
365 | buffer = psched_l2t_ns(&rate, max_size); | ||
366 | } else { | ||
367 | max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); | ||
368 | } | ||
369 | |||
370 | if (qopt->peakrate.rate) { | ||
371 | if (tb[TCA_TBF_PRATE64]) | ||
372 | prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); | ||
373 | psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); | ||
374 | if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { | ||
375 | pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", | ||
376 | peak.rate_bytes_ps, rate.rate_bytes_ps); | ||
377 | err = -EINVAL; | ||
378 | goto done; | ||
379 | } | ||
380 | |||
381 | if (tb[TCA_TBF_PBURST]) { | ||
382 | u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); | ||
383 | max_size = min_t(u32, max_size, pburst); | ||
384 | mtu = psched_l2t_ns(&peak, pburst); | ||
385 | } else { | ||
386 | max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); | ||
387 | } | ||
388 | } | ||
389 | |||
390 | if (max_size < psched_mtu(qdisc_dev(sch))) | ||
391 | pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", | ||
392 | max_size, qdisc_dev(sch)->name, | ||
393 | psched_mtu(qdisc_dev(sch))); | ||
394 | |||
395 | if (!max_size) { | ||
396 | err = -EINVAL; | ||
397 | goto done; | ||
398 | } | ||
399 | |||
334 | sch_tree_lock(sch); | 400 | sch_tree_lock(sch); |
335 | if (child) { | 401 | if (child) { |
336 | qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); | 402 | qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); |
@@ -338,19 +404,21 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
338 | q->qdisc = child; | 404 | q->qdisc = child; |
339 | } | 405 | } |
340 | q->limit = qopt->limit; | 406 | q->limit = qopt->limit; |
341 | q->mtu = PSCHED_TICKS2NS(qopt->mtu); | 407 | if (tb[TCA_TBF_PBURST]) |
408 | q->mtu = mtu; | ||
409 | else | ||
410 | q->mtu = PSCHED_TICKS2NS(qopt->mtu); | ||
342 | q->max_size = max_size; | 411 | q->max_size = max_size; |
343 | q->buffer = PSCHED_TICKS2NS(qopt->buffer); | 412 | if (tb[TCA_TBF_BURST]) |
413 | q->buffer = buffer; | ||
414 | else | ||
415 | q->buffer = PSCHED_TICKS2NS(qopt->buffer); | ||
344 | q->tokens = q->buffer; | 416 | q->tokens = q->buffer; |
345 | q->ptokens = q->mtu; | 417 | q->ptokens = q->mtu; |
346 | 418 | ||
347 | if (tb[TCA_TBF_RATE64]) | 419 | memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); |
348 | rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); | 420 | if (qopt->peakrate.rate) { |
349 | psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); | 421 | memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); |
350 | if (ptab) { | ||
351 | if (tb[TCA_TBF_PRATE64]) | ||
352 | prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); | ||
353 | psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); | ||
354 | q->peak_present = true; | 422 | q->peak_present = true; |
355 | } else { | 423 | } else { |
356 | q->peak_present = false; | 424 | q->peak_present = false; |
@@ -359,10 +427,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) | |||
359 | sch_tree_unlock(sch); | 427 | sch_tree_unlock(sch); |
360 | err = 0; | 428 | err = 0; |
361 | done: | 429 | done: |
362 | if (rtab) | ||
363 | qdisc_put_rtab(rtab); | ||
364 | if (ptab) | ||
365 | qdisc_put_rtab(ptab); | ||
366 | return err; | 430 | return err; |
367 | } | 431 | } |
368 | 432 | ||