aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2013-10-01 12:10:16 -0400
committer David S. Miller <davem@davemloft.net> 2013-10-01 13:00:38 -0400
commit 0eab5eb7a3a9a6ccfcdecbffff00d60a86a004bb (patch)
tree 0d6ed9a2e3cc158adf2e12c34755f933162f0b45 /net/sched
parent bb8140947a247b9aa15652cc24dc555ebb0b64b0 (diff)
pkt_sched: fq: rate limiting improvements
FQ rate limiting suffers from two problems, reported by Steinar:

1) FQ enforces a delay when the flow quantum is exhausted in order to
   reduce cpu overhead. But if packets are small, the current delay
   computation is slightly wrong, and observed rates can be too high.

   Steinar had this problem because he disabled TSO and GSO, and the
   default FQ quantum is 2*1514.

   (Of course, I hope the recent TSO auto sizing changes will help
   avoid having to disable TSO in the first place.)

2) maxrate was not used for forwarded flows (skbs not attached to a
   socket).

Tested:

tc qdisc add dev eth0 root est 1sec 4sec fq maxrate 8Mbit
netperf -H lpq84 -l 1000 &
sleep 10 ; tc -s qdisc show dev eth0
qdisc fq 8003: root refcnt 32 limit 10000p flow_limit 100p buckets 1024 quantum 3028 initial_quantum 15140 maxrate 8000Kbit
 Sent 16819357 bytes 11258 pkt (dropped 0, overlimits 0 requeues 0)
 rate 7831Kbit 653pps backlog 7570b 5p requeues 0
  44 flows (43 inactive, 1 throttled), next packet delay 2977352 ns
  0 gc, 0 highprio, 5545 throttled

lpq83:~# tcpdump -p -i eth0 host lpq84 -c 12
09:02:52.079484 IP lpq83 > lpq84: . 1389536928:1389538376(1448) ack 3808678021 win 457 <nop,nop,timestamp 961812 572609068>
09:02:52.079499 IP lpq83 > lpq84: . 1448:2896(1448) ack 1 win 457 <nop,nop,timestamp 961812 572609068>
09:02:52.079906 IP lpq84 > lpq83: . ack 2896 win 16384 <nop,nop,timestamp 572609080 961812>
09:02:52.082568 IP lpq83 > lpq84: . 2896:4344(1448) ack 1 win 457 <nop,nop,timestamp 961815 572609071>
09:02:52.082581 IP lpq83 > lpq84: . 4344:5792(1448) ack 1 win 457 <nop,nop,timestamp 961815 572609071>
09:02:52.083017 IP lpq84 > lpq83: . ack 5792 win 16384 <nop,nop,timestamp 572609083 961815>
09:02:52.085678 IP lpq83 > lpq84: . 5792:7240(1448) ack 1 win 457 <nop,nop,timestamp 961818 572609074>
09:02:52.085693 IP lpq83 > lpq84: . 7240:8688(1448) ack 1 win 457 <nop,nop,timestamp 961818 572609074>
09:02:52.086117 IP lpq84 > lpq83: . ack 8688 win 16384 <nop,nop,timestamp 572609086 961818>
09:02:52.088792 IP lpq83 > lpq84: . 8688:10136(1448) ack 1 win 457 <nop,nop,timestamp 961821 572609077>
09:02:52.088806 IP lpq83 > lpq84: . 10136:11584(1448) ack 1 win 457 <nop,nop,timestamp 961821 572609077>
09:02:52.089217 IP lpq84 > lpq83: . ack 11584 win 16384 <nop,nop,timestamp 572609090 961821>

Reported-by: Steinar H. Gunderson <sesse@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/sch_fq.c 45
1 files changed, 26 insertions, 19 deletions
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index fc6de56a331e..a2fef8b10b96 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -420,6 +420,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
420 struct fq_flow_head *head; 420 struct fq_flow_head *head;
421 struct sk_buff *skb; 421 struct sk_buff *skb;
422 struct fq_flow *f; 422 struct fq_flow *f;
423 u32 rate;
423 424
424 skb = fq_dequeue_head(sch, &q->internal); 425 skb = fq_dequeue_head(sch, &q->internal);
425 if (skb) 426 if (skb)
@@ -468,28 +469,34 @@ begin:
468 f->time_next_packet = now; 469 f->time_next_packet = now;
469 f->credit -= qdisc_pkt_len(skb); 470 f->credit -= qdisc_pkt_len(skb);
470 471
471 if (f->credit <= 0 && 472 if (f->credit > 0 || !q->rate_enable)
472 q->rate_enable && 473 goto out;
473 skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
474 u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
475 474
476 rate = min(rate, q->flow_max_rate); 475 if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
477 if (rate) { 476 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
478 u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
479
480 do_div(len, rate);
481 /* Since socket rate can change later,
482 * clamp the delay to 125 ms.
483 * TODO: maybe segment the too big skb, as in commit
484 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
485 */
486 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
487 len = 125 * NSEC_PER_MSEC;
488 q->stat_pkts_too_long++;
489 }
490 477
491 f->time_next_packet = now + len; 478 rate = min(rate, q->flow_max_rate);
479 } else {
480 rate = q->flow_max_rate;
481 if (rate == ~0U)
482 goto out;
483 }
484 if (rate) {
485 u32 plen = max(qdisc_pkt_len(skb), q->quantum);
486 u64 len = (u64)plen * NSEC_PER_SEC;
487
488 do_div(len, rate);
489 /* Since socket rate can change later,
490 * clamp the delay to 125 ms.
491 * TODO: maybe segment the too big skb, as in commit
492 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
493 */
494 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
495 len = 125 * NSEC_PER_MSEC;
496 q->stat_pkts_too_long++;
492 } 497 }
498
499 f->time_next_packet = now + len;
493 } 500 }
494out: 501out:
495 qdisc_bstats_update(sch, skb); 502 qdisc_bstats_update(sch, skb);