diff options
author | David S. Miller <davem@davemloft.net> | 2014-08-19 13:59:12 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-08-19 13:59:12 -0400 |
commit | d3b6f9ffca9598381f63119f1cce77508fd37f64 (patch) | |
tree | a199e218bf0b0603bd71ebe7a74d14a4f54c7bbf | |
parent | ac32c7f705692b92fe12dcbe88fe87136fdfff6f (diff) | |
parent | 7201c1ddf774c12daa2dd5da098b8929db53f047 (diff) |
Merge branch 'cbq-fixes'
Vasily Averin says:
====================
cbq: incorrectly low bandwidth blocks limited traffic
v2: patch description changes
Fixes: f0f6ee1f70c4 ("cbq: incorrect processing of high limits")
Mainstream commit f0f6ee1f70c4 ("cbq: incorrect processing of high limits")
has a side effect: if the cbq bandwidth setting is less than the real interface
throughput, non-limited traffic can delay limited traffic for a very long time.
This happens because q->now is changed incorrectly in cbq_dequeue():
in the described scenario L2T is much greater than the real time delay,
and q->now gets an extra boost for each transmitted packet.
The accumulated boost prevents q->now from being updated, and a blocked class
can wait a very long time until (q->now >= cl->undertime) becomes true again.
A more detailed problem description can be found here:
http://www.spinics.net/lists/netdev/msg292493.html
The following patches should fix the problem.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/sched/sch_cbq.c | 48 |
1 files changed, 14 insertions, 34 deletions
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ead526467cca..762a04bb8f6d 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c | |||
@@ -159,7 +159,6 @@ struct cbq_sched_data { | |||
159 | struct cbq_class *tx_borrowed; | 159 | struct cbq_class *tx_borrowed; |
160 | int tx_len; | 160 | int tx_len; |
161 | psched_time_t now; /* Cached timestamp */ | 161 | psched_time_t now; /* Cached timestamp */ |
162 | psched_time_t now_rt; /* Cached real time */ | ||
163 | unsigned int pmask; | 162 | unsigned int pmask; |
164 | 163 | ||
165 | struct hrtimer delay_timer; | 164 | struct hrtimer delay_timer; |
@@ -353,12 +352,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) | |||
353 | int toplevel = q->toplevel; | 352 | int toplevel = q->toplevel; |
354 | 353 | ||
355 | if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { | 354 | if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { |
356 | psched_time_t now; | 355 | psched_time_t now = psched_get_time(); |
357 | psched_tdiff_t incr; | ||
358 | |||
359 | now = psched_get_time(); | ||
360 | incr = now - q->now_rt; | ||
361 | now = q->now + incr; | ||
362 | 356 | ||
363 | do { | 357 | do { |
364 | if (cl->undertime < now) { | 358 | if (cl->undertime < now) { |
@@ -700,8 +694,13 @@ cbq_update(struct cbq_sched_data *q) | |||
700 | struct cbq_class *this = q->tx_class; | 694 | struct cbq_class *this = q->tx_class; |
701 | struct cbq_class *cl = this; | 695 | struct cbq_class *cl = this; |
702 | int len = q->tx_len; | 696 | int len = q->tx_len; |
697 | psched_time_t now; | ||
703 | 698 | ||
704 | q->tx_class = NULL; | 699 | q->tx_class = NULL; |
700 | /* Time integrator. We calculate EOS time | ||
701 | * by adding expected packet transmission time. | ||
702 | */ | ||
703 | now = q->now + L2T(&q->link, len); | ||
705 | 704 | ||
706 | for ( ; cl; cl = cl->share) { | 705 | for ( ; cl; cl = cl->share) { |
707 | long avgidle = cl->avgidle; | 706 | long avgidle = cl->avgidle; |
@@ -717,7 +716,7 @@ cbq_update(struct cbq_sched_data *q) | |||
717 | * idle = (now - last) - last_pktlen/rate | 716 | * idle = (now - last) - last_pktlen/rate |
718 | */ | 717 | */ |
719 | 718 | ||
720 | idle = q->now - cl->last; | 719 | idle = now - cl->last; |
721 | if ((unsigned long)idle > 128*1024*1024) { | 720 | if ((unsigned long)idle > 128*1024*1024) { |
722 | avgidle = cl->maxidle; | 721 | avgidle = cl->maxidle; |
723 | } else { | 722 | } else { |
@@ -761,7 +760,7 @@ cbq_update(struct cbq_sched_data *q) | |||
761 | idle -= L2T(&q->link, len); | 760 | idle -= L2T(&q->link, len); |
762 | idle += L2T(cl, len); | 761 | idle += L2T(cl, len); |
763 | 762 | ||
764 | cl->undertime = q->now + idle; | 763 | cl->undertime = now + idle; |
765 | } else { | 764 | } else { |
766 | /* Underlimit */ | 765 | /* Underlimit */ |
767 | 766 | ||
@@ -771,7 +770,8 @@ cbq_update(struct cbq_sched_data *q) | |||
771 | else | 770 | else |
772 | cl->avgidle = avgidle; | 771 | cl->avgidle = avgidle; |
773 | } | 772 | } |
774 | cl->last = q->now; | 773 | if ((s64)(now - cl->last) > 0) |
774 | cl->last = now; | ||
775 | } | 775 | } |
776 | 776 | ||
777 | cbq_update_toplevel(q, this, q->tx_borrowed); | 777 | cbq_update_toplevel(q, this, q->tx_borrowed); |
@@ -943,31 +943,13 @@ cbq_dequeue(struct Qdisc *sch) | |||
943 | struct sk_buff *skb; | 943 | struct sk_buff *skb; |
944 | struct cbq_sched_data *q = qdisc_priv(sch); | 944 | struct cbq_sched_data *q = qdisc_priv(sch); |
945 | psched_time_t now; | 945 | psched_time_t now; |
946 | psched_tdiff_t incr; | ||
947 | 946 | ||
948 | now = psched_get_time(); | 947 | now = psched_get_time(); |
949 | incr = now - q->now_rt; | 948 | |
950 | 949 | if (q->tx_class) | |
951 | if (q->tx_class) { | ||
952 | psched_tdiff_t incr2; | ||
953 | /* Time integrator. We calculate EOS time | ||
954 | * by adding expected packet transmission time. | ||
955 | * If real time is greater, we warp artificial clock, | ||
956 | * so that: | ||
957 | * | ||
958 | * cbq_time = max(real_time, work); | ||
959 | */ | ||
960 | incr2 = L2T(&q->link, q->tx_len); | ||
961 | q->now += incr2; | ||
962 | cbq_update(q); | 950 | cbq_update(q); |
963 | if ((incr -= incr2) < 0) | 951 | |
964 | incr = 0; | 952 | q->now = now; |
965 | q->now += incr; | ||
966 | } else { | ||
967 | if (now > q->now) | ||
968 | q->now = now; | ||
969 | } | ||
970 | q->now_rt = now; | ||
971 | 953 | ||
972 | for (;;) { | 954 | for (;;) { |
973 | q->wd_expires = 0; | 955 | q->wd_expires = 0; |
@@ -1223,7 +1205,6 @@ cbq_reset(struct Qdisc *sch) | |||
1223 | hrtimer_cancel(&q->delay_timer); | 1205 | hrtimer_cancel(&q->delay_timer); |
1224 | q->toplevel = TC_CBQ_MAXLEVEL; | 1206 | q->toplevel = TC_CBQ_MAXLEVEL; |
1225 | q->now = psched_get_time(); | 1207 | q->now = psched_get_time(); |
1226 | q->now_rt = q->now; | ||
1227 | 1208 | ||
1228 | for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) | 1209 | for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) |
1229 | q->active[prio] = NULL; | 1210 | q->active[prio] = NULL; |
@@ -1407,7 +1388,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) | |||
1407 | q->delay_timer.function = cbq_undelay; | 1388 | q->delay_timer.function = cbq_undelay; |
1408 | q->toplevel = TC_CBQ_MAXLEVEL; | 1389 | q->toplevel = TC_CBQ_MAXLEVEL; |
1409 | q->now = psched_get_time(); | 1390 | q->now = psched_get_time(); |
1410 | q->now_rt = q->now; | ||
1411 | 1391 | ||
1412 | cbq_link_class(&q->link); | 1392 | cbq_link_class(&q->link); |
1413 | 1393 | ||