Diffstat (limited to 'net/sched/sch_fq.c')
-rw-r--r--  net/sched/sch_fq.c  137
1 file changed, 83 insertions(+), 54 deletions(-)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 32ad015ee8ce..95d843961907 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -88,7 +88,7 @@ struct fq_sched_data {
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
 	u32		initial_quantum;
-	u32		flow_default_rate;/* rate per flow : bytes per second */
+	u32		flow_refill_delay;
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
 	struct rb_root	*fq_root;
@@ -115,6 +115,7 @@ static struct fq_flow detached, throttled;
 static void fq_flow_set_detached(struct fq_flow *f)
 {
 	f->next = &detached;
+	f->age = jiffies;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
@@ -209,21 +210,15 @@ static void fq_gc(struct fq_sched_data *q,
 	}
 }
 
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
-	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
 static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 {
 	struct rb_node **p, *parent;
 	struct sock *sk = skb->sk;
 	struct rb_root *root;
 	struct fq_flow *f;
-	int band;
 
 	/* warning: no starvation prevention... */
-	band = prio2band[skb->priority & TC_PRIO_MAX];
-	if (unlikely(band == 0))
+	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
 		return &q->internal;
 
 	if (unlikely(!sk)) {
@@ -255,6 +250,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 			     f->socket_hash != sk->sk_hash)) {
 			f->credit = q->initial_quantum;
 			f->socket_hash = sk->sk_hash;
+			f->time_next_packet = 0ULL;
 		}
 		return f;
 	}
@@ -285,7 +281,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 
 
 /* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
+static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 {
 	struct sk_buff *skb = flow->head;
 
@@ -293,6 +289,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
 		flow->head = skb->next;
 		skb->next = NULL;
 		flow->qlen--;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		sch->q.qlen--;
 	}
 	return skb;
 }
@@ -370,17 +368,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	f->qlen++;
-	flow_queue_add(f, skb);
 	if (skb_is_retransmit(skb))
 		q->stat_tcp_retrans++;
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	if (fq_flow_is_detached(f)) {
 		fq_flow_add_tail(&q->new_flows, f);
-		if (q->quantum > f->credit)
-			f->credit = q->quantum;
+		if (time_after(jiffies, f->age + q->flow_refill_delay))
+			f->credit = max_t(u32, f->credit, q->quantum);
 		q->inactive_flows--;
 		qdisc_unthrottled(sch);
 	}
+
+	/* Note: this overwrites f->age */
+	flow_queue_add(f, skb);
+
 	if (unlikely(f == &q->internal)) {
 		q->stat_internal_packets++;
 		qdisc_unthrottled(sch);
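The hunk above changes when a re-activated flow gets fresh credit: instead of always topping the credit up to one quantum, the refill now happens only if the flow has been detached for at least flow_refill_delay (40 ms by default, see fq_init further down), so short pauses no longer defeat pacing. Below is a minimal userspace sketch of that rule, not kernel code; names and the example values (40 ticks, quantum 3028 = 2 x 1514 bytes) are assumptions for illustration only.

/* Sketch of the refill rule: a flow idle for at least refill_delay gets its
 * credit topped back up to one quantum on re-activation (the patch does this
 * with time_after() and max_t(u32, f->credit, q->quantum)); a briefly paused
 * flow keeps whatever credit it had.
 */
#include <stdio.h>
#include <stdint.h>

struct flow_sketch {
	int32_t  credit;  /* bytes the flow may still send this round */
	uint64_t age;     /* tick timestamp of the last detach */
};

static void refill_on_reactivation(struct flow_sketch *f, uint64_t now,
				   uint64_t refill_delay, uint32_t quantum)
{
	if (now > f->age + refill_delay && f->credit < (int32_t)quantum)
		f->credit = quantum;
}

int main(void)
{
	struct flow_sketch f = { .credit = 120, .age = 1000 };

	refill_on_reactivation(&f, 1100, 40, 3028); /* idle 100 > 40 ticks */
	printf("credit after refill: %d\n", f.credit); /* prints 3028 */
	return 0;
}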
@@ -418,8 +419,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
+	u32 rate;
 
-	skb = fq_dequeue_head(&q->internal);
+	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
 		goto out;
 	fq_check_throttled(q, now);
@@ -449,7 +451,7 @@ begin:
 		goto begin;
 	}
 
-	skb = fq_dequeue_head(f);
+	skb = fq_dequeue_head(sch, f);
 	if (!skb) {
 		head->first = f->next;
 		/* force a pass through old_flows to prevent starvation */
@@ -457,7 +459,6 @@ begin:
 			fq_flow_add_tail(&q->old_flows, f);
 		} else {
 			fq_flow_set_detached(f);
-			f->age = jiffies;
 			q->inactive_flows++;
 		}
 		goto begin;
@@ -466,43 +467,70 @@ begin:
 	f->time_next_packet = now;
 	f->credit -= qdisc_pkt_len(skb);
 
-	if (f->credit <= 0 &&
-	    q->rate_enable &&
-	    skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
-		u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
+	if (f->credit > 0 || !q->rate_enable)
+		goto out;
 
-		rate = min(rate, q->flow_max_rate);
-		if (rate) {
-			u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
+	rate = q->flow_max_rate;
+	if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
+		rate = min(skb->sk->sk_pacing_rate, rate);
 
-			do_div(len, rate);
-			/* Since socket rate can change later,
-			 * clamp the delay to 125 ms.
-			 * TODO: maybe segment the too big skb, as in commit
-			 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
-			 */
-			if (unlikely(len > 125 * NSEC_PER_MSEC)) {
-				len = 125 * NSEC_PER_MSEC;
-				q->stat_pkts_too_long++;
-			}
+	if (rate != ~0U) {
+		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
+		u64 len = (u64)plen * NSEC_PER_SEC;
 
-			f->time_next_packet = now + len;
+		if (likely(rate))
+			do_div(len, rate);
+		/* Since socket rate can change later,
+		 * clamp the delay to 125 ms.
+		 * TODO: maybe segment the too big skb, as in commit
+		 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+		 */
+		if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+			len = 125 * NSEC_PER_MSEC;
+			q->stat_pkts_too_long++;
 		}
+
+		f->time_next_packet = now + len;
 	}
 out:
-	sch->qstats.backlog -= qdisc_pkt_len(skb);
 	qdisc_bstats_update(sch, skb);
-	sch->q.qlen--;
 	qdisc_unthrottled(sch);
 	return skb;
 }
 
 static void fq_reset(struct Qdisc *sch)
 {
+	struct fq_sched_data *q = qdisc_priv(sch);
+	struct rb_root *root;
 	struct sk_buff *skb;
+	struct rb_node *p;
+	struct fq_flow *f;
+	unsigned int idx;
 
-	while ((skb = fq_dequeue(sch)) != NULL)
+	while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
 		kfree_skb(skb);
+
+	if (!q->fq_root)
+		return;
+
+	for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+		root = &q->fq_root[idx];
+		while ((p = rb_first(root)) != NULL) {
+			f = container_of(p, struct fq_flow, fq_node);
+			rb_erase(p, root);
+
+			while ((skb = fq_dequeue_head(sch, f)) != NULL)
+				kfree_skb(skb);
+
+			kmem_cache_free(fq_flow_cachep, f);
+		}
+	}
+	q->new_flows.first = NULL;
+	q->old_flows.first = NULL;
+	q->delayed = RB_ROOT;
+	q->flows = 0;
+	q->inactive_flows = 0;
+	q->throttled_flows = 0;
 }
 
 static void fq_rehash(struct fq_sched_data *q,
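In the rewritten pacing branch above, the inter-packet gap is computed from max(packet length, quantum) divided by the effective rate (the socket pacing rate capped by flow_max_rate), then clamped to 125 ms; taking the quantum as a floor means flows sending frames smaller than one quantum are paced on quantum-sized bursts rather than per tiny frame. Below is a standalone sketch of that arithmetic; the MTU, quantum and rate values are assumptions for illustration only.

/* Sketch of the pacing-delay computation: gap = max(pkt_len, quantum) / rate,
 * clamped at 125 ms (the qdisc counts the clamp in stat_pkts_too_long).
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_MSEC 1000000ULL

static uint64_t pacing_delay_ns(uint32_t pkt_len, uint32_t quantum, uint32_t rate)
{
	uint32_t plen = pkt_len > quantum ? pkt_len : quantum;
	uint64_t len = (uint64_t)plen * NSEC_PER_SEC;

	if (rate)
		len /= rate;                 /* do_div(len, rate) in the patch */
	if (len > 125 * NSEC_PER_MSEC)
		len = 125 * NSEC_PER_MSEC;
	return len;
}

int main(void)
{
	/* 1514-byte frame, quantum = 2 * 1514 bytes, pacing rate 1 MB/s */
	printf("gap: %llu ns\n",
	       (unsigned long long)pacing_delay_ns(1514, 3028, 1000000));
	/* prints "gap: 3028000 ns", i.e. about 3 ms between packets */
	return 0;
}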
@@ -584,6 +612,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_FLOW_DEFAULT_RATE]	= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
+	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -622,10 +651,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
 
 	if (tb[TCA_FQ_INITIAL_QUANTUM])
-		q->quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
 
 	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
-		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
+				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
 
 	if (tb[TCA_FQ_FLOW_MAX_RATE])
 		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
@@ -639,12 +669,20 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 			err = -EINVAL;
 	}
 
+	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
+		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
+
+		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+	}
+
 	if (!err)
 		err = fq_resize(q, fq_log);
 
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = fq_dequeue(sch);
 
+		if (!skb)
+			break;
 		kfree_skb(skb);
 		drop_count++;
 	}
@@ -657,21 +695,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 static void fq_destroy(struct Qdisc *sch)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
-	struct rb_root *root;
-	struct rb_node *p;
-	unsigned int idx;
 
-	if (q->fq_root) {
-		for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
-			root = &q->fq_root[idx];
-			while ((p = rb_first(root)) != NULL) {
-				rb_erase(p, root);
-				kmem_cache_free(fq_flow_cachep,
-						container_of(p, struct fq_flow, fq_node));
-			}
-		}
-		kfree(q->fq_root);
-	}
+	fq_reset(sch);
+	kfree(q->fq_root);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -684,7 +710,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->flow_plimit		= 100;
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
-	q->flow_default_rate	= 0;
+	q->flow_refill_delay	= msecs_to_jiffies(40);
 	q->flow_max_rate	= ~0U;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
@@ -711,13 +737,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 
+	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+
 	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_DEFAULT_RATE, q->flow_default_rate) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
+			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
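The new TCA_FQ_FLOW_REFILL_DELAY attribute is carried in microseconds over netlink, stored in jiffies by fq_change() and converted back to microseconds by fq_dump() above. Below is a small userspace sketch of that round trip; the tick rate is an assumption for the example (the kernel uses its configured HZ), and the helper names here are illustrative, not kernel functions.

/* Sketch of the usec <-> jiffies handling for TCA_FQ_FLOW_REFILL_DELAY.
 * Like usecs_to_jiffies() in the kernel, the conversion rounds up.
 */
#include <stdio.h>
#include <stdint.h>

#define HZ 250U	/* assumed tick rate for this example */

static uint32_t usecs_to_jiffies_sketch(uint32_t us)
{
	return (uint32_t)(((uint64_t)us * HZ + 999999) / 1000000);
}

static uint32_t jiffies_to_usecs_sketch(uint32_t j)
{
	return (uint32_t)((uint64_t)j * 1000000 / HZ);
}

int main(void)
{
	uint32_t requested_us = 40000;	/* 40 ms, the default from fq_init() */
	uint32_t j = usecs_to_jiffies_sketch(requested_us);

	printf("stored: %u jiffies, dumped: %u us\n",
	       j, jiffies_to_usecs_sketch(j));	/* 10 jiffies, 40000 us */
	return 0;
}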