author	Eric Dumazet <eric.dumazet@gmail.com>	2011-12-28 18:12:02 -0500
committer	David S. Miller <davem@davemloft.net>	2011-12-30 17:12:23 -0500
commit	50612537e9ab29693122fab20fc1eed235054ffe (patch)
tree	389279a18e07ce046ef694b37f44ccbc7fe796a1 /net/sched
parent	32b293a53deeb220769f9a29357cb151cfb8ee26 (diff)
netem: fix classful handling
Commit 10f6dfcfde (Revert "sch_netem: Remove classful functionality")
reintroduced classful functionality to netem, but broke basic netem
behavior: netem uses a t(ime)fifo queue and stores timestamps in
skb->cb[]. If the qdisc is changed, time constraints are not respected
and another qdisc can destroy skb->cb[] and block netem at dequeue time.

Fix this by always using the internal tfifo, and optionally attaching a
child qdisc (or a tree of qdiscs) to netem.

Example of use:

DEV=eth3
tc qdisc del dev $DEV root
tc qdisc add dev $DEV root handle 30: est 1sec 8sec netem delay 20ms 10ms
tc qdisc add dev $DEV handle 40:0 parent 30:0 tbf \
	burst 20480 limit 20480 mtu 1514 rate 32000bps

qdisc netem 30: root refcnt 18 limit 1000 delay 20.0ms  10.0ms
 Sent 190792 bytes 413 pkt (dropped 0, overlimits 0 requeues 0)
 rate 18416bit 3pps backlog 0b 0p requeues 0
qdisc tbf 40: parent 30: rate 256000bit burst 20Kb/8 mpu 0b lat 0us
 Sent 190792 bytes 413 pkt (dropped 6, overlimits 10 requeues 0)
 backlog 0b 5p requeues 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
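For reference, the skb->cb[] usage the message describes looks roughly like
this — a sketch paraphrased from the sch_netem.c of this era, not part of
the patch itself. netem overlays a private struct on the generic qdisc
control block, a scratch area that any qdisc handling the skb may
overwrite, which is why queueing through a foreign child qdisc could
corrupt time_to_send:

/* Sketch (assumes the contemporary sch_netem.c definitions): netem's
 * per-packet departure time lives in skb->cb[] via the generic qdisc
 * control block.  Any other qdisc that takes ownership of the skb may
 * scribble over this area, so the timestamp is only trustworthy while
 * the skb sits in netem's own tfifo.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}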
Diffstat (limited to 'net/sched')
-rw-r--r--	net/sched/sch_netem.c	202
1 file changed, 81 insertions(+), 121 deletions(-)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a92c1b3dab83..06a5cebad342 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -67,7 +67,11 @@
  */
 
 struct netem_sched_data {
+	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */
+
+	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
+
 	struct qdisc_watchdog watchdog;
 
 	psched_tdiff_t latency;
@@ -117,7 +121,9 @@ struct netem_sched_data {
 
 };
 
-/* Time stamp put into socket buffer control block */
+/* Time stamp put into socket buffer control block
+ * Only valid when skbs are in our internal t(ime)fifo queue.
+ */
 struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
@@ -324,6 +330,31 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 	return PSCHED_NS2TICKS(ticks);
 }
 
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+	struct sk_buff_head *list = &sch->q;
+	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+	struct sk_buff *skb;
+
+	if (likely(skb_queue_len(list) < sch->limit)) {
+		skb = skb_peek_tail(list);
+		/* Optimize for add at tail */
+		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
+			return qdisc_enqueue_tail(nskb, sch);
+
+		skb_queue_reverse_walk(list, skb) {
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				break;
+		}
+
+		__skb_queue_after(list, skb, nskb);
+		sch->qstats.backlog += qdisc_pkt_len(nskb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	return qdisc_reshape_fail(nskb, sch);
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -399,7 +430,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	now = psched_get_time();
 
 	if (q->rate) {
-		struct sk_buff_head *list = &q->qdisc->q;
+		struct sk_buff_head *list = &sch->q;
 
 		delay += packet_len_2_sched_time(skb->len, q);
 
@@ -417,7 +448,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = qdisc_enqueue(skb, q->qdisc);
+		ret = tfifo_enqueue(skb, sch);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -426,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 
-		__skb_queue_head(&q->qdisc->q, skb);
+		__skb_queue_head(&sch->q, skb);
 		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
 		q->qdisc->qstats.requeues++;
 		ret = NET_XMIT_SUCCESS;
@@ -439,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 
-	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
 
 static unsigned int netem_drop(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned int len = 0;
+	unsigned int len;
 
-	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
-		sch->q.qlen--;
+	len = qdisc_queue_drop(sch);
+	if (!len && q->qdisc && q->qdisc->ops->drop)
+		len = q->qdisc->ops->drop(q->qdisc);
+	if (len)
 		sch->qstats.drops++;
-	}
+
 	return len;
 }
 
@@ -463,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (qdisc_is_throttled(sch))
 		return NULL;
 
-	skb = q->qdisc->ops->peek(q->qdisc);
+tfifo_dequeue:
+	skb = qdisc_peek_head(sch);
 	if (skb) {
 		const struct netem_skb_cb *cb = netem_skb_cb(skb);
-		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
-		if (cb->time_to_send <= now) {
-			skb = qdisc_dequeue_peeked(q->qdisc);
+		if (cb->time_to_send <= psched_get_time()) {
+			skb = qdisc_dequeue_tail(sch);
 			if (unlikely(!skb))
-				return NULL;
+				goto qdisc_dequeue;
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*
@@ -483,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			skb->tstamp.tv64 = 0;
 #endif
 
-			sch->q.qlen--;
+			if (q->qdisc) {
+				int err = qdisc_enqueue(skb, q->qdisc);
+
+				if (unlikely(err != NET_XMIT_SUCCESS)) {
+					if (net_xmit_drop_count(err)) {
+						sch->qstats.drops++;
+						qdisc_tree_decrease_qlen(sch, 1);
+					}
+				}
+				goto tfifo_dequeue;
+			}
+deliver:
 			qdisc_unthrottled(sch);
 			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
+		if (q->qdisc) {
+			skb = q->qdisc->ops->dequeue(q->qdisc);
+			if (skb)
+				goto deliver;
+		}
 		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
+qdisc_dequeue:
+	if (q->qdisc) {
+		skb = q->qdisc->ops->dequeue(q->qdisc);
+		if (skb)
+			goto deliver;
+	}
 	return NULL;
 }
 
@@ -499,8 +553,9 @@ static void netem_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	qdisc_reset(q->qdisc);
-	sch->q.qlen = 0;
+	qdisc_reset_queue(sch);
+	if (q->qdisc)
+		qdisc_reset(q->qdisc);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -690,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ret < 0)
 		return ret;
 
-	ret = fifo_set_limit(q->qdisc, qopt->limit);
-	if (ret) {
-		pr_info("netem: can't set fifo limit\n");
-		return ret;
-	}
+	sch->limit = qopt->limit;
 
 	q->latency = qopt->latency;
 	q->jitter = qopt->jitter;
@@ -735,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	return ret;
 }
 
-/*
- * Special case version of FIFO queue for use by netem.
- * It queues in order based on timestamps in skb's
- */
-struct fifo_sched_data {
-	u32 limit;
-	psched_time_t oldest;
-};
-
-static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct sk_buff_head *list = &sch->q;
-	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
-	struct sk_buff *skb;
-
-	if (likely(skb_queue_len(list) < q->limit)) {
-		/* Optimize for add at tail */
-		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
-			q->oldest = tnext;
-			return qdisc_enqueue_tail(nskb, sch);
-		}
-
-		skb_queue_reverse_walk(list, skb) {
-			const struct netem_skb_cb *cb = netem_skb_cb(skb);
-
-			if (tnext >= cb->time_to_send)
-				break;
-		}
-
-		__skb_queue_after(list, skb, nskb);
-
-		sch->qstats.backlog += qdisc_pkt_len(nskb);
-
-		return NET_XMIT_SUCCESS;
-	}
-
-	return qdisc_reshape_fail(nskb, sch);
-}
-
-static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (opt) {
-		struct tc_fifo_qopt *ctl = nla_data(opt);
-		if (nla_len(opt) < sizeof(*ctl))
-			return -EINVAL;
-
-		q->limit = ctl->limit;
-	} else
-		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
-
-	q->oldest = PSCHED_PASTPERFECT;
-	return 0;
-}
-
-static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct tc_fifo_qopt opt = { .limit = q->limit };
-
-	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	return skb->len;
-
-nla_put_failure:
-	return -1;
-}
-
-static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
-	.id		=	"tfifo",
-	.priv_size	=	sizeof(struct fifo_sched_data),
-	.enqueue	=	tfifo_enqueue,
-	.dequeue	=	qdisc_dequeue_head,
-	.peek		=	qdisc_peek_head,
-	.drop		=	qdisc_queue_drop,
-	.init		=	tfifo_init,
-	.reset		=	qdisc_reset_queue,
-	.change		=	tfifo_init,
-	.dump		=	tfifo_dump,
-};
-
 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -828,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	q->loss_model = CLG_RANDOM;
-	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
-				     TC_H_MAKE(sch->handle, 1));
-	if (!q->qdisc) {
-		pr_notice("netem: qdisc create tfifo qdisc failed\n");
-		return -ENOMEM;
-	}
-
 	ret = netem_change(sch, opt);
-	if (ret) {
+	if (ret)
 		pr_info("netem: change failed\n");
-		qdisc_destroy(q->qdisc);
-	}
 	return ret;
 }
 
@@ -848,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-	qdisc_destroy(q->qdisc);
+	if (q->qdisc)
+		qdisc_destroy(q->qdisc);
 	dist_free(q->delay_dist);
 }
 
@@ -952,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (cl != 1)	/* only one class */
+	if (cl != 1 || !q->qdisc)	/* only one class */
 		return -ENOENT;
 
 	tcm->tcm_handle |= TC_H_MIN(1);
@@ -966,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (new == NULL)
-		new = &noop_qdisc;
-
 	sch_tree_lock(sch);
 	*old = q->qdisc;
 	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
+	if (*old) {
+		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+		qdisc_reset(*old);
+	}
 	sch_tree_unlock(sch);
 
 	return 0;