commit     50612537e9ab29693122fab20fc1eed235054ffe
tree       389279a18e07ce046ef694b37f44ccbc7fe796a1 /net/sched
parent     32b293a53deeb220769f9a29357cb151cfb8ee26
author     Eric Dumazet <eric.dumazet@gmail.com>  2011-12-28 18:12:02 -0500
committer  David S. Miller <davem@davemloft.net>  2011-12-30 17:12:23 -0500
netem: fix classful handling
Commit 10f6dfcfde (Revert "sch_netem: Remove classful functionality")
reintroduced classful functionality to netem, but broke basic netem
behavior:

netem uses a t(ime)fifo queue and stores timestamps in skb->cb[].
If the inner qdisc is changed, time constraints are no longer respected,
and another qdisc can destroy skb->cb[] and block netem at dequeue time.

Fix this by always using the internal tfifo, and optionally attaching a
child qdisc (or a tree of qdiscs) to netem.
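To make the failure mode concrete: skb->cb[] is a small scratch area owned by whichever layer currently holds the skb, and every qdisc is free to overlay its own control-block struct on those same bytes. A minimal userspace sketch of the clobbering (toy_skb, netem_cb and other_cb are illustrative stand-ins, not kernel definitions):

#include <stdio.h>
#include <string.h>

/* Toy skb: like the kernel sk_buff, it carries one shared scratch
 * control block that each layer overlays its own struct on. */
struct toy_skb {
	char cb[48];
};

struct netem_cb { long long time_to_send; };  /* netem's overlay */
struct other_cb { int state[4]; };            /* another qdisc's overlay */

int main(void)
{
	struct toy_skb skb;
	struct netem_cb *ncb = (struct netem_cb *)skb.cb;
	struct other_cb *ocb = (struct other_cb *)skb.cb;

	ncb->time_to_send = 123456789;             /* netem stamps the packet */
	memset(ocb->state, 0, sizeof(ocb->state)); /* inner qdisc scribbles */
	printf("%lld\n", ncb->time_to_send);       /* prints 0: timestamp gone */
	return 0;
}

Once the timestamp is destroyed, netem can no longer tell when the packet is due; keeping timestamped skbs in the internal tfifo avoids handing them to code that does not preserve cb[].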
Example of use:
DEV=eth3
tc qdisc del dev $DEV root
tc qdisc add dev $DEV root handle 30: est 1sec 8sec netem delay 20ms 10ms
tc qdisc add dev $DEV handle 40:0 parent 30:0 tbf \
burst 20480 limit 20480 mtu 1514 rate 32000bps
qdisc netem 30: root refcnt 18 limit 1000 delay 20.0ms 10.0ms
 Sent 190792 bytes 413 pkt (dropped 0, overlimits 0 requeues 0)
 rate 18416bit 3pps backlog 0b 0p requeues 0
qdisc tbf 40: parent 30: rate 256000bit burst 20Kb/8 mpu 0b lat 0us
 Sent 190792 bytes 413 pkt (dropped 6, overlimits 10 requeues 0)
 backlog 0b 5p requeues 0
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
 net/sched/sch_netem.c | 202 ++++++++++++++++------------------------
 1 file changed, 81 insertions(+), 121 deletions(-)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a92c1b3dab83..06a5cebad342 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -67,7 +67,11 @@
 	 */
 
 struct netem_sched_data {
+	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */
+
+	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
+
 	struct qdisc_watchdog watchdog;
 
 	psched_tdiff_t latency;
@@ -117,7 +121,9 @@ struct netem_sched_data {
 
 };
 
-/* Time stamp put into socket buffer control block */
+/* Time stamp put into socket buffer control block
+ * Only valid when skbs are in our internal t(ime)fifo queue.
+ */
 struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
@@ -324,6 +330,31 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
 	return PSCHED_NS2TICKS(ticks);
 }
 
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+	struct sk_buff_head *list = &sch->q;
+	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+	struct sk_buff *skb;
+
+	if (likely(skb_queue_len(list) < sch->limit)) {
+		skb = skb_peek_tail(list);
+		/* Optimize for add at tail */
+		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
+			return qdisc_enqueue_tail(nskb, sch);
+
+		skb_queue_reverse_walk(list, skb) {
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				break;
+		}
+
+		__skb_queue_after(list, skb, nskb);
+		sch->qstats.backlog += qdisc_pkt_len(nskb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	return qdisc_reshape_fail(nskb, sch);
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
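A note on the ordering above: the tfifo keeps skbs sorted by time_to_send, earliest at the head, and the common case of non-decreasing delays reduces to a tail append. A hedged userspace sketch of the same ordered-insert technique, with a plain sentinel list standing in for sk_buff_head (all names here are illustrative, none of this is kernel API):

#include <stdio.h>
#include <stdlib.h>

struct pkt {
	long long time_to_send;          /* stand-in for netem_skb_cb */
	struct pkt *prev, *next;
};

struct toy_tfifo {
	struct pkt head;                 /* sentinel node */
	unsigned int len, limit;
};

static void toy_tfifo_init(struct toy_tfifo *q, unsigned int limit)
{
	q->head.prev = q->head.next = &q->head;
	q->len = 0;
	q->limit = limit;
}

/* Insert in time order; like netem, the tail append is the fast path. */
static int toy_tfifo_insert(struct toy_tfifo *q, struct pkt *p)
{
	struct pkt *pos;

	if (q->len >= q->limit)
		return -1;               /* caller drops the packet */

	/* walk from tail toward head, as skb_queue_reverse_walk() does */
	for (pos = q->head.prev; pos != &q->head; pos = pos->prev)
		if (p->time_to_send >= pos->time_to_send)
			break;

	p->prev = pos;                   /* insert after pos */
	p->next = pos->next;
	pos->next->prev = p;
	pos->next = p;
	q->len++;
	return 0;
}

int main(void)
{
	struct toy_tfifo q;
	long long times[] = { 10, 30, 20, 40 }; /* 20 arrives out of order */
	struct pkt *p;
	int i;

	toy_tfifo_init(&q, 16);
	for (i = 0; i < 4; i++) {
		p = malloc(sizeof(*p));
		p->time_to_send = times[i];
		toy_tfifo_insert(&q, p);
	}
	for (p = q.head.next; p != &q.head; p = p->next)
		printf("%lld\n", p->time_to_send);  /* 10 20 30 40 */
	return 0;
}

With random jitter enabled, delays are not monotonic, so the reverse walk from the tail is what preserves delivery order.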
@@ -399,7 +430,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	now = psched_get_time();
 
 	if (q->rate) {
-		struct sk_buff_head *list = &q->qdisc->q;
+		struct sk_buff_head *list = &sch->q;
 
 		delay += packet_len_2_sched_time(skb->len, q);
 
@@ -417,7 +448,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = qdisc_enqueue(skb, q->qdisc);
+		ret = tfifo_enqueue(skb, sch);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -426,7 +457,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 
-		__skb_queue_head(&q->qdisc->q, skb);
+		__skb_queue_head(&sch->q, skb);
 		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
 		q->qdisc->qstats.requeues++;
 		ret = NET_XMIT_SUCCESS;
@@ -439,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 
-	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
 
 static unsigned int netem_drop(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned int len = 0;
+	unsigned int len;
 
-	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
-		sch->q.qlen--;
+	len = qdisc_queue_drop(sch);
+	if (!len && q->qdisc && q->qdisc->ops->drop)
+		len = q->qdisc->ops->drop(q->qdisc);
+	if (len)
 		sch->qstats.drops++;
-	}
+
 	return len;
 }
 
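The reworked netem_drop() above reclaims from the internal tfifo first and falls back to the child qdisc only when the tfifo yields nothing. A toy illustration of that precedence (the two counters are invented stand-ins for the two backlogs, not kernel state):

#include <stdio.h>

static unsigned int local_backlog = 3;   /* packets in netem's tfifo */
static unsigned int child_backlog = 2;   /* packets in the child qdisc */

/* Mirrors the patched netem_drop(): internal queue first, child second. */
static unsigned int toy_netem_drop(void)
{
	unsigned int len = 0;

	if (local_backlog) {
		local_backlog--;
		len = 1000;              /* pretend packet length */
	} else if (child_backlog) {
		child_backlog--;
		len = 1000;
	}
	return len;                      /* 0 means nothing left to drop */
}

int main(void)
{
	while (toy_netem_drop())
		printf("dropped; local=%u child=%u\n",
		       local_backlog, child_backlog);
	return 0;
}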
@@ -463,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (qdisc_is_throttled(sch))
 		return NULL;
 
-	skb = q->qdisc->ops->peek(q->qdisc);
+tfifo_dequeue:
+	skb = qdisc_peek_head(sch);
 	if (skb) {
 		const struct netem_skb_cb *cb = netem_skb_cb(skb);
-		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
-		if (cb->time_to_send <= now) {
-			skb = qdisc_dequeue_peeked(q->qdisc);
+		if (cb->time_to_send <= psched_get_time()) {
+			skb = qdisc_dequeue_tail(sch);
 			if (unlikely(!skb))
-				return NULL;
+				goto qdisc_dequeue;
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*
@@ -483,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			skb->tstamp.tv64 = 0;
 #endif
 
-			sch->q.qlen--;
+			if (q->qdisc) {
+				int err = qdisc_enqueue(skb, q->qdisc);
+
+				if (unlikely(err != NET_XMIT_SUCCESS)) {
+					if (net_xmit_drop_count(err)) {
+						sch->qstats.drops++;
+						qdisc_tree_decrease_qlen(sch, 1);
+					}
+				}
+				goto tfifo_dequeue;
+			}
+deliver:
 			qdisc_unthrottled(sch);
 			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
+		if (q->qdisc) {
+			skb = q->qdisc->ops->dequeue(q->qdisc);
+			if (skb)
+				goto deliver;
+		}
 		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
+qdisc_dequeue:
+	if (q->qdisc) {
+		skb = q->qdisc->ops->dequeue(q->qdisc);
+		if (skb)
+			goto deliver;
+	}
 	return NULL;
 }
 
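The dequeue rework is easiest to read as a loop: due packets migrate from the time-sorted tfifo into the child qdisc when one is attached, delivery then happens from the child, and an empty or not-yet-due tfifo falls through to draining the child. A hedged userspace model of that control flow (fixed-size arrays stand in for the queues; nothing here is kernel API):

#include <stdio.h>

#define EMPTY -1

static long long tfifo[8];  static int tn;   /* sorted, earliest first */
static long long child[8];  static int cn;   /* plain FIFO child */
static int have_child = 1;

static long long toy_dequeue(long long now)
{
	long long skb;
	int i;

	for (;;) {                               /* "goto tfifo_dequeue" */
		if (tn && tfifo[0] <= now) {
			skb = tfifo[0];          /* head is due: unlink it */
			for (i = 1; i < tn; i++)
				tfifo[i - 1] = tfifo[i];
			tn--;
			if (have_child) {        /* feed child, re-check head */
				child[cn++] = skb;
				continue;
			}
			return skb;              /* "deliver" */
		}
		break;
	}
	if (have_child && cn) {                  /* "qdisc_dequeue" */
		skb = child[0];
		for (i = 1; i < cn; i++)
			child[i - 1] = child[i];
		cn--;
		return skb;
	}
	return EMPTY;   /* nothing due: the real code arms the watchdog */
}

int main(void)
{
	tfifo[0] = 5; tfifo[1] = 20; tn = 2;
	printf("%lld\n", toy_dequeue(10));   /* 5: due, routed via child */
	printf("%lld\n", toy_dequeue(10));   /* -1: 20 is not yet due */
	printf("%lld\n", toy_dequeue(25));   /* 20 */
	return 0;
}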
@@ -499,8 +553,9 @@ static void netem_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	qdisc_reset(q->qdisc);
-	sch->q.qlen = 0;
+	qdisc_reset_queue(sch);
+	if (q->qdisc)
+		qdisc_reset(q->qdisc);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -690,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ret < 0)
 		return ret;
 
-	ret = fifo_set_limit(q->qdisc, qopt->limit);
-	if (ret) {
-		pr_info("netem: can't set fifo limit\n");
-		return ret;
-	}
+	sch->limit = qopt->limit;
 
 	q->latency = qopt->latency;
 	q->jitter = qopt->jitter;
@@ -735,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	return ret;
 }
 
-/*
- * Special case version of FIFO queue for use by netem.
- * It queues in order based on timestamps in skb's
- */
-struct fifo_sched_data {
-	u32 limit;
-	psched_time_t oldest;
-};
-
-static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct sk_buff_head *list = &sch->q;
-	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
-	struct sk_buff *skb;
-
-	if (likely(skb_queue_len(list) < q->limit)) {
-		/* Optimize for add at tail */
-		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
-			q->oldest = tnext;
-			return qdisc_enqueue_tail(nskb, sch);
-		}
-
-		skb_queue_reverse_walk(list, skb) {
-			const struct netem_skb_cb *cb = netem_skb_cb(skb);
-
-			if (tnext >= cb->time_to_send)
-				break;
-		}
-
-		__skb_queue_after(list, skb, nskb);
-
-		sch->qstats.backlog += qdisc_pkt_len(nskb);
-
-		return NET_XMIT_SUCCESS;
-	}
-
-	return qdisc_reshape_fail(nskb, sch);
-}
-
-static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (opt) {
-		struct tc_fifo_qopt *ctl = nla_data(opt);
-		if (nla_len(opt) < sizeof(*ctl))
-			return -EINVAL;
-
-		q->limit = ctl->limit;
-	} else
-		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
-
-	q->oldest = PSCHED_PASTPERFECT;
-	return 0;
-}
-
-static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct tc_fifo_qopt opt = { .limit = q->limit };
-
-	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	return skb->len;
-
-nla_put_failure:
-	return -1;
-}
-
-static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
-	.id		=	"tfifo",
-	.priv_size	=	sizeof(struct fifo_sched_data),
-	.enqueue	=	tfifo_enqueue,
-	.dequeue	=	qdisc_dequeue_head,
-	.peek		=	qdisc_peek_head,
-	.drop		=	qdisc_queue_drop,
-	.init		=	tfifo_init,
-	.reset		=	qdisc_reset_queue,
-	.change		=	tfifo_init,
-	.dump		=	tfifo_dump,
-};
-
 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -828,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	q->loss_model = CLG_RANDOM;
-	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
-				     TC_H_MAKE(sch->handle, 1));
-	if (!q->qdisc) {
-		pr_notice("netem: qdisc create tfifo qdisc failed\n");
-		return -ENOMEM;
-	}
-
 	ret = netem_change(sch, opt);
-	if (ret) {
+	if (ret)
 		pr_info("netem: change failed\n");
-		qdisc_destroy(q->qdisc);
-	}
 	return ret;
 }
 
@@ -848,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-	qdisc_destroy(q->qdisc);
+	if (q->qdisc)
+		qdisc_destroy(q->qdisc);
 	dist_free(q->delay_dist);
 }
 
@@ -952,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (cl != 1) /* only one class */
+	if (cl != 1 || !q->qdisc) /* only one class */
 		return -ENOENT;
 
 	tcm->tcm_handle |= TC_H_MIN(1);
@@ -966,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (new == NULL)
-		new = &noop_qdisc;
-
 	sch_tree_lock(sch);
 	*old = q->qdisc;
 	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
+	if (*old) {
+		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+		qdisc_reset(*old);
+	}
 	sch_tree_unlock(sch);
 
 	return 0;