-rw-r--r--  include/linux/pkt_sched.h |  20
-rw-r--r--  include/net/red.h         |   3
-rw-r--r--  net/sched/sch_sfq.c       | 146
3 files changed, 152 insertions, 17 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8f1b928f777c..0d5b79365d03 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,10 +162,30 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfqred_stats {
+	__u32	prob_drop;	/* Early drops, below max threshold */
+	__u32	forced_drop;	/* Early drops, after max threshold */
+	__u32	prob_mark;	/* Marked packets, below max threshold */
+	__u32	forced_mark;	/* Marked packets, after max threshold */
+	__u32	prob_mark_head;	/* Marked packets, below max threshold */
+	__u32	forced_mark_head;/* Marked packets, after max threshold */
+};
+
 struct tc_sfq_qopt_v1 {
 	struct tc_sfq_qopt v0;
 	unsigned int	depth;		/* max number of packets per flow */
 	unsigned int	headdrop;
+/* SFQRED parameters */
+	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+	__u32		qth_min;	/* Min average length threshold (bytes) */
+	__u32		qth_max;	/* Max average length threshold (bytes) */
+	unsigned char	Wlog;		/* log(W) */
+	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min)) */
+	unsigned char	Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;
+	__u32		max_P;		/* probability, high resolution */
+/* SFQRED stats */
+	struct tc_sfqred_stats stats;
 };
 
 
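Note: a hypothetical userspace sketch of how the extended tc_sfq_qopt_v1 above could be filled to request SFQRED with ECN marking. The helper name and the numeric values are illustrative only (not defaults from this patch), and the netlink message assembly around it is assumed:

/* Hypothetical example: fill the v1 options for SFQRED with ECN marking.
 * Values are placeholders; the kernel scales qth_min/qth_max by Wlog itself.
 */
#include <string.h>
#include <linux/pkt_sched.h>

static void sfqred_fill_opt(struct tc_sfq_qopt_v1 *opt)
{
	memset(opt, 0, sizeof(*opt));
	opt->v0.quantum = 1514;		/* bytes served per flow per round */
	opt->v0.limit   = 127;		/* total packets held by the qdisc */
	opt->v0.divisor = 1024;		/* hash table size, power of two */
	opt->depth      = 127;		/* max packets per flow */
	opt->headdrop   = 1;		/* drop/mark at head of the flow queue */
	opt->qth_min    = 3000;		/* average per-flow backlog thresholds, bytes */
	opt->qth_max    = 9000;
	opt->Wlog       = 3;		/* EWMA weight = 1/2^Wlog */
	opt->Plog       = 10;
	opt->Scell_log  = 0;
	opt->flags      = TC_RED_ECN;	/* prefer marking over dropping */
	opt->max_P      = 0x1999999a;	/* ~0.1 in 32-bit fixed point */
}

The stats member is left zeroed here; it is only meaningful in the kernel-to-userspace dump direction.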
diff --git a/include/net/red.h b/include/net/red.h
index baab385a4736..28068ec614b2 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -199,7 +199,8 @@ static inline void red_set_parms(struct red_parms *p,
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
-	memcpy(p->Stab, stab, sizeof(p->Stab));
+	if (stab)
+		memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
 static inline int red_is_idling(const struct red_vars *v)
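Note: the new NULL guard lets a caller that has no size table (as the sch_sfq.c change below does) pass a NULL stab and leave p->Stab untouched. A minimal sketch of such a caller; the wrapper is an assumption for illustration, only the red_set_parms() call mirrors the usage introduced in this patch:

/* Sketch of a Stab-less caller relying on the NULL guard above. */
#include <linux/slab.h>
#include <net/red.h>

static struct red_parms *red_parms_alloc_no_stab(u32 qth_min, u32 qth_max,
						 u8 Wlog, u8 Plog, u32 max_P)
{
	struct red_parms *p = kzalloc(sizeof(*p), GFP_KERNEL);

	if (p)
		red_set_parms(p, qth_min, qth_max, Wlog, Plog,
			      0,	/* Scell_log */
			      NULL,	/* no Stab: guarded memcpy is skipped */
			      max_P);
	return p;
}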
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0a7964009e8c..67494aef9acf 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
+#include <net/red.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -108,24 +109,30 @@ struct sfq_slot {
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
+
+	unsigned int	backlog;
+	struct red_vars vars;
 };
 
 struct sfq_sched_data {
 /* frequently used fields */
 	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-	unsigned int	maxflows;	/* number of flows in flows array */
-	int		headdrop;
-	int		maxdepth;	/* limit of packets per flow */
+	u8		headdrop;
+	u8		maxdepth;	/* limit of packets per flow */
 
 	u32		perturbation;
-	struct tcf_proto *filter_list;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	u8		cur_depth;	/* depth of longest slot */
+	u8		flags;
 	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
-	struct sfq_slot *tail;		/* current slot in round */
+	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
+	struct red_parms *red_parms;
+	struct tc_sfqred_stats stats;
+	struct sfq_slot *tail;		/* current slot in round */
+
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 	/* Linked lists of slots, indexed by depth
 	 * dep[0] : list of unused flows
@@ -133,6 +140,7 @@ struct sfq_sched_data {
 	 * dep[X] : list of flows with X packets
 	 */
 
+	unsigned int	maxflows;	/* number of flows in flows array */
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
@@ -321,6 +329,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 drop:
 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
+		slot->backlog -= len;
 		sfq_dec(q, x);
 		kfree_skb(skb);
 		sch->q.qlen--;
@@ -341,6 +350,23 @@ drop:
 	return 0;
 }
 
+/* Is ECN parameter configured */
+static int sfq_prob_mark(const struct sfq_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max threshold just be marked */
+static int sfq_hard_mark(const struct sfq_sched_data *q)
+{
+	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
+}
+
+static int sfq_headdrop(const struct sfq_sched_data *q)
+{
+	return q->headdrop;
+}
+
 static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
@@ -349,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
+	struct sk_buff *head;
+	int delta;
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
@@ -368,24 +396,75 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
+		slot->backlog = 0; /* should already be 0 anyway... */
+		red_set_vars(&slot->vars);
+		goto enqueue;
 	}
+	if (q->red_parms) {
+		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		switch (red_action(q->red_parms,
+				   &slot->vars,
+				   slot->vars.qavg)) {
+		case RED_DONT_MARK:
+			break;
 
-	if (slot->qlen >= q->maxdepth) {
-		struct sk_buff *head;
+		case RED_PROB_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_prob_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.prob_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.prob_mark++;
+					break;
+				}
+			}
+			q->stats.prob_drop++;
+			goto congestion_drop;
+
+		case RED_HARD_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_hard_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.forced_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.forced_mark++;
+					break;
+				}
+			}
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+	}
 
-		if (!q->headdrop)
+	if (slot->qlen >= q->maxdepth) {
+congestion_drop:
+		if (!sfq_headdrop(q))
 			return qdisc_drop(skb, sch);
 
+		/* We know we have at least one packet in queue */
 		head = slot_dequeue_head(slot);
-		sch->qstats.backlog -= qdisc_pkt_len(head);
+		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
+		sch->qstats.backlog -= delta;
+		slot->backlog -= delta;
 		qdisc_drop(head, sch);
 
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		slot_queue_add(slot, skb);
 		return NET_XMIT_CN;
 	}
 
+enqueue:
 	sch->qstats.backlog += qdisc_pkt_len(skb);
+	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
@@ -396,6 +475,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
@@ -439,7 +519,7 @@ next_slot:
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
-
+	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -490,6 +570,8 @@ static void sfq_rehash(struct Qdisc *sch)
 			sfq_dec(q, i);
 			__skb_queue_tail(&list, skb);
 		}
+		slot->backlog = 0;
+		red_set_vars(&slot->vars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -514,6 +596,11 @@ drop: sch->qstats.backlog -= qdisc_pkt_len(skb);
 		if (slot->qlen >= q->maxdepth)
 			goto drop;
 		slot_queue_add(slot, skb);
+		if (q->red_parms)
+			slot->vars.qavg = red_calc_qavg(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
 			if (q->tail == NULL) {	/* It is the first flow */
@@ -552,6 +639,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
+	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -560,7 +648,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
-
+	if (ctl_v1 && ctl_v1->qth_min) {
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
 	sch_tree_lock(sch);
 	if (ctl->quantum) {
 		q->quantum = ctl->quantum;
@@ -576,6 +668,16 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl_v1) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		if (p) {
+			swap(q->red_parms, p);
+			red_set_parms(q->red_parms,
+				      ctl_v1->qth_min, ctl_v1->qth_max,
+				      ctl_v1->Wlog,
+				      ctl_v1->Plog, ctl_v1->Scell_log,
+				      NULL,
+				      ctl_v1->max_P);
+		}
+		q->flags = ctl_v1->flags;
 		q->headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
@@ -594,6 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
+	kfree(p);
 	return 0;
 }
 
@@ -625,6 +728,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
 	sfq_free(q->slots);
+	kfree(q->red_parms);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -683,6 +787,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt_v1 opt;
+	struct red_parms *p = q->red_parms;
 
 	memset(&opt, 0, sizeof(opt));
 	opt.v0.quantum	= q->quantum;
@@ -693,6 +798,17 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.depth	= q->maxdepth;
 	opt.headdrop	= q->headdrop;
 
+	if (p) {
+		opt.qth_min	= p->qth_min >> p->Wlog;
+		opt.qth_max	= p->qth_max >> p->Wlog;
+		opt.Wlog	= p->Wlog;
+		opt.Plog	= p->Plog;
+		opt.Scell_log	= p->Scell_log;
+		opt.max_P	= p->max_P;
+	}
+	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.flags	= q->flags;
+
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
@@ -747,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	sfq_index idx = q->ht[cl - 1];
 	struct gnet_stats_queue qs = { 0 };
 	struct tc_sfq_xstats xstats = { 0 };
-	struct sk_buff *skb;
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
 		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
-		slot_queue_walk(slot, skb)
-			qs.backlog += qdisc_pkt_len(skb);
+		qs.backlog = slot->backlog;
 	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
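Note: for readers following the new sfq_enqueue() logic above, the helpers sfq_prob_mark() and sfq_hard_mark() encode how TC_RED_ECN and TC_RED_HARDDROP combine. A standalone, simplified illustration of that mapping (not kernel code; it ignores the head-drop preference and the fall-through to drop when a packet is not ECN-capable, both handled in the patch itself):

/* Illustration only: map a RED verdict to mark-vs-drop for the flow's packet. */
#include <stdbool.h>
#include <linux/pkt_sched.h>	/* TC_RED_ECN, TC_RED_HARDDROP */

enum sfqred_outcome { SFQRED_MARK, SFQRED_DROP };

static enum sfqred_outcome sfqred_decide(unsigned int flags, bool over_max)
{
	bool ecn      = flags & TC_RED_ECN;
	bool harddrop = flags & TC_RED_HARDDROP;

	if (!over_max)		/* RED_PROB_MARK: average backlog between qth_min and qth_max */
		return ecn ? SFQRED_MARK : SFQRED_DROP;
	/* RED_HARD_MARK: above qth_max, mark only if ECN is set and HARDDROP is not */
	return (ecn && !harddrop) ? SFQRED_MARK : SFQRED_DROP;
}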