diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-12-20 22:30:11 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-12-21 15:44:34 -0500 |
commit | 225d9b89c937633dfeec502741a174fe0bab5b9f (patch) | |
tree | d9a25ffaa6b4f14c0f3d624d3c654e9b0f7ec3bd | |
parent | 4e68ea26e76273cc62a981a414a8319a7f4c1077 (diff) |
sch_sfq: rehash queues in perturb timer
A known Out Of Order (OOO) problem hurts SFQ when timer changes
perturbation value, since all new packets delivered to SFQ enqueue might
end on different slots than previous in-flight packets.
With round robin delivery, we can thus deliver packets in a different
order.
Since SFQ is limited to small amount of in-flight packets, we can rehash
packets so that this OOO problem is fixed.
This rehashing is performed only if internal flow classifier is in use.
We now store in skb->cb[] the "struct flow_keys" so that we dont call
skb_flow_dissect() again while rehashing.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/sched/sch_sfq.c | 87 |
1 files changed, 81 insertions, 6 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 30cda707e400..d329a8a72357 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -136,16 +136,30 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index | |||
136 | return &q->dep[val - SFQ_SLOTS]; | 136 | return &q->dep[val - SFQ_SLOTS]; |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | ||
140 | * In order to be able to quickly rehash our queue when timer changes | ||
141 | * q->perturbation, we store flow_keys in skb->cb[] | ||
142 | */ | ||
143 | struct sfq_skb_cb { | ||
144 | struct flow_keys keys; | ||
145 | }; | ||
146 | |||
147 | static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb) | ||
148 | { | ||
149 | BUILD_BUG_ON(sizeof(skb->cb) < | ||
150 | sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb)); | ||
151 | return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data; | ||
152 | } | ||
153 | |||
139 | static unsigned int sfq_hash(const struct sfq_sched_data *q, | 154 | static unsigned int sfq_hash(const struct sfq_sched_data *q, |
140 | const struct sk_buff *skb) | 155 | const struct sk_buff *skb) |
141 | { | 156 | { |
142 | struct flow_keys keys; | 157 | const struct flow_keys *keys = &sfq_skb_cb(skb)->keys; |
143 | unsigned int hash; | 158 | unsigned int hash; |
144 | 159 | ||
145 | skb_flow_dissect(skb, &keys); | 160 | hash = jhash_3words((__force u32)keys->dst, |
146 | hash = jhash_3words((__force u32)keys.dst, | 161 | (__force u32)keys->src ^ keys->ip_proto, |
147 | (__force u32)keys.src ^ keys.ip_proto, | 162 | (__force u32)keys->ports, q->perturbation); |
148 | (__force u32)keys.ports, q->perturbation); | ||
149 | return hash & (q->divisor - 1); | 163 | return hash & (q->divisor - 1); |
150 | } | 164 | } |
151 | 165 | ||
@@ -161,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
161 | TC_H_MIN(skb->priority) <= q->divisor) | 175 | TC_H_MIN(skb->priority) <= q->divisor) |
162 | return TC_H_MIN(skb->priority); | 176 | return TC_H_MIN(skb->priority); |
163 | 177 | ||
164 | if (!q->filter_list) | 178 | if (!q->filter_list) { |
179 | skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); | ||
165 | return sfq_hash(q, skb) + 1; | 180 | return sfq_hash(q, skb) + 1; |
181 | } | ||
166 | 182 | ||
167 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | 183 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; |
168 | result = tc_classify(skb, q->filter_list, &res); | 184 | result = tc_classify(skb, q->filter_list, &res); |
@@ -423,12 +439,71 @@ sfq_reset(struct Qdisc *sch) | |||
423 | kfree_skb(skb); | 439 | kfree_skb(skb); |
424 | } | 440 | } |
425 | 441 | ||
442 | /* | ||
443 | * When q->perturbation is changed, we rehash all queued skbs | ||
444 | * to avoid OOO (Out Of Order) effects. | ||
445 | * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change | ||
446 | * counters. | ||
447 | */ | ||
448 | static void sfq_rehash(struct sfq_sched_data *q) | ||
449 | { | ||
450 | struct sk_buff *skb; | ||
451 | int i; | ||
452 | struct sfq_slot *slot; | ||
453 | struct sk_buff_head list; | ||
454 | |||
455 | __skb_queue_head_init(&list); | ||
456 | |||
457 | for (i = 0; i < SFQ_SLOTS; i++) { | ||
458 | slot = &q->slots[i]; | ||
459 | if (!slot->qlen) | ||
460 | continue; | ||
461 | while (slot->qlen) { | ||
462 | skb = slot_dequeue_head(slot); | ||
463 | sfq_dec(q, i); | ||
464 | __skb_queue_tail(&list, skb); | ||
465 | } | ||
466 | q->ht[slot->hash] = SFQ_EMPTY_SLOT; | ||
467 | } | ||
468 | q->tail = NULL; | ||
469 | |||
470 | while ((skb = __skb_dequeue(&list)) != NULL) { | ||
471 | unsigned int hash = sfq_hash(q, skb); | ||
472 | sfq_index x = q->ht[hash]; | ||
473 | |||
474 | slot = &q->slots[x]; | ||
475 | if (x == SFQ_EMPTY_SLOT) { | ||
476 | x = q->dep[0].next; /* get a free slot */ | ||
477 | q->ht[hash] = x; | ||
478 | slot = &q->slots[x]; | ||
479 | slot->hash = hash; | ||
480 | } | ||
481 | slot_queue_add(slot, skb); | ||
482 | sfq_inc(q, x); | ||
483 | if (slot->qlen == 1) { /* The flow is new */ | ||
484 | if (q->tail == NULL) { /* It is the first flow */ | ||
485 | slot->next = x; | ||
486 | } else { | ||
487 | slot->next = q->tail->next; | ||
488 | q->tail->next = x; | ||
489 | } | ||
490 | q->tail = slot; | ||
491 | slot->allot = q->scaled_quantum; | ||
492 | } | ||
493 | } | ||
494 | } | ||
495 | |||
426 | static void sfq_perturbation(unsigned long arg) | 496 | static void sfq_perturbation(unsigned long arg) |
427 | { | 497 | { |
428 | struct Qdisc *sch = (struct Qdisc *)arg; | 498 | struct Qdisc *sch = (struct Qdisc *)arg; |
429 | struct sfq_sched_data *q = qdisc_priv(sch); | 499 | struct sfq_sched_data *q = qdisc_priv(sch); |
500 | spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); | ||
430 | 501 | ||
502 | spin_lock(root_lock); | ||
431 | q->perturbation = net_random(); | 503 | q->perturbation = net_random(); |
504 | if (!q->filter_list && q->tail) | ||
505 | sfq_rehash(q); | ||
506 | spin_unlock(root_lock); | ||
432 | 507 | ||
433 | if (q->perturb_period) | 508 | if (q->perturb_period) |
434 | mod_timer(&q->perturb_timer, jiffies + q->perturb_period); | 509 | mod_timer(&q->perturb_timer, jiffies + q->perturb_period); |