aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-12-20 22:30:11 -0500
committerDavid S. Miller <davem@davemloft.net>2011-12-21 15:44:34 -0500
commit225d9b89c937633dfeec502741a174fe0bab5b9f (patch)
treed9a25ffaa6b4f14c0f3d624d3c654e9b0f7ec3bd
parent4e68ea26e76273cc62a981a414a8319a7f4c1077 (diff)
sch_sfq: rehash queues in perturb timer
A known Out Of Order (OOO) problem hurts SFQ when timer changes perturbation value, since all new packets delivered to SFQ enqueue might end on different slots than previous in-flight packets. With round robin delivery, we can thus deliver packets in a different order. Since SFQ is limited to small amount of in-flight packets, we can rehash packets so that this OOO problem is fixed. This rehashing is performed only if internal flow classifier is in use. We now store in skb->cb[] the "struct flow_keys" so that we dont call skb_flow_dissect() again while rehashing. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/sched/sch_sfq.c87
1 files changed, 81 insertions, 6 deletions
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 30cda707e400..d329a8a72357 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -136,16 +136,30 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
136 return &q->dep[val - SFQ_SLOTS]; 136 return &q->dep[val - SFQ_SLOTS];
137} 137}
138 138
139/*
140 * In order to be able to quickly rehash our queue when timer changes
141 * q->perturbation, we store flow_keys in skb->cb[]
142 */
143struct sfq_skb_cb {
144 struct flow_keys keys;
145};
146
147static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
148{
149 BUILD_BUG_ON(sizeof(skb->cb) <
150 sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
151 return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
152}
153
139static unsigned int sfq_hash(const struct sfq_sched_data *q, 154static unsigned int sfq_hash(const struct sfq_sched_data *q,
140 const struct sk_buff *skb) 155 const struct sk_buff *skb)
141{ 156{
142 struct flow_keys keys; 157 const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
143 unsigned int hash; 158 unsigned int hash;
144 159
145 skb_flow_dissect(skb, &keys); 160 hash = jhash_3words((__force u32)keys->dst,
146 hash = jhash_3words((__force u32)keys.dst, 161 (__force u32)keys->src ^ keys->ip_proto,
147 (__force u32)keys.src ^ keys.ip_proto, 162 (__force u32)keys->ports, q->perturbation);
148 (__force u32)keys.ports, q->perturbation);
149 return hash & (q->divisor - 1); 163 return hash & (q->divisor - 1);
150} 164}
151 165
@@ -161,8 +175,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
161 TC_H_MIN(skb->priority) <= q->divisor) 175 TC_H_MIN(skb->priority) <= q->divisor)
162 return TC_H_MIN(skb->priority); 176 return TC_H_MIN(skb->priority);
163 177
164 if (!q->filter_list) 178 if (!q->filter_list) {
179 skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
165 return sfq_hash(q, skb) + 1; 180 return sfq_hash(q, skb) + 1;
181 }
166 182
167 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 183 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
168 result = tc_classify(skb, q->filter_list, &res); 184 result = tc_classify(skb, q->filter_list, &res);
@@ -423,12 +439,71 @@ sfq_reset(struct Qdisc *sch)
423 kfree_skb(skb); 439 kfree_skb(skb);
424} 440}
425 441
442/*
443 * When q->perturbation is changed, we rehash all queued skbs
444 * to avoid OOO (Out Of Order) effects.
445 * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
446 * counters.
447 */
448static void sfq_rehash(struct sfq_sched_data *q)
449{
450 struct sk_buff *skb;
451 int i;
452 struct sfq_slot *slot;
453 struct sk_buff_head list;
454
455 __skb_queue_head_init(&list);
456
457 for (i = 0; i < SFQ_SLOTS; i++) {
458 slot = &q->slots[i];
459 if (!slot->qlen)
460 continue;
461 while (slot->qlen) {
462 skb = slot_dequeue_head(slot);
463 sfq_dec(q, i);
464 __skb_queue_tail(&list, skb);
465 }
466 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
467 }
468 q->tail = NULL;
469
470 while ((skb = __skb_dequeue(&list)) != NULL) {
471 unsigned int hash = sfq_hash(q, skb);
472 sfq_index x = q->ht[hash];
473
474 slot = &q->slots[x];
475 if (x == SFQ_EMPTY_SLOT) {
476 x = q->dep[0].next; /* get a free slot */
477 q->ht[hash] = x;
478 slot = &q->slots[x];
479 slot->hash = hash;
480 }
481 slot_queue_add(slot, skb);
482 sfq_inc(q, x);
483 if (slot->qlen == 1) { /* The flow is new */
484 if (q->tail == NULL) { /* It is the first flow */
485 slot->next = x;
486 } else {
487 slot->next = q->tail->next;
488 q->tail->next = x;
489 }
490 q->tail = slot;
491 slot->allot = q->scaled_quantum;
492 }
493 }
494}
495
426static void sfq_perturbation(unsigned long arg) 496static void sfq_perturbation(unsigned long arg)
427{ 497{
428 struct Qdisc *sch = (struct Qdisc *)arg; 498 struct Qdisc *sch = (struct Qdisc *)arg;
429 struct sfq_sched_data *q = qdisc_priv(sch); 499 struct sfq_sched_data *q = qdisc_priv(sch);
500 spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
430 501
502 spin_lock(root_lock);
431 q->perturbation = net_random(); 503 q->perturbation = net_random();
504 if (!q->filter_list && q->tail)
505 sfq_rehash(q);
506 spin_unlock(root_lock);
432 507
433 if (q->perturb_period) 508 if (q->perturb_period)
434 mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 509 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);