aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2012-10-06 04:08:49 -0400
committerDavid S. Miller <davem@davemloft.net>2012-10-08 14:51:51 -0400
commit2e71a6f8084e7ac87166dd77d99c44190fb844fc (patch)
treeeb2e2d47361b35b2b5a3f26beac3d1fbd888c372 /net/core
parenta2af139ff1cd85df586690ff626619ab1ee88b0a (diff)
net: gro: selective flush of packets
Current GRO can hold packets in gro_list for an almost unlimited time, in case the napi->poll() handler consumes its budget over and over. In this case, napi_complete()/napi_gro_flush() are not called. Another problem is that gro_list is flushed in an unfriendly way: we scan the list and complete packets in the reverse order (youngest packets first, oldest packets last). This defeats priorities that the sender could have cooked. Since GRO currently only stores TCP packets, we don't really notice the bug because of retransmits, but this behavior can add unexpected latencies, particularly on mice flows clamped by elephant flows. This patch makes sure no packet can stay more than 1 ms in the queue, and only in stress situations. It also completes packets in the right order to minimize latencies. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Jesse Gross <jesse@nicira.com> Cc: Tom Herbert <therbert@google.com> Cc: Yuchung Cheng <ycheng@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c38
1 files changed, 31 insertions, 7 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index de2bad717d56..d44668f63c88 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3471,17 +3471,31 @@ out:
3471 return netif_receive_skb(skb); 3471 return netif_receive_skb(skb);
3472} 3472}
3473 3473
3474inline void napi_gro_flush(struct napi_struct *napi) 3474/* napi->gro_list contains packets ordered by age.
3475 * youngest packets at the head of it.
3476 * Complete skbs in reverse order to reduce latencies.
3477 */
3478void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3475{ 3479{
3476 struct sk_buff *skb, *next; 3480 struct sk_buff *skb, *prev = NULL;
3477 3481
3478 for (skb = napi->gro_list; skb; skb = next) { 3482 /* scan list and build reverse chain */
3479 next = skb->next; 3483 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3484 skb->prev = prev;
3485 prev = skb;
3486 }
3487
3488 for (skb = prev; skb; skb = prev) {
3480 skb->next = NULL; 3489 skb->next = NULL;
3490
3491 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3492 return;
3493
3494 prev = skb->prev;
3481 napi_gro_complete(skb); 3495 napi_gro_complete(skb);
3496 napi->gro_count--;
3482 } 3497 }
3483 3498
3484 napi->gro_count = 0;
3485 napi->gro_list = NULL; 3499 napi->gro_list = NULL;
3486} 3500}
3487EXPORT_SYMBOL(napi_gro_flush); 3501EXPORT_SYMBOL(napi_gro_flush);
@@ -3542,6 +3556,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3542 3556
3543 napi->gro_count++; 3557 napi->gro_count++;
3544 NAPI_GRO_CB(skb)->count = 1; 3558 NAPI_GRO_CB(skb)->count = 1;
3559 NAPI_GRO_CB(skb)->age = jiffies;
3545 skb_shinfo(skb)->gso_size = skb_gro_len(skb); 3560 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3546 skb->next = napi->gro_list; 3561 skb->next = napi->gro_list;
3547 napi->gro_list = skb; 3562 napi->gro_list = skb;
@@ -3878,7 +3893,7 @@ void napi_complete(struct napi_struct *n)
3878 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) 3893 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3879 return; 3894 return;
3880 3895
3881 napi_gro_flush(n); 3896 napi_gro_flush(n, false);
3882 local_irq_save(flags); 3897 local_irq_save(flags);
3883 __napi_complete(n); 3898 __napi_complete(n);
3884 local_irq_restore(flags); 3899 local_irq_restore(flags);
@@ -3983,8 +3998,17 @@ static void net_rx_action(struct softirq_action *h)
3983 local_irq_enable(); 3998 local_irq_enable();
3984 napi_complete(n); 3999 napi_complete(n);
3985 local_irq_disable(); 4000 local_irq_disable();
3986 } else 4001 } else {
4002 if (n->gro_list) {
4003 /* flush too old packets
4004 * If HZ < 1000, flush all packets.
4005 */
4006 local_irq_enable();
4007 napi_gro_flush(n, HZ >= 1000);
4008 local_irq_disable();
4009 }
3987 list_move_tail(&n->poll_list, &sd->poll_list); 4010 list_move_tail(&n->poll_list, &sd->poll_list);
4011 }
3988 } 4012 }
3989 4013
3990 netpoll_poll_unlock(have); 4014 netpoll_poll_unlock(have);