aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2013-10-08 12:02:23 -0400
committer David S. Miller <davem@davemloft.net> 2013-10-10 00:08:07 -0400
commit 8a29111c7ca68d928dfab58636f3f6acf0ac04f7 (patch)
tree 3cfc591c8a733e1e8b3d55a3b52caef936022415 /net/core
parent 4c60f1d67fae632743df9324301e3cb2682f54d4 (diff)
net: gro: allow to build full sized skb
skb_gro_receive() is currently limited to 16 or 17 MSS per GRO skb, typically 24616 bytes, because it fills up to MAX_SKB_FRAGS frags. It's relatively easy to extend the skb using frag_list to allow more frags to be appended into the last sk_buff. This still builds very efficient skbs, and allows reaching 45 MSS per skb. (A 45 MSS GRO packet uses one skb plus a frag_list containing 2 additional sk_buffs.) High speed TCP flows benefit from this extension by lowering TCP stack CPU usage (fewer packets stored in the receive queue, fewer ACK packets processed). Forwarding setups could be hurt, as such skbs will need to be linearized, although it's not a new problem, as GRO could already provide skbs with a frag_list. We could make the 65536 bytes threshold a tunable to mitigate this. (The first time we need to linearize an skb in skb_needs_linearize(), we could lower the tunable to ~16*1460 so that following skb_gro_receive() calls build smaller skbs.) Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/skbuff.c 43
1 files changed, 26 insertions, 17 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d81cff119f73..8ead744fcc94 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2936,32 +2936,30 @@ EXPORT_SYMBOL_GPL(skb_segment);
2936 2936
2937int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2937int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2938{ 2938{
2939 struct sk_buff *p = *head; 2939 struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
2940 struct sk_buff *nskb;
2941 struct skb_shared_info *skbinfo = skb_shinfo(skb);
2942 struct skb_shared_info *pinfo = skb_shinfo(p);
2943 unsigned int headroom;
2944 unsigned int len = skb_gro_len(skb);
2945 unsigned int offset = skb_gro_offset(skb); 2940 unsigned int offset = skb_gro_offset(skb);
2946 unsigned int headlen = skb_headlen(skb); 2941 unsigned int headlen = skb_headlen(skb);
2942 struct sk_buff *nskb, *lp, *p = *head;
2943 unsigned int len = skb_gro_len(skb);
2947 unsigned int delta_truesize; 2944 unsigned int delta_truesize;
2945 unsigned int headroom;
2948 2946
2949 if (p->len + len >= 65536) 2947 if (unlikely(p->len + len >= 65536))
2950 return -E2BIG; 2948 return -E2BIG;
2951 2949
2952 if (pinfo->frag_list) 2950 lp = NAPI_GRO_CB(p)->last ?: p;
2953 goto merge; 2951 pinfo = skb_shinfo(lp);
2954 else if (headlen <= offset) { 2952
2953 if (headlen <= offset) {
2955 skb_frag_t *frag; 2954 skb_frag_t *frag;
2956 skb_frag_t *frag2; 2955 skb_frag_t *frag2;
2957 int i = skbinfo->nr_frags; 2956 int i = skbinfo->nr_frags;
2958 int nr_frags = pinfo->nr_frags + i; 2957 int nr_frags = pinfo->nr_frags + i;
2959 2958
2960 offset -= headlen;
2961
2962 if (nr_frags > MAX_SKB_FRAGS) 2959 if (nr_frags > MAX_SKB_FRAGS)
2963 return -E2BIG; 2960 goto merge;
2964 2961
2962 offset -= headlen;
2965 pinfo->nr_frags = nr_frags; 2963 pinfo->nr_frags = nr_frags;
2966 skbinfo->nr_frags = 0; 2964 skbinfo->nr_frags = 0;
2967 2965
@@ -2992,7 +2990,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2992 unsigned int first_offset; 2990 unsigned int first_offset;
2993 2991
2994 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) 2992 if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
2995 return -E2BIG; 2993 goto merge;
2996 2994
2997 first_offset = skb->data - 2995 first_offset = skb->data -
2998 (unsigned char *)page_address(page) + 2996 (unsigned char *)page_address(page) +
@@ -3010,7 +3008,10 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
3010 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff)); 3008 delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
3011 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; 3009 NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
3012 goto done; 3010 goto done;
3013 } else if (skb_gro_len(p) != pinfo->gso_size) 3011 }
3012 if (pinfo->frag_list)
3013 goto merge;
3014 if (skb_gro_len(p) != pinfo->gso_size)
3014 return -E2BIG; 3015 return -E2BIG;
3015 3016
3016 headroom = skb_headroom(p); 3017 headroom = skb_headroom(p);
@@ -3062,16 +3063,24 @@ merge:
3062 3063
3063 __skb_pull(skb, offset); 3064 __skb_pull(skb, offset);
3064 3065
3065 NAPI_GRO_CB(p)->last->next = skb; 3066 if (!NAPI_GRO_CB(p)->last)
3067 skb_shinfo(p)->frag_list = skb;
3068 else
3069 NAPI_GRO_CB(p)->last->next = skb;
3066 NAPI_GRO_CB(p)->last = skb; 3070 NAPI_GRO_CB(p)->last = skb;
3067 skb_header_release(skb); 3071 skb_header_release(skb);
3072 lp = p;
3068 3073
3069done: 3074done:
3070 NAPI_GRO_CB(p)->count++; 3075 NAPI_GRO_CB(p)->count++;
3071 p->data_len += len; 3076 p->data_len += len;
3072 p->truesize += delta_truesize; 3077 p->truesize += delta_truesize;
3073 p->len += len; 3078 p->len += len;
3074 3079 if (lp != p) {
3080 lp->data_len += len;
3081 lp->truesize += delta_truesize;
3082 lp->len += len;
3083 }
3075 NAPI_GRO_CB(skb)->same_flow = 1; 3084 NAPI_GRO_CB(skb)->same_flow = 1;
3076 return 0; 3085 return 0;
3077} 3086}