aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-05-15 11:38:01 -0400
committer: David S. Miller <davem@davemloft.net> 2013-05-16 17:43:40 -0400
commit: 6ff50cd55545d922f5c62776fe1feb38a9846168 (patch)
tree: cf3a50a628e14fe105a04050823e3051aebf92f3 /net/ipv4
parent: 5c4b274981950049af3330f14ed9e9aa25afb2fb (diff)
tcp: gso: do not generate out of order packets
The GSO TCP handler has the following issues:

1) ooo_okay from the original GSO packet is duplicated to all segments.
2) All segments but the last one are orphaned, so the transmit path cannot get the transmit queue number from the socket. This happens, for example, if GSO segmentation is done before a stacked device.

The result is that we can send packets from a given TCP flow to different TX queues (if using multiqueue NICs). This generates OOO problems and spurious SACKs & retransmits.

Fix this by keeping the socket pointer set for all segments. This means that every segment must also have a destructor, and the original gso skb truesize must be split across all segments, to keep precise sk->sk_wmem_alloc accounting.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp.c 22
1 files changed, 21 insertions, 1 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 10c93930abda..ab450c099aa4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2887,6 +2887,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2887 unsigned int mss; 2887 unsigned int mss;
2888 struct sk_buff *gso_skb = skb; 2888 struct sk_buff *gso_skb = skb;
2889 __sum16 newcheck; 2889 __sum16 newcheck;
2890 bool ooo_okay, copy_destructor;
2890 2891
2891 if (!pskb_may_pull(skb, sizeof(*th))) 2892 if (!pskb_may_pull(skb, sizeof(*th)))
2892 goto out; 2893 goto out;
@@ -2927,10 +2928,18 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2927 goto out; 2928 goto out;
2928 } 2929 }
2929 2930
2931 copy_destructor = gso_skb->destructor == tcp_wfree;
2932 ooo_okay = gso_skb->ooo_okay;
2933 /* All segments but the first should have ooo_okay cleared */
2934 skb->ooo_okay = 0;
2935
2930 segs = skb_segment(skb, features); 2936 segs = skb_segment(skb, features);
2931 if (IS_ERR(segs)) 2937 if (IS_ERR(segs))
2932 goto out; 2938 goto out;
2933 2939
2940 /* Only first segment might have ooo_okay set */
2941 segs->ooo_okay = ooo_okay;
2942
2934 delta = htonl(oldlen + (thlen + mss)); 2943 delta = htonl(oldlen + (thlen + mss));
2935 2944
2936 skb = segs; 2945 skb = segs;
@@ -2950,6 +2959,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2950 thlen, skb->csum)); 2959 thlen, skb->csum));
2951 2960
2952 seq += mss; 2961 seq += mss;
2962 if (copy_destructor) {
2963 skb->destructor = gso_skb->destructor;
2964 skb->sk = gso_skb->sk;
2965 /* {tcp|sock}_wfree() use exact truesize accounting :
2966 * sum(skb->truesize) MUST be exactly gso_skb->truesize
2967 * So we account mss bytes of 'true size' for each segment.
2968 * The last segment will contain the remaining.
2969 */
2970 skb->truesize = mss;
2971 gso_skb->truesize -= mss;
2972 }
2953 skb = skb->next; 2973 skb = skb->next;
2954 th = tcp_hdr(skb); 2974 th = tcp_hdr(skb);
2955 2975
@@ -2962,7 +2982,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2962 * is freed at TX completion, and not right now when gso_skb 2982 * is freed at TX completion, and not right now when gso_skb
2963 * is freed by GSO engine 2983 * is freed by GSO engine
2964 */ 2984 */
2965 if (gso_skb->destructor == tcp_wfree) { 2985 if (copy_destructor) {
2966 swap(gso_skb->sk, skb->sk); 2986 swap(gso_skb->sk, skb->sk);
2967 swap(gso_skb->destructor, skb->destructor); 2987 swap(gso_skb->destructor, skb->destructor);
2968 swap(gso_skb->truesize, skb->truesize); 2988 swap(gso_skb->truesize, skb->truesize);