aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2012-04-10 16:30:48 -0400
committerDavid S. Miller <davem@davemloft.net>2012-04-11 10:11:12 -0400
commita21d45726acacc963d8baddf74607d9b74e2b723 (patch)
tree7cd7e1409ac0242148a0a8117956d08572eb32cf /net
parent87151b8689d890dfb495081f7be9b9e257f7a2df (diff)
tcp: avoid order-1 allocations on wifi and tx path
Marc Merlin reported many order-1 allocations failures in TX path on its wireless setup, that dont make any sense with MTU=1500 network, and non SG capable hardware. After investigation, it turns out TCP uses sk_stream_alloc_skb() and used as a convention skb_tailroom(skb) to know how many bytes of data payload could be put in this skb (for non SG capable devices) Note : these skb used kmalloc-4096 (MTU=1500 + MAX_HEADER + sizeof(struct skb_shared_info) being above 2048) Later, mac80211 layer need to add some bytes at the tail of skb (IEEE80211_ENCRYPT_TAILROOM = 18 bytes) and since no more tailroom is available has to call pskb_expand_head() and request order-1 allocations. This patch changes sk_stream_alloc_skb() so that only sk->sk_prot->max_header bytes of headroom are reserved, and use a new skb field, avail_size to hold the data payload limit. This way, order-0 allocations done by TCP stack can leave more than 2 KB of tailroom and no more allocation is performed in mac80211 layer (or any layer needing some tailroom) avail_size is unioned with mark/dropcount, since mark will be set later in IP stack for output packets. Therefore, skb size is unchanged. Reported-by: Marc MERLIN <marc@merlins.org> Tested-by: Marc MERLIN <marc@merlins.org> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c8
-rw-r--r--net/ipv4/tcp_output.c2
2 files changed, 5 insertions, 5 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7758a83f98ff..a5daa211a8e1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -701,11 +701,12 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
701 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); 701 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
702 if (skb) { 702 if (skb) {
703 if (sk_wmem_schedule(sk, skb->truesize)) { 703 if (sk_wmem_schedule(sk, skb->truesize)) {
704 skb_reserve(skb, sk->sk_prot->max_header);
704 /* 705 /*
705 * Make sure that we have exactly size bytes 706 * Make sure that we have exactly size bytes
706 * available to the caller, no more, no less. 707 * available to the caller, no more, no less.
707 */ 708 */
708 skb_reserve(skb, skb_tailroom(skb) - size); 709 skb->avail_size = size;
709 return skb; 710 return skb;
710 } 711 }
711 __kfree_skb(skb); 712 __kfree_skb(skb);
@@ -995,10 +996,9 @@ new_segment:
995 copy = seglen; 996 copy = seglen;
996 997
997 /* Where to copy to? */ 998 /* Where to copy to? */
998 if (skb_tailroom(skb) > 0) { 999 if (skb_availroom(skb) > 0) {
999 /* We have some space in skb head. Superb! */ 1000 /* We have some space in skb head. Superb! */
1000 if (copy > skb_tailroom(skb)) 1001 copy = min_t(int, copy, skb_availroom(skb));
1001 copy = skb_tailroom(skb);
1002 err = skb_add_data_nocache(sk, skb, from, copy); 1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err) 1003 if (err)
1004 goto do_fault; 1004 goto do_fault;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 364784a91939..376b2cfbb685 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2060,7 +2060,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2060 /* Punt if not enough space exists in the first SKB for 2060 /* Punt if not enough space exists in the first SKB for
2061 * the data in the second 2061 * the data in the second
2062 */ 2062 */
2063 if (skb->len > skb_tailroom(to)) 2063 if (skb->len > skb_availroom(to))
2064 break; 2064 break;
2065 2065
2066 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) 2066 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))