From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 7 Jan 2013 09:28:21 +0000
Subject: net: introduce skb_transport_header_was_set()

We have the skb_mac_header_was_set() helper to tell whether mac_header
was set on an skb. We would like the same for transport_header.

__netif_receive_skb() doesn't reset the transport header if it was
already set by the GRO layer.

Note that network stacks usually reset the transport header anyway,
after pulling the network header, so this change only allows a
followup patch to have a more precise qdisc pkt_len computation for
GSO packets at the ingress side.

Signed-off-by: Eric Dumazet
Cc: Jamal Hadi Salim
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 320e976d5ab8..8b2256e880e0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1492,6 +1492,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header += offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != ~0U;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->head + skb->transport_header;
@@ -1580,6 +1585,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header = skb->data + offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != NULL;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->transport_header;
--
cgit v1.2.2
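
As a usage sketch (hypothetical caller code, not part of the patch
itself): a receive path can now avoid clobbering a transport header
that GRO has already set, along these lines:

    /* Minimal sketch of an RX header fixup, mirroring the
     * __netif_receive_skb() change this helper enables; not the
     * verbatim kernel code.
     */
    static void rx_fixup_headers(struct sk_buff *skb)
    {
            skb_reset_network_header(skb);
            /* Keep a transport header set earlier (e.g. by GRO). */
            if (!skb_transport_header_was_set(skb))
                    skb_reset_transport_header(skb);
            skb_reset_mac_len(skb);
    }
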
From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 25 Jan 2013 20:34:37 +0000
Subject: net: fix possible wrong checksum generation

Pravin Shelar mentioned that GSO could potentially generate a wrong TX
checksum if the skb has fragments that are overwritten by the user
between the checksum computation and transmit.

He suggested linearizing such skbs, but this extra copy can be avoided
for normal tcp skbs cooked by tcp_sendmsg().

This patch introduces a new SKB_GSO_SHARED_FRAG flag, set in
skb_shinfo(skb)->gso_type if at least one frag can be modified by the
user.

Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.

Tested:

$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3959.52

$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3216.80

SENDFILE performance is impacted by the extra allocation and copy,
and because it uses order-0 pages while TCP_STREAM uses bigger pages.

Reported-by: Pravin Shelar
Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8b2256e880e0..0259b719bebf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -307,6 +307,13 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb)
 	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
 }
 
+/**
+ * skb_has_shared_frag - can any frag be overwritten
+ * @skb: buffer to test
+ *
+ * Return true if the skb has at least one frag that might be modified
+ * by an external entity (as in vmsplice()/sendfile())
+ */
+static inline bool skb_has_shared_frag(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+}
+
 /**
  * skb_linearize_cow - make sure skb is linear and writable
  * @skb: buffer to process
--
cgit v1.2.2
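
To illustrate why the flag matters (a simplified, hypothetical sketch;
the real fix lives in the GSO/checksum paths, e.g. tcp_gso_segment()):
a transmit path that must checksum frags the user could still rewrite
has to work on a private copy first:

    /* Sketch: make the data private before checksumming if any frag
     * may still be overwritten (vmsplice(), sendfile() ...), so the
     * bytes we checksum are the bytes we transmit. Hypothetical
     * helper, not the verbatim kernel code.
     */
    static int tx_checksum_safely(struct sk_buff *skb)
    {
            if (skb_has_shared_frag(skb)) {
                    int err = skb_linearize(skb);

                    if (err)
                            return err;
            }
            return skb_checksum_help(skb);
    }
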
From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 8 Feb 2013 10:17:15 +0000
Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE

In order to address the fact that some devices cannot support the full
32K frag size, we need the value accessible somewhere so that we can
compare it against what the device can support. As such I am moving
the values out of skbuff.c and into skbuff.h.

Signed-off-by: Alexander Duyck
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0259b719bebf..d7573c37a51d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1832,6 +1832,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS    NETDEV_FRAG_PAGE_MAX_SIZE
+
 extern void *netdev_alloc_frag(unsigned int fragsz);
 
 extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
--
cgit v1.2.2

From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Mon, 11 Feb 2013 09:27:41 +0000
Subject: net: Fix possible wrong checksum generation.

Patch cef401de7be8c4e (net: fix possible wrong checksum generation)
fixed wrong checksum calculation, but it broke TSO by defining a new
GSO type without a corresponding netdev feature for that type:
net_gso_ok() would not allow hardware checksum/segmentation offload of
such packets without the feature.

The following patch fixes both TSO and the wrong checksum. It uses the
same logic Eric Dumazet used, introducing a new flag,
SKBTX_SHARED_FRAG, set if at least one frag can be modified by the
user. But the SKBTX_SHARED_FRAG flag is kept in the skb shared info
tx_flags rather than in gso_type.

tx_flags is a better fit than gso_type, since an skb can have a shared
frag without being a GSO packet. This decouples SHARED_FRAG from GSO,
so there is no need to define a netdev feature for it.

Signed-off-by: Pravin B Shelar
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d7573c37a51d..9da99520ccd5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -230,6 +230,13 @@ enum {
 
 	/* generate wifi status information (where possible) */
 	SKBTX_WIFI_STATUS = 1 << 4,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKBTX_SHARED_FRAG = 1 << 5,
 };
 
 /*
@@ -307,13 +314,6 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
-
-	/* This indicates at least one fragment might be overwritten
-	 * (as in vmsplice(), sendfile() ...)
-	 * If we need to compute a TX checksum, we'll need to copy
-	 * all frags to avoid possible bad checksum
-	 */
-	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2220,7 +2220,8 @@ static inline int skb_linearize(struct sk_buff *skb)
  */
 static inline bool skb_has_shared_frag(const struct sk_buff *skb)
 {
-	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+	return skb_is_nonlinear(skb) &&
+	       skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 }
 
 /**
--
cgit v1.2.2

From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Thu, 14 Feb 2013 09:44:49 +0000
Subject: net: Add skb_unclone() helper function.

This function will be used in the next GRE_GSO patch. This patch does
not change any functionality.

Signed-off-by: Pravin B Shelar
Acked-by: Eric Dumazet
---
 include/linux/skbuff.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9da99520ccd5..ca6ee7d93edb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb)
 	       (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
 }
 
+static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
+{
+	might_sleep_if(pri & __GFP_WAIT);
+
+	if (skb_cloned(skb))
+		return pskb_expand_head(skb, 0, 0, pri);
+
+	return 0;
+}
+
 /**
  * skb_header_cloned - is the header a clone
  * @skb: buffer to check
--
cgit v1.2.2
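
Typical usage of the new helper (a minimal, hypothetical sketch of the
pattern it encapsulates, assuming <linux/ip.h>; not code from a real
kernel path): any path about to modify skb data first ensures the data
is private:

    /* Sketch: rewrite a header field in place. skb_unclone()
     * replaces the open-coded
     * "if (skb_cloned(skb)) pskb_expand_head(skb, 0, 0, pri);"
     * pattern. A real caller would also update the IP checksum.
     */
    static int rewrite_tos(struct sk_buff *skb, __u8 tos)
    {
            int err = skb_unclone(skb, GFP_ATOMIC);

            if (err)
                    return err;
            ip_hdr(skb)->tos = tos; /* safe: data is now private */
            return 0;
    }
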
From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Thu, 14 Feb 2013 14:02:41 +0000
Subject: v4 GRE: Add TCP segmentation offload for GRE

The following patch adds a GRE protocol offload handler so that
skb_gso_segment() can segment GRE packets.

An SKB GSO CB is added to keep track of the total header length so
that skb_segment() can push the entire header; e.g. in the case of
GRE, skb_segment() needs to push the inner and outer headers onto
every segment.

A new NETIF_F_GRE_GSO feature is added for devices which support HW
GRE TSO offload. Currently no device supports it, so GRE GSO always
falls back to software GSO.

[ Compute pkt_len before ip_local_out() invocation. -DaveM ]

Signed-off-by: Pravin B Shelar
Signed-off-by: David S. Miller
---
 include/linux/skbuff.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
(limited to 'include/linux/skbuff.h')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca6ee7d93edb..821c7f45d2a7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,6 +314,8 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	SKB_GSO_GRE = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 }
 #endif
 
+/* Keeps track of mac header offset relative to skb->head.
+ * It is useful for TSO of Tunneling protocol. e.g. GRE.
+ * For non-tunnel skb it points to skb_mac_header() and for
+ * tunnel skb it points to outer mac header. */
+struct skb_gso_cb {
+	int mac_offset;
+};
+#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
+
+static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+{
+	return (skb_mac_header(inner_skb) - inner_skb->head) -
+	       SKB_GSO_CB(inner_skb)->mac_offset;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
--
cgit v1.2.2
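
As a sketch of how the CB is meant to be used (a hypothetical outline;
the real bookkeeping lives in skb_gso_segment()/skb_segment() and the
GRE offload handler): the GSO entry point records where the outer mac
header sits before any headers are pulled, and the segmenter later
recovers the outer header length to replicate onto each segment:

    /* Sketch of the two ends of the bookkeeping; hypothetical
     * helpers, not the verbatim kernel implementation.
     */
    static void gso_record_outer_mac(struct sk_buff *skb)
    {
            /* Before pulling headers: record the outer mac header
             * offset relative to skb->head.
             */
            SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
    }

    static int gso_outer_header_len(const struct sk_buff *skb)
    {
            /* After the tunnel handler has pulled the outer headers
             * and reset skb->mac_header to the inner frame: the
             * number of outer-header bytes to copy in front of
             * every segment.
             */
            return skb_tnl_header_len(skb);
    }
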