diff options
| -rw-r--r-- | include/linux/netdevice.h | 18 | ||||
| -rw-r--r-- | net/core/dev.c | 26 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 10 | ||||
| -rw-r--r-- | net/ipv4/gre_offload.c | 160 | ||||
| -rw-r--r-- | net/ipv4/tcp_offload.c | 7 | ||||
| -rw-r--r-- | net/ipv6/ip6_offload.c | 2 |
6 files changed, 216 insertions, 7 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d9c961aa6a7f..a2a70cc70e7b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
| @@ -1632,7 +1632,10 @@ struct napi_gro_cb { | |||
| 1632 | int data_offset; | 1632 | int data_offset; |
| 1633 | 1633 | ||
| 1634 | /* This is non-zero if the packet cannot be merged with the new skb. */ | 1634 | /* This is non-zero if the packet cannot be merged with the new skb. */ |
| 1635 | int flush; | 1635 | u16 flush; |
| 1636 | |||
| 1637 | /* Save the IP ID here and check when we get to the transport layer */ | ||
| 1638 | u16 flush_id; | ||
| 1636 | 1639 | ||
| 1637 | /* Number of segments aggregated. */ | 1640 | /* Number of segments aggregated. */ |
| 1638 | u16 count; | 1641 | u16 count; |
| @@ -1651,6 +1654,9 @@ struct napi_gro_cb { | |||
| 1651 | /* Used in ipv6_gro_receive() */ | 1654 | /* Used in ipv6_gro_receive() */ |
| 1652 | int proto; | 1655 | int proto; |
| 1653 | 1656 | ||
| 1657 | /* used to support CHECKSUM_COMPLETE for tunneling protocols */ | ||
| 1658 | __wsum csum; | ||
| 1659 | |||
| 1654 | /* used in skb_gro_receive() slow path */ | 1660 | /* used in skb_gro_receive() slow path */ |
| 1655 | struct sk_buff *last; | 1661 | struct sk_buff *last; |
| 1656 | }; | 1662 | }; |
| @@ -1900,6 +1906,14 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) | |||
| 1900 | skb_network_offset(skb); | 1906 | skb_network_offset(skb); |
| 1901 | } | 1907 | } |
| 1902 | 1908 | ||
| 1909 | static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, | ||
| 1910 | const void *start, unsigned int len) | ||
| 1911 | { | ||
| 1912 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
| 1913 | NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, | ||
| 1914 | csum_partial(start, len, 0)); | ||
| 1915 | } | ||
| 1916 | |||
| 1903 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, | 1917 | static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, |
| 1904 | unsigned short type, | 1918 | unsigned short type, |
| 1905 | const void *daddr, const void *saddr, | 1919 | const void *daddr, const void *saddr, |
| @@ -2440,6 +2454,8 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); | |||
| 2440 | void napi_gro_flush(struct napi_struct *napi, bool flush_old); | 2454 | void napi_gro_flush(struct napi_struct *napi, bool flush_old); |
| 2441 | struct sk_buff *napi_get_frags(struct napi_struct *napi); | 2455 | struct sk_buff *napi_get_frags(struct napi_struct *napi); |
| 2442 | gro_result_t napi_gro_frags(struct napi_struct *napi); | 2456 | gro_result_t napi_gro_frags(struct napi_struct *napi); |
| 2457 | struct packet_offload *gro_find_receive_by_type(__be16 type); | ||
| 2458 | struct packet_offload *gro_find_complete_by_type(__be16 type); | ||
| 2443 | 2459 | ||
| 2444 | static inline void napi_free_frags(struct napi_struct *napi) | 2460 | static inline void napi_free_frags(struct napi_struct *napi) |
| 2445 | { | 2461 | { |
diff --git a/net/core/dev.c b/net/core/dev.c index b3c574a88026..ce01847793c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -3846,6 +3846,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff | |||
| 3846 | 3846 | ||
| 3847 | skb_gro_reset_offset(skb); | 3847 | skb_gro_reset_offset(skb); |
| 3848 | gro_list_prepare(napi, skb); | 3848 | gro_list_prepare(napi, skb); |
| 3849 | NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ | ||
| 3849 | 3850 | ||
| 3850 | rcu_read_lock(); | 3851 | rcu_read_lock(); |
| 3851 | list_for_each_entry_rcu(ptype, head, list) { | 3852 | list_for_each_entry_rcu(ptype, head, list) { |
| @@ -3922,6 +3923,31 @@ normal: | |||
| 3922 | goto pull; | 3923 | goto pull; |
| 3923 | } | 3924 | } |
| 3924 | 3925 | ||
| 3926 | struct packet_offload *gro_find_receive_by_type(__be16 type) | ||
| 3927 | { | ||
| 3928 | struct list_head *offload_head = &offload_base; | ||
| 3929 | struct packet_offload *ptype; | ||
| 3930 | |||
| 3931 | list_for_each_entry_rcu(ptype, offload_head, list) { | ||
| 3932 | if (ptype->type != type || !ptype->callbacks.gro_receive) | ||
| 3933 | continue; | ||
| 3934 | return ptype; | ||
| 3935 | } | ||
| 3936 | return NULL; | ||
| 3937 | } | ||
| 3938 | |||
| 3939 | struct packet_offload *gro_find_complete_by_type(__be16 type) | ||
| 3940 | { | ||
| 3941 | struct list_head *offload_head = &offload_base; | ||
| 3942 | struct packet_offload *ptype; | ||
| 3943 | |||
| 3944 | list_for_each_entry_rcu(ptype, offload_head, list) { | ||
| 3945 | if (ptype->type != type || !ptype->callbacks.gro_complete) | ||
| 3946 | continue; | ||
| 3947 | return ptype; | ||
| 3948 | } | ||
| 3949 | return NULL; | ||
| 3950 | } | ||
| 3925 | 3951 | ||
| 3926 | static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | 3952 | static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) |
| 3927 | { | 3953 | { |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b8bc1a3d5cf1..6268a4751e64 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
| @@ -1391,9 +1391,15 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
| 1391 | NAPI_GRO_CB(p)->flush |= | 1391 | NAPI_GRO_CB(p)->flush |= |
| 1392 | (iph->ttl ^ iph2->ttl) | | 1392 | (iph->ttl ^ iph2->ttl) | |
| 1393 | (iph->tos ^ iph2->tos) | | 1393 | (iph->tos ^ iph2->tos) | |
| 1394 | (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | | 1394 | ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); |
| 1395 | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); | ||
| 1396 | 1395 | ||
| 1396 | /* Save the IP ID check to be included later when we get to | ||
| 1397 | * the transport layer so only the innermost IP ID is checked. | ||
| 1398 | * This is because some GSO/TSO implementations do not | ||
| 1399 | * correctly increment the IP ID for the outer hdrs. | ||
| 1400 | */ | ||
| 1401 | NAPI_GRO_CB(p)->flush_id = | ||
| 1402 | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); | ||
| 1397 | NAPI_GRO_CB(p)->flush |= flush; | 1403 | NAPI_GRO_CB(p)->flush |= flush; |
| 1398 | } | 1404 | } |
| 1399 | 1405 | ||
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 9138cfb10140..746a7b10d434 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c | |||
| @@ -116,10 +116,170 @@ out: | |||
| 116 | return segs; | 116 | return segs; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | /* Compute the whole skb csum in s/w and store it, then verify GRO csum | ||
| 120 | * starting from gro_offset. | ||
| 121 | */ | ||
| 122 | static __sum16 gro_skb_checksum(struct sk_buff *skb) | ||
| 123 | { | ||
| 124 | __sum16 sum; | ||
| 125 | |||
| 126 | skb->csum = skb_checksum(skb, 0, skb->len, 0); | ||
| 127 | NAPI_GRO_CB(skb)->csum = csum_sub(skb->csum, | ||
| 128 | csum_partial(skb->data, skb_gro_offset(skb), 0)); | ||
| 129 | sum = csum_fold(NAPI_GRO_CB(skb)->csum); | ||
| 130 | if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) { | ||
| 131 | if (unlikely(!sum)) | ||
| 132 | netdev_rx_csum_fault(skb->dev); | ||
| 133 | } else | ||
| 134 | skb->ip_summed = CHECKSUM_COMPLETE; | ||
| 135 | |||
| 136 | return sum; | ||
| 137 | } | ||
| 138 | |||
| 139 | static struct sk_buff **gre_gro_receive(struct sk_buff **head, | ||
| 140 | struct sk_buff *skb) | ||
| 141 | { | ||
| 142 | struct sk_buff **pp = NULL; | ||
| 143 | struct sk_buff *p; | ||
| 144 | const struct gre_base_hdr *greh; | ||
| 145 | unsigned int hlen, grehlen; | ||
| 146 | unsigned int off; | ||
| 147 | int flush = 1; | ||
| 148 | struct packet_offload *ptype; | ||
| 149 | __be16 type; | ||
| 150 | |||
| 151 | off = skb_gro_offset(skb); | ||
| 152 | hlen = off + sizeof(*greh); | ||
| 153 | greh = skb_gro_header_fast(skb, off); | ||
| 154 | if (skb_gro_header_hard(skb, hlen)) { | ||
| 155 | greh = skb_gro_header_slow(skb, hlen, off); | ||
| 156 | if (unlikely(!greh)) | ||
| 157 | goto out; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* Only support version 0 and K (key), C (csum) flags. Note that | ||
| 161 | * although the support for the S (seq#) flag can be added easily | ||
| 162 | * for GRO, this is problematic for GSO and hence cannot be enabled | ||
| 163 | * here because a GRO pkt may end up in the forwarding path, thus | ||
| 164 | * requiring GSO support to break it up correctly. | ||
| 165 | */ | ||
| 166 | if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) | ||
| 167 | goto out; | ||
| 168 | |||
| 169 | type = greh->protocol; | ||
| 170 | |||
| 171 | rcu_read_lock(); | ||
| 172 | ptype = gro_find_receive_by_type(type); | ||
| 173 | if (ptype == NULL) | ||
| 174 | goto out_unlock; | ||
| 175 | |||
| 176 | grehlen = GRE_HEADER_SECTION; | ||
| 177 | |||
| 178 | if (greh->flags & GRE_KEY) | ||
| 179 | grehlen += GRE_HEADER_SECTION; | ||
| 180 | |||
| 181 | if (greh->flags & GRE_CSUM) | ||
| 182 | grehlen += GRE_HEADER_SECTION; | ||
| 183 | |||
| 184 | hlen = off + grehlen; | ||
| 185 | if (skb_gro_header_hard(skb, hlen)) { | ||
| 186 | greh = skb_gro_header_slow(skb, hlen, off); | ||
| 187 | if (unlikely(!greh)) | ||
| 188 | goto out_unlock; | ||
| 189 | } | ||
| 190 | if (greh->flags & GRE_CSUM) { /* Need to verify GRE csum first */ | ||
| 191 | __sum16 csum = 0; | ||
| 192 | |||
| 193 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
| 194 | csum = csum_fold(NAPI_GRO_CB(skb)->csum); | ||
| 195 | /* Don't trust csum error calculated/reported by h/w */ | ||
| 196 | if (skb->ip_summed == CHECKSUM_NONE || csum != 0) | ||
| 197 | csum = gro_skb_checksum(skb); | ||
| 198 | |||
| 199 | /* GRE CSUM is the 1's complement of the 1's complement sum | ||
| 200 | * of the GRE hdr plus payload so it should add up to 0xffff | ||
| 201 | * (and 0 after csum_fold()) just like the IPv4 hdr csum. | ||
| 202 | */ | ||
| 203 | if (csum) | ||
| 204 | goto out_unlock; | ||
| 205 | } | ||
| 206 | flush = 0; | ||
| 207 | |||
| 208 | for (p = *head; p; p = p->next) { | ||
| 209 | const struct gre_base_hdr *greh2; | ||
| 210 | |||
| 211 | if (!NAPI_GRO_CB(p)->same_flow) | ||
| 212 | continue; | ||
| 213 | |||
| 214 | /* The following checks are needed to ensure only pkts | ||
| 215 | * from the same tunnel are considered for aggregation. | ||
| 216 | * The criteria for "the same tunnel" includes: | ||
| 217 | * 1) same version (we only support version 0 here) | ||
| 218 | * 2) same protocol (we only support ETH_P_IP for now) | ||
| 219 | * 3) same set of flags | ||
| 220 | * 4) same key if the key field is present. | ||
| 221 | */ | ||
| 222 | greh2 = (struct gre_base_hdr *)(p->data + off); | ||
| 223 | |||
| 224 | if (greh2->flags != greh->flags || | ||
| 225 | greh2->protocol != greh->protocol) { | ||
| 226 | NAPI_GRO_CB(p)->same_flow = 0; | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | if (greh->flags & GRE_KEY) { | ||
| 230 | /* compare keys */ | ||
| 231 | if (*(__be32 *)(greh2+1) != *(__be32 *)(greh+1)) { | ||
| 232 | NAPI_GRO_CB(p)->same_flow = 0; | ||
| 233 | continue; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | skb_gro_pull(skb, grehlen); | ||
| 239 | |||
| 240 | /* Adjust NAPI_GRO_CB(skb)->csum after skb_gro_pull() */ | ||
| 241 | skb_gro_postpull_rcsum(skb, greh, grehlen); | ||
| 242 | |||
| 243 | pp = ptype->callbacks.gro_receive(head, skb); | ||
| 244 | |||
| 245 | out_unlock: | ||
| 246 | rcu_read_unlock(); | ||
| 247 | out: | ||
| 248 | NAPI_GRO_CB(skb)->flush |= flush; | ||
| 249 | |||
| 250 | return pp; | ||
| 251 | } | ||
| 252 | |||
| 253 | int gre_gro_complete(struct sk_buff *skb, int nhoff) | ||
| 254 | { | ||
| 255 | struct gre_base_hdr *greh = (struct gre_base_hdr *)(skb->data + nhoff); | ||
| 256 | struct packet_offload *ptype; | ||
| 257 | unsigned int grehlen = sizeof(*greh); | ||
| 258 | int err = -ENOENT; | ||
| 259 | __be16 type; | ||
| 260 | |||
| 261 | type = greh->protocol; | ||
| 262 | if (greh->flags & GRE_KEY) | ||
| 263 | grehlen += GRE_HEADER_SECTION; | ||
| 264 | |||
| 265 | if (greh->flags & GRE_CSUM) | ||
| 266 | grehlen += GRE_HEADER_SECTION; | ||
| 267 | |||
| 268 | rcu_read_lock(); | ||
| 269 | ptype = gro_find_complete_by_type(type); | ||
| 270 | if (ptype != NULL) | ||
| 271 | err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); | ||
| 272 | |||
| 273 | rcu_read_unlock(); | ||
| 274 | return err; | ||
| 275 | } | ||
| 276 | |||
| 119 | static const struct net_offload gre_offload = { | 277 | static const struct net_offload gre_offload = { |
| 120 | .callbacks = { | 278 | .callbacks = { |
| 121 | .gso_send_check = gre_gso_send_check, | 279 | .gso_send_check = gre_gso_send_check, |
| 122 | .gso_segment = gre_gso_segment, | 280 | .gso_segment = gre_gso_segment, |
| 281 | .gro_receive = gre_gro_receive, | ||
| 282 | .gro_complete = gre_gro_complete, | ||
| 123 | }, | 283 | }, |
| 124 | }; | 284 | }; |
| 125 | 285 | ||
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 2658a27f540d..771a3950d87a 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c | |||
| @@ -197,7 +197,8 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 197 | goto out_check_final; | 197 | goto out_check_final; |
| 198 | 198 | ||
| 199 | found: | 199 | found: |
| 200 | flush = NAPI_GRO_CB(p)->flush; | 200 | /* Include the IP ID check below from the innermost IP hdr */ |
| 201 | flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; | ||
| 201 | flush |= (__force int)(flags & TCP_FLAG_CWR); | 202 | flush |= (__force int)(flags & TCP_FLAG_CWR); |
| 202 | flush |= (__force int)((flags ^ tcp_flag_word(th2)) & | 203 | flush |= (__force int)((flags ^ tcp_flag_word(th2)) & |
| 203 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); | 204 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); |
| @@ -230,7 +231,7 @@ out_check_final: | |||
| 230 | pp = head; | 231 | pp = head; |
| 231 | 232 | ||
| 232 | out: | 233 | out: |
| 233 | NAPI_GRO_CB(skb)->flush |= flush; | 234 | NAPI_GRO_CB(skb)->flush |= (flush != 0); |
| 234 | 235 | ||
| 235 | return pp; | 236 | return pp; |
| 236 | } | 237 | } |
| @@ -280,7 +281,7 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * | |||
| 280 | if (NAPI_GRO_CB(skb)->flush) | 281 | if (NAPI_GRO_CB(skb)->flush) |
| 281 | goto skip_csum; | 282 | goto skip_csum; |
| 282 | 283 | ||
| 283 | wsum = skb->csum; | 284 | wsum = NAPI_GRO_CB(skb)->csum; |
| 284 | 285 | ||
| 285 | switch (skb->ip_summed) { | 286 | switch (skb->ip_summed) { |
| 286 | case CHECKSUM_NONE: | 287 | case CHECKSUM_NONE: |
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 6fb4162fa785..1e8683b135bb 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c | |||
| @@ -190,7 +190,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, | |||
| 190 | unsigned int nlen; | 190 | unsigned int nlen; |
| 191 | unsigned int hlen; | 191 | unsigned int hlen; |
| 192 | unsigned int off; | 192 | unsigned int off; |
| 193 | int flush = 1; | 193 | u16 flush = 1; |
| 194 | int proto; | 194 | int proto; |
| 195 | __wsum csum; | 195 | __wsum csum; |
| 196 | 196 | ||
