path: root/drivers/net/xen-netback/netback.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2013-11-13 03:40:34 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-11-13 03:40:34 -0500
commit    42a2d923cc349583ebf6fdd52a7d35e1c2f7e6bd (patch)
tree      2b2b0c03b5389c1301800119333967efafd994ca /drivers/net/xen-netback/netback.c
parent    5cbb3d216e2041700231bcfc383ee5f8b7fc8b74 (diff)
parent    75ecab1df14d90e86cebef9ec5c76befde46e65f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) The addition of nftables. No longer will we need protocol-aware
    firewall filtering modules, it can all live in userspace.

    At the core of nftables is a, for lack of a better term, virtual
    machine that executes byte codes to inspect packets or metadata
    (arriving interface index, etc.) and make verdict decisions. Besides
    support for loading packet contents and comparing them, the
    interpreter supports lookups in various data structures as
    fundamental operations. For example, sets are supported, so one
    could create a set of whitelist IP address entries which have
    ACCEPT verdicts attached to them, and use the appropriate byte
    codes to do such lookups.

    Since the interpreted code is composed in userspace, userspace can
    do things like optimize it before giving it to the kernel.

    Another major improvement is the capability of atomically updating
    portions of the ruleset. In the existing netfilter implementation,
    one has to update the entire rule set in order to make a change,
    and this is very expensive.

    Userspace tools exist to create nftables rules from existing
    netfilter rule sets, but both kernel implementations will need to
    co-exist for quite some time as we transition from the old to the
    new stuff.

    Kudos to Patrick McHardy, Pablo Neira Ayuso, and others who have
    worked so hard on this.

 2) Daniel Borkmann and Hannes Frederic Sowa made several improvements
    to our pseudo-random number generator, mostly used for things like
    UDP port randomization and netfilter. In particular the taus88
    generator is upgraded to taus113, and test cases are added.

 3) Support 64-bit rates in HTB and TBF schedulers, from Eric Dumazet
    and Yang Yingliang.

 4) Add support for new 577xx tigon3 chips to tg3 driver, from Nithin
    Sujir.

 5) Fix two fatal flaws in TCP dynamic right sizing, from Eric Dumazet,
    Neal Cardwell, and Yuchung Cheng.

 6) Allow IP_TOS and IP_TTL to be specified in sendmsg() ancillary
    control message data, much like other socket option attributes.
    From Francesco Fusco.

 7) Allow applications to specify a cap on the rate computed
    automatically by the kernel for pacing flows, via a new
    SO_MAX_PACING_RATE socket option. From Eric Dumazet. (A usage
    sketch follows the shortlog below.)

 8) Make the initial autotuned send buffer sizing in TCP more closely
    reflect actual needs, from Eric Dumazet.

 9) Currently early socket demux only happens for TCP sockets, but we
    can do it for connected UDP sockets too. Implementation from Shawn
    Bohrer.

10) Refactor inet socket demux with the goal of improving hash demux
    performance for listening sockets. The main goals are being able
    to use RCU lookups even on request sockets, and eliminating the
    listening lock contention. From Eric Dumazet.

11) The bonding layer has many demuxes in its fast path, and an RCU
    conversion was started back in 3.11; several changes here extend
    the RCU usage to even more locations. From Ding Tianhong and Wang
    Yufen, based upon suggestions by Nikolay Aleksandrov and Veaceslav
    Falico.

12) Allow stackability of segmentation offloads to, in particular,
    allow segmentation offloading over tunnels. From Eric Dumazet.

13) Significantly improve the handling of the secret keys we feed into
    the various hash functions in the inet hashtables, TCP fast open,
    and syncookies. From Hannes Frederic Sowa. The key fundamental
    operation is "net_get_random_once()" which uses static keys.
    Hannes even extended this to ipv4/ipv6 fragmentation handling and
    our generic flow dissector.

14) The generic driver layer now takes care to set the driver data to
    NULL on device removal, so it's no longer necessary for drivers to
    explicitly set it to NULL. Many drivers have been cleaned up in
    this way, from Jingoo Han.

15) Add a BPF based packet scheduler classifier, from Daniel Borkmann.

16) Improve CRC32 interfaces and generic SKB checksum iterators so
    that SCTP's checksumming can be handled more cleanly. Also from
    Daniel Borkmann.

17) Add a new PMTU discovery mode, IP_PMTUDISC_INTERFACE, which forces
    using the interface MTU value. This helps avoid PMTU attacks,
    particularly on DNS servers. From Hannes Frederic Sowa.

18) Use generic XPS for transmit queue steering rather than an
    internal (re-)implementation in virtio-net. From Jason Wang.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits)
  random32: add test cases for taus113 implementation
  random32: upgrade taus88 generator to taus113 from errata paper
  random32: move rnd_state to linux/random.h
  random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized
  random32: add periodic reseeding
  random32: fix off-by-one in seeding requirement
  PHY: Add RTL8201CP phy_driver to realtek
  xtsonic: add missing platform_set_drvdata() in xtsonic_probe()
  macmace: add missing platform_set_drvdata() in mace_probe()
  ethernet/arc/arc_emac: add missing platform_set_drvdata() in arc_emac_probe()
  ipv6: protect for_each_sk_fl_rcu in mem_check with rcu_read_lock_bh
  vlan: Implement vlan_dev_get_egress_qos_mask as an inline.
  ixgbe: add warning when max_vfs is out of range.
  igb: Update link modes display in ethtool
  netfilter: push reasm skb through instead of original frag skbs
  ip6_output: fragment outgoing reassembled skb properly
  MAINTAINERS: mv643xx_eth: take over maintainership from Lennart
  net_sched: tbf: support of 64bit rates
  ixgbe: deleting dfwd stations out of order can cause null ptr deref
  ixgbe: fix build err, num_rx_queues is only available with CONFIG_RPS
  ...
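As a rough illustration of the SO_MAX_PACING_RATE option mentioned in item 7, here is a minimal user-space sketch. It is not part of this merge: it assumes the option is set with setsockopt() at the SOL_SOCKET level with a rate in bytes per second; the fallback constant 47 (the asm-generic value, which differs on some architectures) and the 5 MB/s cap are illustrative choices, and error handling is kept minimal.

#include <stdio.h>
#include <sys/socket.h>

#ifndef SO_MAX_PACING_RATE
#define SO_MAX_PACING_RATE 47	/* asm-generic value; assumed fallback for older headers */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	unsigned int rate = 5 * 1000 * 1000;	/* cap pacing at roughly 5 MB/s */

	if (fd < 0)
		return 1;

	/* Ask the kernel not to pace this socket's transmissions
	 * faster than 'rate' bytes per second. */
	if (setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE, &rate, sizeof(rate)) < 0)
		perror("setsockopt(SO_MAX_PACING_RATE)");

	/* ... connect() and send data as usual ... */
	return 0;
}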
Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--  drivers/net/xen-netback/netback.c  294
1 file changed, 242 insertions(+), 52 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 900da4b243ad..919b6509455c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
+/* This is a miniumum size for the linear area to avoid lots of
+ * calls to __pskb_pull_tail() as we set up checksum offsets. The
+ * value 128 was chosen as it covers all IPv4 and most likely
+ * IPv6 headers.
  */
-#define PKT_PROT_LEN (ETH_HLEN + \
-		      VLAN_HLEN + \
-		      sizeof(struct iphdr) + MAX_IPOPTLEN + \
-		      sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+#define PKT_PROT_LEN 128
 
 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
@@ -145,7 +142,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
 	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-	if (vif->can_sg || vif->gso || vif->gso_prefix)
+	if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 
 	return max;
@@ -317,6 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 
 	meta = npo->meta + npo->meta_prod++;
+	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 	meta->gso_size = 0;
 	meta->size = 0;
 	meta->id = req->id;
@@ -339,6 +337,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
 	unsigned long bytes;
+	int gso_type;
 
 	/* Data must not cross a page boundary. */
 	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
@@ -397,7 +396,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 	}
 
 	/* Leave a gap for the GSO descriptor. */
-	if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+	else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	else
+		gso_type = XEN_NETIF_GSO_TYPE_NONE;
+
+	if (*head && ((1 << gso_type) & vif->gso_mask))
 		vif->rx.req_cons++;
 
 	*head = 0; /* There must be something in this buffer now. */
@@ -428,14 +434,28 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	unsigned char *data;
 	int head = 1;
 	int old_meta_prod;
+	int gso_type;
+	int gso_size;
 
 	old_meta_prod = npo->meta_prod;
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else {
+		gso_type = XEN_NETIF_GSO_TYPE_NONE;
+		gso_size = 0;
+	}
+
 	/* Set up a GSO prefix descriptor, if necessary */
-	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
 		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 		meta = npo->meta + npo->meta_prod++;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
 		meta->size = 0;
 		meta->id = req->id;
 	}
@@ -443,10 +463,13 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 	meta = npo->meta + npo->meta_prod++;
 
-	if (!vif->gso_prefix)
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	else
+	if ((1 << gso_type) & vif->gso_mask) {
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
+	} else {
+		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 		meta->gso_size = 0;
+	}
 
 	meta->size = 0;
 	meta->id = req->id;
@@ -592,7 +615,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 		vif = netdev_priv(skb->dev);
 
-		if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_prefix_mask) {
 			resp = RING_GET_RESPONSE(&vif->rx,
 						 vif->rx.rsp_prod_pvt++);
 
@@ -629,7 +653,8 @@ void xenvif_rx_action(struct xenvif *vif)
 					vif->meta[npo.meta_cons].size,
 					flags);
 
-		if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_mask) {
 			struct xen_netif_extra_info *gso =
 				(struct xen_netif_extra_info *)
 				RING_GET_RESPONSE(&vif->rx,
@@ -637,8 +662,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 			resp->flags |= XEN_NETRXF_extra_info;
 
+			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
 			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
-			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 			gso->u.gso.pad = 0;
 			gso->u.gso.features = 0;
 
@@ -1101,15 +1126,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	switch (gso->u.gso.type) {
+	case XEN_NETIF_GSO_TYPE_TCPV4:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+		break;
+	case XEN_NETIF_GSO_TYPE_TCPV6:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+		break;
+	default:
 		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
 		xenvif_fatal_tx_err(vif);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -1118,61 +1148,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+{
+	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+		/* If we need to pullup then pullup to the max, so we
+		 * won't need to do it again.
+		 */
+		int target = min_t(int, skb->len, MAX_TCP_HEADER);
+		__pskb_pull_tail(skb, target - skb_headlen(skb));
+	}
+}
+
+static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
+			     int recalculate_partial_csum)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
 	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
 
-	/*
-	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-	 * recalculate the partial checksum.
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		vif->rx_gso_checksum_fixup++;
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
-	}
+	off = sizeof(struct iphdr);
 
-	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+	header_size = skb->network_header + off + MAX_IPOPTLEN;
+	maybe_pull_tail(skb, header_size);
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
+	off = iph->ihl * 4;
 
-	iph = (void *)skb->data;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
 		if (net_ratelimit())
 			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
 				   iph->protocol);
 		goto out;
 	}
@@ -1183,6 +1226,158 @@ out:
 	return err;
 }
 
+static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
+			       int recalculate_partial_csum)
+{
+	int err = -EPROTO;
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	maybe_pull_tail(skb, header_size);
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += (hp->hdrlen+2)<<2;
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
+			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
+			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			netdev_err(vif->dev,
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
+				   nexthdr);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+{
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		vif->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
+
+	return err;
+}
+
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 {
 	u64 now = get_jiffies_64();
@@ -1428,12 +1623,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)
 
 		xenvif_fill_frags(vif, skb);
 
-		/*
-		 * If the initial fragment was < PKT_PROT_LEN then
-		 * pull through some bytes from the other fragments to
-		 * increase the linear region to PKT_PROT_LEN bytes.
-		 */
-		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}