Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--	drivers/net/xen-netback/netback.c	294
1 file changed, 242 insertions(+), 52 deletions(-)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611de..828fdab4f1a4 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
- */
-#define PKT_PROT_LEN    (ETH_HLEN + \
-			 VLAN_HLEN + \
-			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
-			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+/* This is a minimum size for the linear area to avoid lots of
+ * calls to __pskb_pull_tail() as we set up checksum offsets. The
+ * value 128 was chosen as it covers all IPv4 and most likely
+ * IPv6 headers.
+ */
+#define PKT_PROT_LEN 128
 
 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
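For a sense of scale (the constant values below are assumptions about the usual kernel definitions, not taken from this tree): the old macro worked out to roughly ETH_HLEN 14 + VLAN_HLEN 4 + iphdr 20 + MAX_IPOPTLEN 40 + tcphdr 20 + MAX_TCP_OPTION_SPACE 40 = 138 bytes, so the new fixed 128 bytes is in the same ballpark for IPv4, and anything beyond it is now pulled on demand by maybe_pull_tail() later in this patch. A trivial back-of-envelope sketch:

```c
#include <assert.h>

int main(void)
{
	/* Assumed values of the kernel constants used by the old macro. */
	int old_pkt_prot_len = 14 + 4 + 20 + 40 + 20 + 40;	/* = 138 bytes */
	int new_pkt_prot_len = 128;				/* fixed value after this patch */

	assert(old_pkt_prot_len == 138);
	assert(new_pkt_prot_len < old_pkt_prot_len);
	return 0;
}
```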
@@ -145,7 +142,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
 	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-	if (vif->can_sg || vif->gso || vif->gso_prefix)
+	if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 
 	return max;
@@ -317,6 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 
 	meta = npo->meta + npo->meta_prod++;
+	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 	meta->gso_size = 0;
 	meta->size = 0;
 	meta->id = req->id;
@@ -339,6 +337,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
 	unsigned long bytes;
+	int gso_type;
 
 	/* Data must not cross a page boundary. */
 	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
@@ -397,7 +396,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		}
 
 		/* Leave a gap for the GSO descriptor. */
-		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+		else
+			gso_type = XEN_NETIF_GSO_TYPE_NONE;
+
+		if (*head && ((1 << gso_type) & vif->gso_mask))
 			vif->rx.req_cons++;
 
 		*head = 0; /* There must be something in this buffer now. */
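The same SKB_GSO_* to XEN_NETIF_GSO_TYPE_* translation recurs below in xenvif_gop_skb(); a sketch of the mapping as a helper (the helper itself is hypothetical, the patch open-codes it in both places):

```c
/* Hypothetical helper; this patch open-codes the same mapping twice. */
static int xen_gso_type(const struct sk_buff *skb)
{
	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
		return XEN_NETIF_GSO_TYPE_TCPV4;
	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
		return XEN_NETIF_GSO_TYPE_TCPV6;
	return XEN_NETIF_GSO_TYPE_NONE;
}
```

A vif whose frontend negotiated both TCPv4 and TCPv6 GSO would then carry gso_mask = (1 << XEN_NETIF_GSO_TYPE_TCPV4) | (1 << XEN_NETIF_GSO_TYPE_TCPV6), so "(1 << gso_type) & vif->gso_mask" reduces the per-packet support check to a single bit test.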
@@ -428,14 +434,28 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	unsigned char *data;
 	int head = 1;
 	int old_meta_prod;
+	int gso_type;
+	int gso_size;
 
 	old_meta_prod = npo->meta_prod;
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else {
+		gso_type = XEN_NETIF_GSO_TYPE_NONE;
+		gso_size = 0;
+	}
+
 	/* Set up a GSO prefix descriptor, if necessary */
-	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
 		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 		meta = npo->meta + npo->meta_prod++;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
 		meta->size = 0;
 		meta->id = req->id;
 	}
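Note that the prefix-descriptor test above shifts by skb_shinfo(skb)->gso_type (the SKB_GSO_* flag word) rather than by the freshly computed gso_type. Assuming gso_prefix_mask is indexed by XEN_NETIF_GSO_TYPE_* values in the same way as gso_mask, a consistent form of the test would look like this sketch:

```c
	/* Sketch: test the prefix mask with the translated Xen GSO type,
	 * mirroring how gso_mask is tested elsewhere in this patch.
	 */
	bool wants_prefix = (1 << gso_type) & vif->gso_prefix_mask;
```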
@@ -443,10 +463,13 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 	meta = npo->meta + npo->meta_prod++;
 
-	if (!vif->gso_prefix)
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	else
+	if ((1 << gso_type) & vif->gso_mask) {
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
+	} else {
+		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 		meta->gso_size = 0;
+	}
 
 	meta->size = 0;
 	meta->id = req->id;
@@ -592,7 +615,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 		vif = netdev_priv(skb->dev);
 
-		if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_prefix_mask) {
 			resp = RING_GET_RESPONSE(&vif->rx,
 						 vif->rx.rsp_prod_pvt++);
 
@@ -629,7 +653,8 @@ void xenvif_rx_action(struct xenvif *vif)
 					vif->meta[npo.meta_cons].size,
 					flags);
 
-		if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_mask) {
 			struct xen_netif_extra_info *gso =
 				(struct xen_netif_extra_info *)
 				RING_GET_RESPONSE(&vif->rx,
@@ -637,8 +662,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 			resp->flags |= XEN_NETRXF_extra_info;
 
+			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
 			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
-			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 			gso->u.gso.pad = 0;
 			gso->u.gso.features = 0;
 
@@ -1101,15 +1126,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	switch (gso->u.gso.type) {
+	case XEN_NETIF_GSO_TYPE_TCPV4:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+		break;
+	case XEN_NETIF_GSO_TYPE_TCPV6:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+		break;
+	default:
 		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
 		xenvif_fatal_tx_err(vif);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
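For reference, xenvif_set_skb_gso() is parsing the GSO extra-info slot supplied by the frontend on the TX ring. A hypothetical example of what a guest would place there for a TCPv6 GSO packet, using the field names seen elsewhere in this diff plus XEN_NETIF_EXTRA_TYPE_GSO from the Xen netif interface header; the MSS value is just an example:

```c
	/* Hypothetical frontend-side extra slot for a TCPv6 GSO packet. */
	struct xen_netif_extra_info extra = {
		.type  = XEN_NETIF_EXTRA_TYPE_GSO,
		.flags = 0,
		.u.gso = {
			.type     = XEN_NETIF_GSO_TYPE_TCPV6,
			.size     = 1440,	/* example MSS */
			.pad      = 0,
			.features = 0,
		},
	};
```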
@@ -1118,61 +1148,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+{
+	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+		/* If we need to pullup then pullup to the max, so we
+		 * won't need to do it again.
+		 */
+		int target = min_t(int, skb->len, MAX_TCP_HEADER);
+		__pskb_pull_tail(skb, target - skb_headlen(skb));
+	}
+}
+
+static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
+			     int recalculate_partial_csum)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
 	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
 
-	/*
-	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-	 * recalculate the partial checksum.
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		vif->rx_gso_checksum_fixup++;
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
-	}
+	off = sizeof(struct iphdr);
 
-	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+	header_size = skb->network_header + off + MAX_IPOPTLEN;
+	maybe_pull_tail(skb, header_size);
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
+	off = iph->ihl * 4;
 
-	iph = (void *)skb->data;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
 		if (net_ratelimit())
 			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
 				   iph->protocol);
 		goto out;
 	}
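maybe_pull_tail() above pulls paged data into the linear area only when the requested length is not already linear, and always pulls up to MAX_TCP_HEADER so repeated calls stay cheap. A minimal usage sketch matching the pattern in checksum_setup_ip(), assuming an IPv4/TCP packet:

```c
	/* Ensure a worst-case IPv4 header (20 bytes plus up to 40 bytes of
	 * options) and the basic TCP header are linear before dereferencing.
	 */
	unsigned int need = skb->network_header +
			    sizeof(struct iphdr) + MAX_IPOPTLEN +
			    sizeof(struct tcphdr);

	maybe_pull_tail(skb, need);
```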
@@ -1183,6 +1226,158 @@ out:
 	return err;
 }
 
+static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
+			       int recalculate_partial_csum)
+{
+	int err = -EPROTO;
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	maybe_pull_tail(skb, header_size);
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += (hp->hdrlen+2)<<2;
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
+			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
+			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			netdev_err(vif->dev,
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
+				   nexthdr);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+{
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		vif->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
+
+	return err;
+}
+
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 {
 	unsigned long now = jiffies;
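One thing to watch in checksum_setup_ipv6() above: "fragment" is only assigned in the IPPROTO_FRAGMENT case, yet it is read unconditionally afterwards, so for packets without a fragment header the "if (fragment)" test reads an uninitialized variable. A defensive version would initialize it alongside "done" (sketch only):

```c
	bool fragment = false;	/* set only if an IPPROTO_FRAGMENT header is seen */
	bool done = false;
```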
@@ -1428,12 +1623,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)
 
 		xenvif_fill_frags(vif, skb);
 
-		/*
-		 * If the initial fragment was < PKT_PROT_LEN then
-		 * pull through some bytes from the other fragments to
-		 * increase the linear region to PKT_PROT_LEN bytes.
-		 */
-		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}
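For context (an assumption about the surrounding TX path, not shown in this hunk): once the linear area has been topped up to PKT_PROT_LEN, xenvif_tx_submit() runs checksum_setup() on the skb and drops packets whose checksum offsets cannot be established, roughly along these lines:

```c
		/* Assumed caller-side pattern in xenvif_tx_submit() (sketch). */
		if (checksum_setup(vif, skb)) {
			/* Could not set up checksum offsets: drop the packet. */
			kfree_skb(skb);
			continue;
		}
```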