author     Paul Durrant <Paul.Durrant@citrix.com>  2013-10-16 12:50:29 -0400
committer  David S. Miller <davem@davemloft.net>   2013-10-17 15:35:16 -0400
commit     2eba61d55e5104d0bf08ba4a9cc609613f52b4c9
tree       0eb2237b5c36d8ff1bc40d8867150463f7e9b2e9 /drivers
parent     146c8a77d27bcbd7722120f70f51e3b287205d0a
xen-netback: add support for IPv6 checksum offload from guest
For performance of VM to VM traffic on a single host it is better to avoid
calculation of TCP/UDP checksum in the sending frontend. To allow this, this
patch adds the code necessary to set up partial checksum for IPv6 packets and
the xenstore flag feature-ipv6-csum-offload to advertise that fact to
frontends.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/net/xen-netback/netback.c  235
-rw-r--r--  drivers/net/xen-netback/xenbus.c     9
2 files changed, 205 insertions, 39 deletions
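
For context, the "partial checksum" the commit message refers to is the
CHECKSUM_PARTIAL contract: the sender seeds the checksum field with only the
pseudo-header sum and records, via skb_partial_csum_set(), where the real
checksum lives, so the stack or NIC can complete it later. Below is a minimal
sketch of that convention for TCP over IPv4, not part of this patch; it
assumes the network header sits at skb->data (as on netback's TX path), and
the helper name example_setup_tcp_csum is hypothetical.

#include <linux/errno.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <net/checksum.h>

static int example_setup_tcp_csum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	unsigned int off = iph->ihl * 4;	/* IP header length in bytes */

	/* Record where checksummed data starts (just past the IP header)
	 * and the offset of the checksum field within the TCP header.
	 */
	if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check)))
		return -EPROTO;

	/* Seed the field with the pseudo-header checksum; whoever
	 * finishes the packet folds in the sum over the payload.
	 */
	tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						 skb->len - off,
						 IPPROTO_TCP, 0);
	return 0;
}
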
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611de..4271f8d1da7a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
- */
-#define PKT_PROT_LEN    (ETH_HLEN + \
-			 VLAN_HLEN + \
-			 sizeof(struct iphdr) + MAX_IPOPTLEN + \
-			 sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+/* This is a minimum size for the linear area to avoid lots of
+ * calls to __pskb_pull_tail() as we set up checksum offsets. The
+ * value 128 was chosen as it covers all IPv4 and most likely
+ * IPv6 headers.
+ */
+#define PKT_PROT_LEN 128
 
 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
@@ -1118,61 +1115,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+{
+	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+		/* If we need to pullup then pullup to the max, so we
+		 * won't need to do it again.
+		 */
+		int target = min_t(int, skb->len, MAX_TCP_HEADER);
+		__pskb_pull_tail(skb, target - skb_headlen(skb));
+	}
+}
+
+static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
+			     int recalculate_partial_csum)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
 	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
 
-	/*
-	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-	 * recalculate the partial checksum.
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		vif->rx_gso_checksum_fixup++;
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
-	}
+	off = sizeof(struct iphdr);
 
-	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+	header_size = skb->network_header + off + MAX_IPOPTLEN;
+	maybe_pull_tail(skb, header_size);
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
+	off = iph->ihl * 4;
 
-	iph = (void *)skb->data;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
 							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
 		if (net_ratelimit())
 			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
 				   iph->protocol);
 		goto out;
 	}
@@ -1183,6 +1193,158 @@ out:
 	return err;
 }
 
+static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
+			       int recalculate_partial_csum)
+{
+	int err = -EPROTO;
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	maybe_pull_tail(skb, header_size);
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += (hp->hdrlen+2)<<2;
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
+			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
+			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			netdev_err(vif->dev,
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
+				   nexthdr);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+{
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		vif->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
+
+	return err;
+}
+
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 {
 	unsigned long now = jiffies;
@@ -1428,12 +1590,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)
 
 		xenvif_fill_frags(vif, skb);
 
-		/*
-		 * If the initial fragment was < PKT_PROT_LEN then
-		 * pull through some bytes from the other fragments to
-		 * increase the linear region to PKT_PROT_LEN bytes.
-		 */
-		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index ad27b15242cd..108e7523017a 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev,
 		goto abort_transaction;
 	}
 
+	/* We support partial checksum setup for IPv6 packets */
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-ipv6-csum-offload",
+			    "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
 	/* We support rx-copy path. */
 	err = xenbus_printf(xbt, dev->nodename,
 			    "feature-rx-copy", "%d", 1);