diff options
author | Paul Durrant <Paul.Durrant@citrix.com> | 2013-10-16 12:50:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-10-17 15:35:16 -0400 |
commit | 2eba61d55e5104d0bf08ba4a9cc609613f52b4c9 (patch) | |
tree | 0eb2237b5c36d8ff1bc40d8867150463f7e9b2e9 /drivers | |
parent | 146c8a77d27bcbd7722120f70f51e3b287205d0a (diff) |
xen-netback: add support for IPv6 checksum offload from guest
For performance of VM to VM traffic on a single host it is better to avoid
calculation of TCP/UDP checksum in the sending frontend. To allow this, this
patch adds the code necessary to set up partial checksum for IPv6 packets
and xenstore flag feature-ipv6-csum-offload to advertise that fact to
frontends.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/xen-netback/netback.c | 235 | ||||
-rw-r--r-- | drivers/net/xen-netback/xenbus.c | 9 |
2 files changed, 205 insertions, 39 deletions
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c611de..4271f8d1da7a 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c | |||
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, | |||
109 | return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); | 109 | return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); |
110 | } | 110 | } |
111 | 111 | ||
112 | /* | 112 | /* This is a miniumum size for the linear area to avoid lots of |
113 | * This is the amount of packet we copy rather than map, so that the | 113 | * calls to __pskb_pull_tail() as we set up checksum offsets. The |
114 | * guest can't fiddle with the contents of the headers while we do | 114 | * value 128 was chosen as it covers all IPv4 and most likely |
115 | * packet processing on them (netfilter, routing, etc). | 115 | * IPv6 headers. |
116 | */ | 116 | */ |
117 | #define PKT_PROT_LEN (ETH_HLEN + \ | 117 | #define PKT_PROT_LEN 128 |
118 | VLAN_HLEN + \ | ||
119 | sizeof(struct iphdr) + MAX_IPOPTLEN + \ | ||
120 | sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE) | ||
121 | 118 | ||
122 | static u16 frag_get_pending_idx(skb_frag_t *frag) | 119 | static u16 frag_get_pending_idx(skb_frag_t *frag) |
123 | { | 120 | { |
@@ -1118,61 +1115,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif, | |||
1118 | return 0; | 1115 | return 0; |
1119 | } | 1116 | } |
1120 | 1117 | ||
1121 | static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) | 1118 | static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) |
1119 | { | ||
1120 | if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { | ||
1121 | /* If we need to pullup then pullup to the max, so we | ||
1122 | * won't need to do it again. | ||
1123 | */ | ||
1124 | int target = min_t(int, skb->len, MAX_TCP_HEADER); | ||
1125 | __pskb_pull_tail(skb, target - skb_headlen(skb)); | ||
1126 | } | ||
1127 | } | ||
1128 | |||
1129 | static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, | ||
1130 | int recalculate_partial_csum) | ||
1122 | { | 1131 | { |
1123 | struct iphdr *iph; | 1132 | struct iphdr *iph = (void *)skb->data; |
1133 | unsigned int header_size; | ||
1134 | unsigned int off; | ||
1124 | int err = -EPROTO; | 1135 | int err = -EPROTO; |
1125 | int recalculate_partial_csum = 0; | ||
1126 | 1136 | ||
1127 | /* | 1137 | off = sizeof(struct iphdr); |
1128 | * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy | ||
1129 | * peers can fail to set NETRXF_csum_blank when sending a GSO | ||
1130 | * frame. In this case force the SKB to CHECKSUM_PARTIAL and | ||
1131 | * recalculate the partial checksum. | ||
1132 | */ | ||
1133 | if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { | ||
1134 | vif->rx_gso_checksum_fixup++; | ||
1135 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
1136 | recalculate_partial_csum = 1; | ||
1137 | } | ||
1138 | 1138 | ||
1139 | /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ | 1139 | header_size = skb->network_header + off + MAX_IPOPTLEN; |
1140 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 1140 | maybe_pull_tail(skb, header_size); |
1141 | return 0; | ||
1142 | 1141 | ||
1143 | if (skb->protocol != htons(ETH_P_IP)) | 1142 | off = iph->ihl * 4; |
1144 | goto out; | ||
1145 | 1143 | ||
1146 | iph = (void *)skb->data; | ||
1147 | switch (iph->protocol) { | 1144 | switch (iph->protocol) { |
1148 | case IPPROTO_TCP: | 1145 | case IPPROTO_TCP: |
1149 | if (!skb_partial_csum_set(skb, 4 * iph->ihl, | 1146 | if (!skb_partial_csum_set(skb, off, |
1150 | offsetof(struct tcphdr, check))) | 1147 | offsetof(struct tcphdr, check))) |
1151 | goto out; | 1148 | goto out; |
1152 | 1149 | ||
1153 | if (recalculate_partial_csum) { | 1150 | if (recalculate_partial_csum) { |
1154 | struct tcphdr *tcph = tcp_hdr(skb); | 1151 | struct tcphdr *tcph = tcp_hdr(skb); |
1152 | |||
1153 | header_size = skb->network_header + | ||
1154 | off + | ||
1155 | sizeof(struct tcphdr); | ||
1156 | maybe_pull_tail(skb, header_size); | ||
1157 | |||
1155 | tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | 1158 | tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, |
1156 | skb->len - iph->ihl*4, | 1159 | skb->len - off, |
1157 | IPPROTO_TCP, 0); | 1160 | IPPROTO_TCP, 0); |
1158 | } | 1161 | } |
1159 | break; | 1162 | break; |
1160 | case IPPROTO_UDP: | 1163 | case IPPROTO_UDP: |
1161 | if (!skb_partial_csum_set(skb, 4 * iph->ihl, | 1164 | if (!skb_partial_csum_set(skb, off, |
1162 | offsetof(struct udphdr, check))) | 1165 | offsetof(struct udphdr, check))) |
1163 | goto out; | 1166 | goto out; |
1164 | 1167 | ||
1165 | if (recalculate_partial_csum) { | 1168 | if (recalculate_partial_csum) { |
1166 | struct udphdr *udph = udp_hdr(skb); | 1169 | struct udphdr *udph = udp_hdr(skb); |
1170 | |||
1171 | header_size = skb->network_header + | ||
1172 | off + | ||
1173 | sizeof(struct udphdr); | ||
1174 | maybe_pull_tail(skb, header_size); | ||
1175 | |||
1167 | udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, | 1176 | udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, |
1168 | skb->len - iph->ihl*4, | 1177 | skb->len - off, |
1169 | IPPROTO_UDP, 0); | 1178 | IPPROTO_UDP, 0); |
1170 | } | 1179 | } |
1171 | break; | 1180 | break; |
1172 | default: | 1181 | default: |
1173 | if (net_ratelimit()) | 1182 | if (net_ratelimit()) |
1174 | netdev_err(vif->dev, | 1183 | netdev_err(vif->dev, |
1175 | "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n", | 1184 | "Attempting to checksum a non-TCP/UDP packet, " |
1185 | "dropping a protocol %d packet\n", | ||
1176 | iph->protocol); | 1186 | iph->protocol); |
1177 | goto out; | 1187 | goto out; |
1178 | } | 1188 | } |
@@ -1183,6 +1193,158 @@ out: | |||
1183 | return err; | 1193 | return err; |
1184 | } | 1194 | } |
1185 | 1195 | ||
1196 | static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, | ||
1197 | int recalculate_partial_csum) | ||
1198 | { | ||
1199 | int err = -EPROTO; | ||
1200 | struct ipv6hdr *ipv6h = (void *)skb->data; | ||
1201 | u8 nexthdr; | ||
1202 | unsigned int header_size; | ||
1203 | unsigned int off; | ||
1204 | bool fragment; | ||
1205 | bool done; | ||
1206 | |||
1207 | done = false; | ||
1208 | |||
1209 | off = sizeof(struct ipv6hdr); | ||
1210 | |||
1211 | header_size = skb->network_header + off; | ||
1212 | maybe_pull_tail(skb, header_size); | ||
1213 | |||
1214 | nexthdr = ipv6h->nexthdr; | ||
1215 | |||
1216 | while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && | ||
1217 | !done) { | ||
1218 | switch (nexthdr) { | ||
1219 | case IPPROTO_DSTOPTS: | ||
1220 | case IPPROTO_HOPOPTS: | ||
1221 | case IPPROTO_ROUTING: { | ||
1222 | struct ipv6_opt_hdr *hp = (void *)(skb->data + off); | ||
1223 | |||
1224 | header_size = skb->network_header + | ||
1225 | off + | ||
1226 | sizeof(struct ipv6_opt_hdr); | ||
1227 | maybe_pull_tail(skb, header_size); | ||
1228 | |||
1229 | nexthdr = hp->nexthdr; | ||
1230 | off += ipv6_optlen(hp); | ||
1231 | break; | ||
1232 | } | ||
1233 | case IPPROTO_AH: { | ||
1234 | struct ip_auth_hdr *hp = (void *)(skb->data + off); | ||
1235 | |||
1236 | header_size = skb->network_header + | ||
1237 | off + | ||
1238 | sizeof(struct ip_auth_hdr); | ||
1239 | maybe_pull_tail(skb, header_size); | ||
1240 | |||
1241 | nexthdr = hp->nexthdr; | ||
1242 | off += (hp->hdrlen+2)<<2; | ||
1243 | break; | ||
1244 | } | ||
1245 | case IPPROTO_FRAGMENT: | ||
1246 | fragment = true; | ||
1247 | /* fall through */ | ||
1248 | default: | ||
1249 | done = true; | ||
1250 | break; | ||
1251 | } | ||
1252 | } | ||
1253 | |||
1254 | if (!done) { | ||
1255 | if (net_ratelimit()) | ||
1256 | netdev_err(vif->dev, "Failed to parse packet header\n"); | ||
1257 | goto out; | ||
1258 | } | ||
1259 | |||
1260 | if (fragment) { | ||
1261 | if (net_ratelimit()) | ||
1262 | netdev_err(vif->dev, "Packet is a fragment!\n"); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | switch (nexthdr) { | ||
1267 | case IPPROTO_TCP: | ||
1268 | if (!skb_partial_csum_set(skb, off, | ||
1269 | offsetof(struct tcphdr, check))) | ||
1270 | goto out; | ||
1271 | |||
1272 | if (recalculate_partial_csum) { | ||
1273 | struct tcphdr *tcph = tcp_hdr(skb); | ||
1274 | |||
1275 | header_size = skb->network_header + | ||
1276 | off + | ||
1277 | sizeof(struct tcphdr); | ||
1278 | maybe_pull_tail(skb, header_size); | ||
1279 | |||
1280 | tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, | ||
1281 | &ipv6h->daddr, | ||
1282 | skb->len - off, | ||
1283 | IPPROTO_TCP, 0); | ||
1284 | } | ||
1285 | break; | ||
1286 | case IPPROTO_UDP: | ||
1287 | if (!skb_partial_csum_set(skb, off, | ||
1288 | offsetof(struct udphdr, check))) | ||
1289 | goto out; | ||
1290 | |||
1291 | if (recalculate_partial_csum) { | ||
1292 | struct udphdr *udph = udp_hdr(skb); | ||
1293 | |||
1294 | header_size = skb->network_header + | ||
1295 | off + | ||
1296 | sizeof(struct udphdr); | ||
1297 | maybe_pull_tail(skb, header_size); | ||
1298 | |||
1299 | udph->check = ~csum_ipv6_magic(&ipv6h->saddr, | ||
1300 | &ipv6h->daddr, | ||
1301 | skb->len - off, | ||
1302 | IPPROTO_UDP, 0); | ||
1303 | } | ||
1304 | break; | ||
1305 | default: | ||
1306 | if (net_ratelimit()) | ||
1307 | netdev_err(vif->dev, | ||
1308 | "Attempting to checksum a non-TCP/UDP packet, " | ||
1309 | "dropping a protocol %d packet\n", | ||
1310 | nexthdr); | ||
1311 | goto out; | ||
1312 | } | ||
1313 | |||
1314 | err = 0; | ||
1315 | |||
1316 | out: | ||
1317 | return err; | ||
1318 | } | ||
1319 | |||
1320 | static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) | ||
1321 | { | ||
1322 | int err = -EPROTO; | ||
1323 | int recalculate_partial_csum = 0; | ||
1324 | |||
1325 | /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy | ||
1326 | * peers can fail to set NETRXF_csum_blank when sending a GSO | ||
1327 | * frame. In this case force the SKB to CHECKSUM_PARTIAL and | ||
1328 | * recalculate the partial checksum. | ||
1329 | */ | ||
1330 | if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { | ||
1331 | vif->rx_gso_checksum_fixup++; | ||
1332 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
1333 | recalculate_partial_csum = 1; | ||
1334 | } | ||
1335 | |||
1336 | /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ | ||
1337 | if (skb->ip_summed != CHECKSUM_PARTIAL) | ||
1338 | return 0; | ||
1339 | |||
1340 | if (skb->protocol == htons(ETH_P_IP)) | ||
1341 | err = checksum_setup_ip(vif, skb, recalculate_partial_csum); | ||
1342 | else if (skb->protocol == htons(ETH_P_IPV6)) | ||
1343 | err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum); | ||
1344 | |||
1345 | return err; | ||
1346 | } | ||
1347 | |||
1186 | static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) | 1348 | static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) |
1187 | { | 1349 | { |
1188 | unsigned long now = jiffies; | 1350 | unsigned long now = jiffies; |
@@ -1428,12 +1590,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget) | |||
1428 | 1590 | ||
1429 | xenvif_fill_frags(vif, skb); | 1591 | xenvif_fill_frags(vif, skb); |
1430 | 1592 | ||
1431 | /* | 1593 | if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { |
1432 | * If the initial fragment was < PKT_PROT_LEN then | ||
1433 | * pull through some bytes from the other fragments to | ||
1434 | * increase the linear region to PKT_PROT_LEN bytes. | ||
1435 | */ | ||
1436 | if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) { | ||
1437 | int target = min_t(int, skb->len, PKT_PROT_LEN); | 1594 | int target = min_t(int, skb->len, PKT_PROT_LEN); |
1438 | __pskb_pull_tail(skb, target - skb_headlen(skb)); | 1595 | __pskb_pull_tail(skb, target - skb_headlen(skb)); |
1439 | } | 1596 | } |
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index ad27b15242cd..108e7523017a 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c | |||
@@ -105,6 +105,15 @@ static int netback_probe(struct xenbus_device *dev, | |||
105 | goto abort_transaction; | 105 | goto abort_transaction; |
106 | } | 106 | } |
107 | 107 | ||
108 | /* We support partial checksum setup for IPv6 packets */ | ||
109 | err = xenbus_printf(xbt, dev->nodename, | ||
110 | "feature-ipv6-csum-offload", | ||
111 | "%d", 1); | ||
112 | if (err) { | ||
113 | message = "writing feature-ipv6-csum-offload"; | ||
114 | goto abort_transaction; | ||
115 | } | ||
116 | |||
108 | /* We support rx-copy path. */ | 117 | /* We support rx-copy path. */ |
109 | err = xenbus_printf(xbt, dev->nodename, | 118 | err = xenbus_printf(xbt, dev->nodename, |
110 | "feature-rx-copy", "%d", 1); | 119 | "feature-rx-copy", "%d", 1); |