aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorJohn Eaglesham <linux@8192.net>2012-08-21 16:43:35 -0400
committerDavid S. Miller <davem@davemloft.net>2012-08-23 01:49:30 -0400
commit6b923cb7188d46905f43fa84210c4c3e5f9cd8fb (patch)
tree397012f5f344a693e70999ef67fa1b2a23e7d96f /drivers
parentb87fb39e399137257a6db3224ea854117e9486e9 (diff)
bonding: support for IPv6 transmit hashing
Currently the "bonding" driver does not support load balancing outgoing traffic in LACP mode for IPv6 traffic. IPv4 (and TCP or UDP over IPv4) are currently supported; this patch adds transmit hashing for IPv6 (and TCP or UDP over IPv6), bringing IPv6 up to par with IPv4 support in the bonding driver. In addition, bounds checking has been added to all transmit hashing functions. The algorithm chosen (xor'ing the bottom three quads of the source and destination addresses together, then xor'ing each byte of that result into the bottom byte, finally xor'ing with the last bytes of the MAC addresses) was selected after testing almost 400,000 unique IPv6 addresses harvested from server logs. This algorithm had the most even distribution for both big- and little-endian architectures while still using few instructions. Its behavior also attempts to closely match that of the IPv4 algorithm. The IPv6 flow label was intentionally not included in the hash as it appears to be unset in the vast majority of IPv6 traffic sampled, and the current algorithm not using the flow label already offers a very even distribution. Fragmented IPv6 packets are handled the same way as fragmented IPv4 packets, i.e., they are not balanced based on layer 4 information. Additionally, IPv6 packets with intermediate headers are not balanced based on layer 4 information. In practice these intermediate headers are not common and this should not cause any problems, and the alternative (a packet-parsing loop and look-up table) seemed slow and complicated for little gain. Tested-by: John Eaglesham <linux@8192.net> Signed-off-by: John Eaglesham <linux@8192.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/net/bonding/bond_main.c89
1 file changed, 63 insertions, 26 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a86174c9fed1..b24ce257ac7b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3352,56 +3352,93 @@ static struct notifier_block bond_netdev_notifier = {
3352/*---------------------------- Hashing Policies -----------------------------*/ 3352/*---------------------------- Hashing Policies -----------------------------*/
3353 3353
3354/* 3354/*
3355 * Hash for the output device based upon layer 2 data
3356 */
3357static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
3358{
3359 struct ethhdr *data = (struct ethhdr *)skb->data;
3360
3361 if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
3362 return (data->h_dest[5] ^ data->h_source[5]) % count;
3363
3364 return 0;
3365}
3366
3367/*
3355 * Hash for the output device based upon layer 2 and layer 3 data. If 3368 * Hash for the output device based upon layer 2 and layer 3 data. If
3356 * the packet is not IP mimic bond_xmit_hash_policy_l2() 3369 * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
3357 */ 3370 */
3358static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) 3371static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
3359{ 3372{
3360 struct ethhdr *data = (struct ethhdr *)skb->data; 3373 struct ethhdr *data = (struct ethhdr *)skb->data;
3361 struct iphdr *iph = ip_hdr(skb); 3374 struct iphdr *iph;
3362 3375 struct ipv6hdr *ipv6h;
3363 if (skb->protocol == htons(ETH_P_IP)) { 3376 u32 v6hash;
3377 __be32 *s, *d;
3378
3379 if (skb->protocol == htons(ETH_P_IP) &&
3380 skb_network_header_len(skb) >= sizeof(*iph)) {
3381 iph = ip_hdr(skb);
3364 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ 3382 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
3365 (data->h_dest[5] ^ data->h_source[5])) % count; 3383 (data->h_dest[5] ^ data->h_source[5])) % count;
3384 } else if (skb->protocol == htons(ETH_P_IPV6) &&
3385 skb_network_header_len(skb) >= sizeof(*ipv6h)) {
3386 ipv6h = ipv6_hdr(skb);
3387 s = &ipv6h->saddr.s6_addr32[0];
3388 d = &ipv6h->daddr.s6_addr32[0];
3389 v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
3390 v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
3391 return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
3366 } 3392 }
3367 3393
3368 return (data->h_dest[5] ^ data->h_source[5]) % count; 3394 return bond_xmit_hash_policy_l2(skb, count);
3369} 3395}
3370 3396
3371/* 3397/*
3372 * Hash for the output device based upon layer 3 and layer 4 data. If 3398 * Hash for the output device based upon layer 3 and layer 4 data. If
3373 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3399 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
3374 * altogether not IP, mimic bond_xmit_hash_policy_l2() 3400 * altogether not IP, fall back on bond_xmit_hash_policy_l2()
3375 */ 3401 */
3376static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) 3402static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
3377{ 3403{
3378 struct ethhdr *data = (struct ethhdr *)skb->data; 3404 u32 layer4_xor = 0;
3379 struct iphdr *iph = ip_hdr(skb); 3405 struct iphdr *iph;
3380 __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); 3406 struct ipv6hdr *ipv6h;
3381 int layer4_xor = 0; 3407 __be32 *s, *d;
3382 3408 __be16 *layer4hdr;
3383 if (skb->protocol == htons(ETH_P_IP)) { 3409
3410 if (skb->protocol == htons(ETH_P_IP) &&
3411 skb_network_header_len(skb) >= sizeof(*iph)) {
3412 iph = ip_hdr(skb);
3384 if (!ip_is_fragment(iph) && 3413 if (!ip_is_fragment(iph) &&
3385 (iph->protocol == IPPROTO_TCP || 3414 (iph->protocol == IPPROTO_TCP ||
3386 iph->protocol == IPPROTO_UDP)) { 3415 iph->protocol == IPPROTO_UDP) &&
3387 layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); 3416 (skb_headlen(skb) - skb_network_offset(skb) >=
3417 iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) {
3418 layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
3419 layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
3388 } 3420 }
3389 return (layer4_xor ^ 3421 return (layer4_xor ^
3390 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3422 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
3391 3423 } else if (skb->protocol == htons(ETH_P_IPV6) &&
3424 skb_network_header_len(skb) >= sizeof(*ipv6h)) {
3425 ipv6h = ipv6_hdr(skb);
3426 if ((ipv6h->nexthdr == IPPROTO_TCP ||
3427 ipv6h->nexthdr == IPPROTO_UDP) &&
3428 (skb_headlen(skb) - skb_network_offset(skb) >=
3429 sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) {
3430 layer4hdr = (__be16 *)(ipv6h + 1);
3431 layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
3432 }
3433 s = &ipv6h->saddr.s6_addr32[0];
3434 d = &ipv6h->daddr.s6_addr32[0];
3435 layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
3436 layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
3437 (layer4_xor >> 8);
3438 return layer4_xor % count;
3392 } 3439 }
3393 3440
3394 return (data->h_dest[5] ^ data->h_source[5]) % count; 3441 return bond_xmit_hash_policy_l2(skb, count);
3395}
3396
3397/*
3398 * Hash for the output device based upon layer 2 data
3399 */
3400static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
3401{
3402 struct ethhdr *data = (struct ethhdr *)skb->data;
3403
3404 return (data->h_dest[5] ^ data->h_source[5]) % count;
3405} 3442}
3406 3443
3407/*-------------------------- Device entry points ----------------------------*/ 3444/*-------------------------- Device entry points ----------------------------*/