diff options
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
-rw-r--r-- | drivers/net/bonding/bond_main.c | 140 |
1 files changed, 93 insertions, 47 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d688a8af432c..7858c58df4a3 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c | |||
@@ -1120,10 +1120,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) | |||
1120 | write_unlock_bh(&bond->curr_slave_lock); | 1120 | write_unlock_bh(&bond->curr_slave_lock); |
1121 | read_unlock(&bond->lock); | 1121 | read_unlock(&bond->lock); |
1122 | 1122 | ||
1123 | netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER); | 1123 | call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); |
1124 | if (should_notify_peers) | 1124 | if (should_notify_peers) |
1125 | netdev_bonding_change(bond->dev, | 1125 | call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, |
1126 | NETDEV_NOTIFY_PEERS); | 1126 | bond->dev); |
1127 | 1127 | ||
1128 | read_lock(&bond->lock); | 1128 | read_lock(&bond->lock); |
1129 | write_lock_bh(&bond->curr_slave_lock); | 1129 | write_lock_bh(&bond->curr_slave_lock); |
@@ -1558,8 +1558,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) | |||
1558 | bond_dev->name, | 1558 | bond_dev->name, |
1559 | bond_dev->type, slave_dev->type); | 1559 | bond_dev->type, slave_dev->type); |
1560 | 1560 | ||
1561 | res = netdev_bonding_change(bond_dev, | 1561 | res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, |
1562 | NETDEV_PRE_TYPE_CHANGE); | 1562 | bond_dev); |
1563 | res = notifier_to_errno(res); | 1563 | res = notifier_to_errno(res); |
1564 | if (res) { | 1564 | if (res) { |
1565 | pr_err("%s: refused to change device type\n", | 1565 | pr_err("%s: refused to change device type\n", |
@@ -1579,8 +1579,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) | |||
1579 | bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; | 1579 | bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; |
1580 | } | 1580 | } |
1581 | 1581 | ||
1582 | netdev_bonding_change(bond_dev, | 1582 | call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, |
1583 | NETDEV_POST_TYPE_CHANGE); | 1583 | bond_dev); |
1584 | } | 1584 | } |
1585 | } else if (bond_dev->type != slave_dev->type) { | 1585 | } else if (bond_dev->type != slave_dev->type) { |
1586 | pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", | 1586 | pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", |
@@ -1941,7 +1941,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) | |||
1941 | } | 1941 | } |
1942 | 1942 | ||
1943 | block_netpoll_tx(); | 1943 | block_netpoll_tx(); |
1944 | netdev_bonding_change(bond_dev, NETDEV_RELEASE); | 1944 | call_netdevice_notifiers(NETDEV_RELEASE, bond_dev); |
1945 | write_lock_bh(&bond->lock); | 1945 | write_lock_bh(&bond->lock); |
1946 | 1946 | ||
1947 | slave = bond_get_slave_by_dev(bond, slave_dev); | 1947 | slave = bond_get_slave_by_dev(bond, slave_dev); |
@@ -2584,7 +2584,7 @@ re_arm: | |||
2584 | read_unlock(&bond->lock); | 2584 | read_unlock(&bond->lock); |
2585 | return; | 2585 | return; |
2586 | } | 2586 | } |
2587 | netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS); | 2587 | call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); |
2588 | rtnl_unlock(); | 2588 | rtnl_unlock(); |
2589 | } | 2589 | } |
2590 | } | 2590 | } |
@@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work) | |||
2811 | arp_work.work); | 2811 | arp_work.work); |
2812 | struct slave *slave, *oldcurrent; | 2812 | struct slave *slave, *oldcurrent; |
2813 | int do_failover = 0; | 2813 | int do_failover = 0; |
2814 | int delta_in_ticks; | 2814 | int delta_in_ticks, extra_ticks; |
2815 | int i; | 2815 | int i; |
2816 | 2816 | ||
2817 | read_lock(&bond->lock); | 2817 | read_lock(&bond->lock); |
2818 | 2818 | ||
2819 | delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); | 2819 | delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); |
2820 | extra_ticks = delta_in_ticks / 2; | ||
2820 | 2821 | ||
2821 | if (bond->slave_cnt == 0) | 2822 | if (bond->slave_cnt == 0) |
2822 | goto re_arm; | 2823 | goto re_arm; |
@@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) | |||
2839 | if (slave->link != BOND_LINK_UP) { | 2840 | if (slave->link != BOND_LINK_UP) { |
2840 | if (time_in_range(jiffies, | 2841 | if (time_in_range(jiffies, |
2841 | trans_start - delta_in_ticks, | 2842 | trans_start - delta_in_ticks, |
2842 | trans_start + delta_in_ticks) && | 2843 | trans_start + delta_in_ticks + extra_ticks) && |
2843 | time_in_range(jiffies, | 2844 | time_in_range(jiffies, |
2844 | slave->dev->last_rx - delta_in_ticks, | 2845 | slave->dev->last_rx - delta_in_ticks, |
2845 | slave->dev->last_rx + delta_in_ticks)) { | 2846 | slave->dev->last_rx + delta_in_ticks + extra_ticks)) { |
2846 | 2847 | ||
2847 | slave->link = BOND_LINK_UP; | 2848 | slave->link = BOND_LINK_UP; |
2848 | bond_set_active_slave(slave); | 2849 | bond_set_active_slave(slave); |
@@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work) | |||
2872 | */ | 2873 | */ |
2873 | if (!time_in_range(jiffies, | 2874 | if (!time_in_range(jiffies, |
2874 | trans_start - delta_in_ticks, | 2875 | trans_start - delta_in_ticks, |
2875 | trans_start + 2 * delta_in_ticks) || | 2876 | trans_start + 2 * delta_in_ticks + extra_ticks) || |
2876 | !time_in_range(jiffies, | 2877 | !time_in_range(jiffies, |
2877 | slave->dev->last_rx - delta_in_ticks, | 2878 | slave->dev->last_rx - delta_in_ticks, |
2878 | slave->dev->last_rx + 2 * delta_in_ticks)) { | 2879 | slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) { |
2879 | 2880 | ||
2880 | slave->link = BOND_LINK_DOWN; | 2881 | slave->link = BOND_LINK_DOWN; |
2881 | bond_set_backup_slave(slave); | 2882 | bond_set_backup_slave(slave); |
@@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) | |||
2933 | struct slave *slave; | 2934 | struct slave *slave; |
2934 | int i, commit = 0; | 2935 | int i, commit = 0; |
2935 | unsigned long trans_start; | 2936 | unsigned long trans_start; |
2937 | int extra_ticks; | ||
2938 | |||
2939 | /* All the time comparisons below need some extra time. Otherwise, on | ||
2940 | * fast networks the ARP probe/reply may arrive within the same jiffy | ||
2941 | * as it was sent. Then, the next time the ARP monitor is run, one | ||
2942 | * arp_interval will already have passed in the comparisons. | ||
2943 | */ | ||
2944 | extra_ticks = delta_in_ticks / 2; | ||
2936 | 2945 | ||
2937 | bond_for_each_slave(bond, slave, i) { | 2946 | bond_for_each_slave(bond, slave, i) { |
2938 | slave->new_link = BOND_LINK_NOCHANGE; | 2947 | slave->new_link = BOND_LINK_NOCHANGE; |
@@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) | |||
2940 | if (slave->link != BOND_LINK_UP) { | 2949 | if (slave->link != BOND_LINK_UP) { |
2941 | if (time_in_range(jiffies, | 2950 | if (time_in_range(jiffies, |
2942 | slave_last_rx(bond, slave) - delta_in_ticks, | 2951 | slave_last_rx(bond, slave) - delta_in_ticks, |
2943 | slave_last_rx(bond, slave) + delta_in_ticks)) { | 2952 | slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) { |
2944 | 2953 | ||
2945 | slave->new_link = BOND_LINK_UP; | 2954 | slave->new_link = BOND_LINK_UP; |
2946 | commit++; | 2955 | commit++; |
@@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) | |||
2956 | */ | 2965 | */ |
2957 | if (time_in_range(jiffies, | 2966 | if (time_in_range(jiffies, |
2958 | slave->jiffies - delta_in_ticks, | 2967 | slave->jiffies - delta_in_ticks, |
2959 | slave->jiffies + 2 * delta_in_ticks)) | 2968 | slave->jiffies + 2 * delta_in_ticks + extra_ticks)) |
2960 | continue; | 2969 | continue; |
2961 | 2970 | ||
2962 | /* | 2971 | /* |
@@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) | |||
2976 | !bond->current_arp_slave && | 2985 | !bond->current_arp_slave && |
2977 | !time_in_range(jiffies, | 2986 | !time_in_range(jiffies, |
2978 | slave_last_rx(bond, slave) - delta_in_ticks, | 2987 | slave_last_rx(bond, slave) - delta_in_ticks, |
2979 | slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { | 2988 | slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) { |
2980 | 2989 | ||
2981 | slave->new_link = BOND_LINK_DOWN; | 2990 | slave->new_link = BOND_LINK_DOWN; |
2982 | commit++; | 2991 | commit++; |
@@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) | |||
2992 | if (bond_is_active_slave(slave) && | 3001 | if (bond_is_active_slave(slave) && |
2993 | (!time_in_range(jiffies, | 3002 | (!time_in_range(jiffies, |
2994 | trans_start - delta_in_ticks, | 3003 | trans_start - delta_in_ticks, |
2995 | trans_start + 2 * delta_in_ticks) || | 3004 | trans_start + 2 * delta_in_ticks + extra_ticks) || |
2996 | !time_in_range(jiffies, | 3005 | !time_in_range(jiffies, |
2997 | slave_last_rx(bond, slave) - delta_in_ticks, | 3006 | slave_last_rx(bond, slave) - delta_in_ticks, |
2998 | slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { | 3007 | slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) { |
2999 | 3008 | ||
3000 | slave->new_link = BOND_LINK_DOWN; | 3009 | slave->new_link = BOND_LINK_DOWN; |
3001 | commit++; | 3010 | commit++; |
@@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) | |||
3027 | if ((!bond->curr_active_slave && | 3036 | if ((!bond->curr_active_slave && |
3028 | time_in_range(jiffies, | 3037 | time_in_range(jiffies, |
3029 | trans_start - delta_in_ticks, | 3038 | trans_start - delta_in_ticks, |
3030 | trans_start + delta_in_ticks)) || | 3039 | trans_start + delta_in_ticks + delta_in_ticks / 2)) || |
3031 | bond->curr_active_slave != slave) { | 3040 | bond->curr_active_slave != slave) { |
3032 | slave->link = BOND_LINK_UP; | 3041 | slave->link = BOND_LINK_UP; |
3033 | if (bond->current_arp_slave) { | 3042 | if (bond->current_arp_slave) { |
@@ -3203,7 +3212,7 @@ re_arm: | |||
3203 | read_unlock(&bond->lock); | 3212 | read_unlock(&bond->lock); |
3204 | return; | 3213 | return; |
3205 | } | 3214 | } |
3206 | netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS); | 3215 | call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); |
3207 | rtnl_unlock(); | 3216 | rtnl_unlock(); |
3208 | } | 3217 | } |
3209 | } | 3218 | } |
@@ -3352,56 +3361,93 @@ static struct notifier_block bond_netdev_notifier = { | |||
3352 | /*---------------------------- Hashing Policies -----------------------------*/ | 3361 | /*---------------------------- Hashing Policies -----------------------------*/ |
3353 | 3362 | ||
3354 | /* | 3363 | /* |
3364 | * Hash for the output device based upon layer 2 data | ||
3365 | */ | ||
3366 | static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) | ||
3367 | { | ||
3368 | struct ethhdr *data = (struct ethhdr *)skb->data; | ||
3369 | |||
3370 | if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) | ||
3371 | return (data->h_dest[5] ^ data->h_source[5]) % count; | ||
3372 | |||
3373 | return 0; | ||
3374 | } | ||
3375 | |||
3376 | /* | ||
3355 | * Hash for the output device based upon layer 2 and layer 3 data. If | 3377 | * Hash for the output device based upon layer 2 and layer 3 data. If |
3356 | * the packet is not IP mimic bond_xmit_hash_policy_l2() | 3378 | * the packet is not IP, fall back on bond_xmit_hash_policy_l2() |
3357 | */ | 3379 | */ |
3358 | static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) | 3380 | static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) |
3359 | { | 3381 | { |
3360 | struct ethhdr *data = (struct ethhdr *)skb->data; | 3382 | struct ethhdr *data = (struct ethhdr *)skb->data; |
3361 | struct iphdr *iph = ip_hdr(skb); | 3383 | struct iphdr *iph; |
3362 | 3384 | struct ipv6hdr *ipv6h; | |
3363 | if (skb->protocol == htons(ETH_P_IP)) { | 3385 | u32 v6hash; |
3386 | __be32 *s, *d; | ||
3387 | |||
3388 | if (skb->protocol == htons(ETH_P_IP) && | ||
3389 | skb_network_header_len(skb) >= sizeof(*iph)) { | ||
3390 | iph = ip_hdr(skb); | ||
3364 | return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ | 3391 | return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ |
3365 | (data->h_dest[5] ^ data->h_source[5])) % count; | 3392 | (data->h_dest[5] ^ data->h_source[5])) % count; |
3393 | } else if (skb->protocol == htons(ETH_P_IPV6) && | ||
3394 | skb_network_header_len(skb) >= sizeof(*ipv6h)) { | ||
3395 | ipv6h = ipv6_hdr(skb); | ||
3396 | s = &ipv6h->saddr.s6_addr32[0]; | ||
3397 | d = &ipv6h->daddr.s6_addr32[0]; | ||
3398 | v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); | ||
3399 | v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); | ||
3400 | return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; | ||
3366 | } | 3401 | } |
3367 | 3402 | ||
3368 | return (data->h_dest[5] ^ data->h_source[5]) % count; | 3403 | return bond_xmit_hash_policy_l2(skb, count); |
3369 | } | 3404 | } |
3370 | 3405 | ||
3371 | /* | 3406 | /* |
3372 | * Hash for the output device based upon layer 3 and layer 4 data. If | 3407 | * Hash for the output device based upon layer 3 and layer 4 data. If |
3373 | * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is | 3408 | * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is |
3374 | * altogether not IP, mimic bond_xmit_hash_policy_l2() | 3409 | * altogether not IP, fall back on bond_xmit_hash_policy_l2() |
3375 | */ | 3410 | */ |
3376 | static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) | 3411 | static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) |
3377 | { | 3412 | { |
3378 | struct ethhdr *data = (struct ethhdr *)skb->data; | 3413 | u32 layer4_xor = 0; |
3379 | struct iphdr *iph = ip_hdr(skb); | 3414 | struct iphdr *iph; |
3380 | __be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); | 3415 | struct ipv6hdr *ipv6h; |
3381 | int layer4_xor = 0; | 3416 | __be32 *s, *d; |
3382 | 3417 | __be16 *layer4hdr; | |
3383 | if (skb->protocol == htons(ETH_P_IP)) { | 3418 | |
3419 | if (skb->protocol == htons(ETH_P_IP) && | ||
3420 | skb_network_header_len(skb) >= sizeof(*iph)) { | ||
3421 | iph = ip_hdr(skb); | ||
3384 | if (!ip_is_fragment(iph) && | 3422 | if (!ip_is_fragment(iph) && |
3385 | (iph->protocol == IPPROTO_TCP || | 3423 | (iph->protocol == IPPROTO_TCP || |
3386 | iph->protocol == IPPROTO_UDP)) { | 3424 | iph->protocol == IPPROTO_UDP) && |
3387 | layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); | 3425 | (skb_headlen(skb) - skb_network_offset(skb) >= |
3426 | iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) { | ||
3427 | layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); | ||
3428 | layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); | ||
3388 | } | 3429 | } |
3389 | return (layer4_xor ^ | 3430 | return (layer4_xor ^ |
3390 | ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; | 3431 | ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; |
3391 | 3432 | } else if (skb->protocol == htons(ETH_P_IPV6) && | |
3433 | skb_network_header_len(skb) >= sizeof(*ipv6h)) { | ||
3434 | ipv6h = ipv6_hdr(skb); | ||
3435 | if ((ipv6h->nexthdr == IPPROTO_TCP || | ||
3436 | ipv6h->nexthdr == IPPROTO_UDP) && | ||
3437 | (skb_headlen(skb) - skb_network_offset(skb) >= | ||
3438 | sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) { | ||
3439 | layer4hdr = (__be16 *)(ipv6h + 1); | ||
3440 | layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1)); | ||
3441 | } | ||
3442 | s = &ipv6h->saddr.s6_addr32[0]; | ||
3443 | d = &ipv6h->daddr.s6_addr32[0]; | ||
3444 | layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); | ||
3445 | layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^ | ||
3446 | (layer4_xor >> 8); | ||
3447 | return layer4_xor % count; | ||
3392 | } | 3448 | } |
3393 | 3449 | ||
3394 | return (data->h_dest[5] ^ data->h_source[5]) % count; | 3450 | return bond_xmit_hash_policy_l2(skb, count); |
3395 | } | ||
3396 | |||
3397 | /* | ||
3398 | * Hash for the output device based upon layer 2 data | ||
3399 | */ | ||
3400 | static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) | ||
3401 | { | ||
3402 | struct ethhdr *data = (struct ethhdr *)skb->data; | ||
3403 | |||
3404 | return (data->h_dest[5] ^ data->h_source[5]) % count; | ||
3405 | } | 3451 | } |
3406 | 3452 | ||
3407 | /*-------------------------- Device entry points ----------------------------*/ | 3453 | /*-------------------------- Device entry points ----------------------------*/ |