aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/bonding
diff options
context:
space:
mode:
author: Nikolay Aleksandrov <nikolay@redhat.com> 2013-10-02 07:39:25 -0400
committer: David S. Miller <davem@davemloft.net> 2013-10-03 15:36:38 -0400
commit: 32819dc1834866cb9547cb75f81af9edd58d33cd (patch)
tree: a624e228d0e8ca80ee302af4904d9c19551aa2c7 /drivers/net/bonding
parent: 357afe9c46c951c34769e39cabdf8d1637e2eecc (diff)
bonding: modify the old and add new xmit hash policies
This patch adds two new hash policy modes which use skb_flow_dissect:
  3 - Encapsulated layer 2+3
  4 - Encapsulated layer 3+4
There should be a good improvement for tunnel users in those modes.
It also changes the old hash functions to:
  hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
  hash ^= (hash >> 16);
  hash ^= (hash >> 8);

Where hash will be initialized either to L2 hash, that is SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted from the upper layer. Flow's dst and src are also extracted based on the xmit policy either directly from the buffer or by using skb_flow_dissect, but in both cases if the protocol is IPv6 then dst and src are obtained by ipv6_addr_hash() on the real addresses. In case of a non-dissectable packet, the algorithms fall back to L2 hashing.
The bond_set_mode_ops() function is now obsolete and thus deleted because it was used only to set the proper hash policy. Also we trim a pointer from struct bonding because we no longer need to keep the hash function, now there's only a single hash function - bond_xmit_hash that works based on bond->params.xmit_policy.

The hash function and skb_flow_dissect were suggested by Eric Dumazet.
The layer names were suggested by Andy Gospodarek, because I suck at semantics.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- drivers/net/bonding/bond_3ad.c 2
-rw-r--r-- drivers/net/bonding/bond_main.c 197
-rw-r--r-- drivers/net/bonding/bond_sysfs.c 2
-rw-r--r-- drivers/net/bonding/bonding.h 3
4 files changed, 70 insertions, 134 deletions
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c62606a67f6a..ea3e64e22e22 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2403,7 +2403,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
2403 goto out; 2403 goto out;
2404 } 2404 }
2405 2405
2406 slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg); 2406 slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
2407 first_ok_slave = NULL; 2407 first_ok_slave = NULL;
2408 2408
2409 bond_for_each_slave(bond, slave, iter) { 2409 bond_for_each_slave(bond, slave, iter) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index fe8a94f9d7db..dfb4f6dd5de0 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -78,6 +78,7 @@
78#include <net/netns/generic.h> 78#include <net/netns/generic.h>
79#include <net/pkt_sched.h> 79#include <net/pkt_sched.h>
80#include <linux/rculist.h> 80#include <linux/rculist.h>
81#include <net/flow_keys.h>
81#include "bonding.h" 82#include "bonding.h"
82#include "bond_3ad.h" 83#include "bond_3ad.h"
83#include "bond_alb.h" 84#include "bond_alb.h"
@@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
159module_param(xmit_hash_policy, charp, 0); 160module_param(xmit_hash_policy, charp, 0);
160MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " 161MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
161 "0 for layer 2 (default), 1 for layer 3+4, " 162 "0 for layer 2 (default), 1 for layer 3+4, "
162 "2 for layer 2+3"); 163 "2 for layer 2+3, 3 for encap layer 2+3, "
164 "4 for encap layer 3+4");
163module_param(arp_interval, int, 0); 165module_param(arp_interval, int, 0);
164MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); 166MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
165module_param_array(arp_ip_target, charp, NULL, 0); 167module_param_array(arp_ip_target, charp, NULL, 0);
@@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
217{ "layer2", BOND_XMIT_POLICY_LAYER2}, 219{ "layer2", BOND_XMIT_POLICY_LAYER2},
218{ "layer3+4", BOND_XMIT_POLICY_LAYER34}, 220{ "layer3+4", BOND_XMIT_POLICY_LAYER34},
219{ "layer2+3", BOND_XMIT_POLICY_LAYER23}, 221{ "layer2+3", BOND_XMIT_POLICY_LAYER23},
222{ "encap2+3", BOND_XMIT_POLICY_ENCAP23},
223{ "encap3+4", BOND_XMIT_POLICY_ENCAP34},
220{ NULL, -1}, 224{ NULL, -1},
221}; 225};
222 226
@@ -3035,99 +3039,85 @@ static struct notifier_block bond_netdev_notifier = {
3035 3039
3036/*---------------------------- Hashing Policies -----------------------------*/ 3040/*---------------------------- Hashing Policies -----------------------------*/
3037 3041
3038/* 3042/* L2 hash helper */
3039 * Hash for the output device based upon layer 2 data 3043static inline u32 bond_eth_hash(struct sk_buff *skb)
3040 */
3041static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
3042{ 3044{
3043 struct ethhdr *data = (struct ethhdr *)skb->data; 3045 struct ethhdr *data = (struct ethhdr *)skb->data;
3044 3046
3045 if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) 3047 if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
3046 return (data->h_dest[5] ^ data->h_source[5]) % count; 3048 return data->h_dest[5] ^ data->h_source[5];
3047 3049
3048 return 0; 3050 return 0;
3049} 3051}
3050 3052
3051/* 3053/* Extract the appropriate headers based on bond's xmit policy */
3052 * Hash for the output device based upon layer 2 and layer 3 data. If 3054static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
3053 * the packet is not IP, fall back on bond_xmit_hash_policy_l2() 3055 struct flow_keys *fk)
3054 */
3055static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
3056{ 3056{
3057 const struct ethhdr *data; 3057 const struct ipv6hdr *iph6;
3058 const struct iphdr *iph; 3058 const struct iphdr *iph;
3059 const struct ipv6hdr *ipv6h; 3059 int noff, proto = -1;
3060 u32 v6hash;
3061 const __be32 *s, *d;
3062 3060
3063 if (skb->protocol == htons(ETH_P_IP) && 3061 if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
3064 pskb_network_may_pull(skb, sizeof(*iph))) { 3062 return skb_flow_dissect(skb, fk);
3063
3064 fk->ports = 0;
3065 noff = skb_network_offset(skb);
3066 if (skb->protocol == htons(ETH_P_IP)) {
3067 if (!pskb_may_pull(skb, noff + sizeof(*iph)))
3068 return false;
3065 iph = ip_hdr(skb); 3069 iph = ip_hdr(skb);
3066 data = (struct ethhdr *)skb->data; 3070 fk->src = iph->saddr;
3067 return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ 3071 fk->dst = iph->daddr;
3068 (data->h_dest[5] ^ data->h_source[5])) % count; 3072 noff += iph->ihl << 2;
3069 } else if (skb->protocol == htons(ETH_P_IPV6) && 3073 if (!ip_is_fragment(iph))
3070 pskb_network_may_pull(skb, sizeof(*ipv6h))) { 3074 proto = iph->protocol;
3071 ipv6h = ipv6_hdr(skb); 3075 } else if (skb->protocol == htons(ETH_P_IPV6)) {
3072 data = (struct ethhdr *)skb->data; 3076 if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
3073 s = &ipv6h->saddr.s6_addr32[0]; 3077 return false;
3074 d = &ipv6h->daddr.s6_addr32[0]; 3078 iph6 = ipv6_hdr(skb);
3075 v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); 3079 fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
3076 v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); 3080 fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
3077 return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; 3081 noff += sizeof(*iph6);
3078 } 3082 proto = iph6->nexthdr;
3079 3083 } else {
3080 return bond_xmit_hash_policy_l2(skb, count); 3084 return false;
3085 }
3086 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
3087 fk->ports = skb_flow_get_ports(skb, noff, proto);
3088
3089 return true;
3081} 3090}
3082 3091
3083/* 3092/**
3084 * Hash for the output device based upon layer 3 and layer 4 data. If 3093 * bond_xmit_hash - generate a hash value based on the xmit policy
3085 * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is 3094 * @bond: bonding device
3086 * altogether not IP, fall back on bond_xmit_hash_policy_l2() 3095 * @skb: buffer to use for headers
3096 * @count: modulo value
3097 *
3098 * This function will extract the necessary headers from the skb buffer and use
3099 * them to generate a hash based on the xmit_policy set in the bonding device
3100 * which will be reduced modulo count before returning.
3087 */ 3101 */
3088static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) 3102int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
3089{ 3103{
3090 u32 layer4_xor = 0; 3104 struct flow_keys flow;
3091 const struct iphdr *iph; 3105 u32 hash;
3092 const struct ipv6hdr *ipv6h;
3093 const __be32 *s, *d;
3094 const __be16 *l4 = NULL;
3095 __be16 _l4[2];
3096 int noff = skb_network_offset(skb);
3097 int poff;
3098
3099 if (skb->protocol == htons(ETH_P_IP) &&
3100 pskb_may_pull(skb, noff + sizeof(*iph))) {
3101 iph = ip_hdr(skb);
3102 poff = proto_ports_offset(iph->protocol);
3103 3106
3104 if (!ip_is_fragment(iph) && poff >= 0) { 3107 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
3105 l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, 3108 !bond_flow_dissect(bond, skb, &flow))
3106 sizeof(_l4), &_l4); 3109 return bond_eth_hash(skb) % count;
3107 if (l4) 3110
3108 layer4_xor = ntohs(l4[0] ^ l4[1]); 3111 if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
3109 } 3112 bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
3110 return (layer4_xor ^ 3113 hash = bond_eth_hash(skb);
3111 ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; 3114 else
3112 } else if (skb->protocol == htons(ETH_P_IPV6) && 3115 hash = (__force u32)flow.ports;
3113 pskb_may_pull(skb, noff + sizeof(*ipv6h))) { 3116 hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
3114 ipv6h = ipv6_hdr(skb); 3117 hash ^= (hash >> 16);
3115 poff = proto_ports_offset(ipv6h->nexthdr); 3118 hash ^= (hash >> 8);
3116 if (poff >= 0) {
3117 l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
3118 sizeof(_l4), &_l4);
3119 if (l4)
3120 layer4_xor = ntohs(l4[0] ^ l4[1]);
3121 }
3122 s = &ipv6h->saddr.s6_addr32[0];
3123 d = &ipv6h->daddr.s6_addr32[0];
3124 layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
3125 layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
3126 (layer4_xor >> 8);
3127 return layer4_xor % count;
3128 }
3129 3119
3130 return bond_xmit_hash_policy_l2(skb, count); 3120 return hash % count;
3131} 3121}
3132 3122
3133/*-------------------------- Device entry points ----------------------------*/ 3123/*-------------------------- Device entry points ----------------------------*/
@@ -3721,8 +3711,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
3721 return NETDEV_TX_OK; 3711 return NETDEV_TX_OK;
3722} 3712}
3723 3713
3724/* 3714/* In bond_xmit_xor() , we determine the output device by using a pre-
3725 * In bond_xmit_xor() , we determine the output device by using a pre-
3726 * determined xmit_hash_policy(), If the selected device is not enabled, 3715 * determined xmit_hash_policy(), If the selected device is not enabled,
3727 * find the next active slave. 3716 * find the next active slave.
3728 */ 3717 */
@@ -3730,8 +3719,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
3730{ 3719{
3731 struct bonding *bond = netdev_priv(bond_dev); 3720 struct bonding *bond = netdev_priv(bond_dev);
3732 3721
3733 bond_xmit_slave_id(bond, skb, 3722 bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
3734 bond->xmit_hash_policy(skb, bond->slave_cnt));
3735 3723
3736 return NETDEV_TX_OK; 3724 return NETDEV_TX_OK;
3737} 3725}
@@ -3768,22 +3756,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
3768 3756
3769/*------------------------- Device initialization ---------------------------*/ 3757/*------------------------- Device initialization ---------------------------*/
3770 3758
3771static void bond_set_xmit_hash_policy(struct bonding *bond)
3772{
3773 switch (bond->params.xmit_policy) {
3774 case BOND_XMIT_POLICY_LAYER23:
3775 bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
3776 break;
3777 case BOND_XMIT_POLICY_LAYER34:
3778 bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
3779 break;
3780 case BOND_XMIT_POLICY_LAYER2:
3781 default:
3782 bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
3783 break;
3784 }
3785}
3786
3787/* 3759/*
3788 * Lookup the slave that corresponds to a qid 3760 * Lookup the slave that corresponds to a qid
3789 */ 3761 */
@@ -3894,38 +3866,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
3894 return ret; 3866 return ret;
3895} 3867}
3896 3868
3897/*
3898 * set bond mode specific net device operations
3899 */
3900void bond_set_mode_ops(struct bonding *bond, int mode)
3901{
3902 struct net_device *bond_dev = bond->dev;
3903
3904 switch (mode) {
3905 case BOND_MODE_ROUNDROBIN:
3906 break;
3907 case BOND_MODE_ACTIVEBACKUP:
3908 break;
3909 case BOND_MODE_XOR:
3910 bond_set_xmit_hash_policy(bond);
3911 break;
3912 case BOND_MODE_BROADCAST:
3913 break;
3914 case BOND_MODE_8023AD:
3915 bond_set_xmit_hash_policy(bond);
3916 break;
3917 case BOND_MODE_ALB:
3918 /* FALLTHRU */
3919 case BOND_MODE_TLB:
3920 break;
3921 default:
3922 /* Should never happen, mode already checked */
3923 pr_err("%s: Error: Unknown bonding mode %d\n",
3924 bond_dev->name, mode);
3925 break;
3926 }
3927}
3928
3929static int bond_ethtool_get_settings(struct net_device *bond_dev, 3869static int bond_ethtool_get_settings(struct net_device *bond_dev,
3930 struct ethtool_cmd *ecmd) 3870 struct ethtool_cmd *ecmd)
3931{ 3871{
@@ -4027,7 +3967,6 @@ static void bond_setup(struct net_device *bond_dev)
4027 ether_setup(bond_dev); 3967 ether_setup(bond_dev);
4028 bond_dev->netdev_ops = &bond_netdev_ops; 3968 bond_dev->netdev_ops = &bond_netdev_ops;
4029 bond_dev->ethtool_ops = &bond_ethtool_ops; 3969 bond_dev->ethtool_ops = &bond_ethtool_ops;
4030 bond_set_mode_ops(bond, bond->params.mode);
4031 3970
4032 bond_dev->destructor = bond_destructor; 3971 bond_dev->destructor = bond_destructor;
4033 3972
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e06c644470b1..e9249527e7e7 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -318,7 +318,6 @@ static ssize_t bonding_store_mode(struct device *d,
318 /* don't cache arp_validate between modes */ 318 /* don't cache arp_validate between modes */
319 bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; 319 bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
320 bond->params.mode = new_value; 320 bond->params.mode = new_value;
321 bond_set_mode_ops(bond, bond->params.mode);
322 pr_info("%s: setting mode to %s (%d).\n", 321 pr_info("%s: setting mode to %s (%d).\n",
323 bond->dev->name, bond_mode_tbl[new_value].modename, 322 bond->dev->name, bond_mode_tbl[new_value].modename,
324 new_value); 323 new_value);
@@ -358,7 +357,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
358 ret = -EINVAL; 357 ret = -EINVAL;
359 } else { 358 } else {
360 bond->params.xmit_policy = new_value; 359 bond->params.xmit_policy = new_value;
361 bond_set_mode_ops(bond, bond->params.mode);
362 pr_info("%s: setting xmit hash policy to %s (%d).\n", 360 pr_info("%s: setting xmit hash policy to %s (%d).\n",
363 bond->dev->name, 361 bond->dev->name,
364 xmit_hashtype_tbl[new_value].modename, new_value); 362 xmit_hashtype_tbl[new_value].modename, new_value);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 9a26fbd82645..0bd04fbda8e9 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -217,7 +217,6 @@ struct bonding {
217 char proc_file_name[IFNAMSIZ]; 217 char proc_file_name[IFNAMSIZ];
218#endif /* CONFIG_PROC_FS */ 218#endif /* CONFIG_PROC_FS */
219 struct list_head bond_list; 219 struct list_head bond_list;
220 int (*xmit_hash_policy)(struct sk_buff *, int);
221 u16 rr_tx_counter; 220 u16 rr_tx_counter;
222 struct ad_bond_info ad_info; 221 struct ad_bond_info ad_info;
223 struct alb_bond_info alb_info; 222 struct alb_bond_info alb_info;
@@ -409,7 +408,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
409void bond_mii_monitor(struct work_struct *); 408void bond_mii_monitor(struct work_struct *);
410void bond_loadbalance_arp_mon(struct work_struct *); 409void bond_loadbalance_arp_mon(struct work_struct *);
411void bond_activebackup_arp_mon(struct work_struct *); 410void bond_activebackup_arp_mon(struct work_struct *);
412void bond_set_mode_ops(struct bonding *bond, int mode); 411int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
413int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl); 412int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
414void bond_select_active_slave(struct bonding *bond); 413void bond_select_active_slave(struct bonding *bond);
415void bond_change_active_slave(struct bonding *bond, struct slave *new_active); 414void bond_change_active_slave(struct bonding *bond, struct slave *new_active);