diff options
author | stephen hemminger <shemminger@vyatta.com> | 2012-10-09 16:35:50 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-10-10 22:41:21 -0400 |
commit | 05f47d69c44902c265dc2ad5a960978a97b45e3d (patch) | |
tree | d548dda11ebf45f143c515fbc291679c3f91584c /drivers/net | |
parent | 1cad87156b3e79d25731cdcbfa9e149bf3e08f60 (diff) |
vxlan: allow configuring port range
VXLAN derives the source UDP port from the flow to help the
receiver load balance based on the outer header flow.
This patch restricts the port range to the normal UDP local
ports, and allows overriding via configuration.
It also uses jhash of the Ethernet header when looking at flows
without a known L3 header.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/vxlan.c | 62 |
1 files changed, 57 insertions, 5 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 43887d927775..4be2784e7ac2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c | |||
@@ -106,6 +106,8 @@ struct vxlan_dev { | |||
106 | __be32 gaddr; /* multicast group */ | 106 | __be32 gaddr; /* multicast group */ |
107 | __be32 saddr; /* source address */ | 107 | __be32 saddr; /* source address */ |
108 | unsigned int link; /* link to multicast over */ | 108 | unsigned int link; /* link to multicast over */ |
109 | __u16 port_min; /* source port range */ | ||
110 | __u16 port_max; | ||
109 | __u8 tos; /* TOS override */ | 111 | __u8 tos; /* TOS override */ |
110 | __u8 ttl; | 112 | __u8 ttl; |
111 | bool learn; | 113 | bool learn; |
@@ -654,12 +656,29 @@ static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb) | |||
654 | skb->destructor = vxlan_sock_free; | 656 | skb->destructor = vxlan_sock_free; |
655 | } | 657 | } |
656 | 658 | ||
659 | /* Compute source port for outgoing packet | ||
660 | * first choice to use L4 flow hash since it will spread | ||
661 | * better and maybe available from hardware | ||
662 | * secondary choice is to use jhash on the Ethernet header | ||
663 | */ | ||
664 | static u16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb) | ||
665 | { | ||
666 | unsigned int range = (vxlan->port_max - vxlan->port_min) + 1; | ||
667 | u32 hash; | ||
668 | |||
669 | hash = skb_get_rxhash(skb); | ||
670 | if (!hash) | ||
671 | hash = jhash(skb->data, 2 * ETH_ALEN, | ||
672 | (__force u32) skb->protocol); | ||
673 | |||
674 | return (((u64) hash * range) >> 32) + vxlan->port_min; | ||
675 | } | ||
676 | |||
657 | /* Transmit local packets over Vxlan | 677 | /* Transmit local packets over Vxlan |
658 | * | 678 | * |
659 | * Outer IP header inherits ECN and DF from inner header. | 679 | * Outer IP header inherits ECN and DF from inner header. |
660 | * Outer UDP destination is the VXLAN assigned port. | 680 | * Outer UDP destination is the VXLAN assigned port. |
661 | * source port is based on hash of flow if available | 681 | * source port is based on hash of flow |
662 | * otherwise use a random value | ||
663 | */ | 682 | */ |
664 | static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | 683 | static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) |
665 | { | 684 | { |
@@ -671,8 +690,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
671 | struct udphdr *uh; | 690 | struct udphdr *uh; |
672 | struct flowi4 fl4; | 691 | struct flowi4 fl4; |
673 | unsigned int pkt_len = skb->len; | 692 | unsigned int pkt_len = skb->len; |
674 | u32 hash; | ||
675 | __be32 dst; | 693 | __be32 dst; |
694 | __u16 src_port; | ||
676 | __be16 df = 0; | 695 | __be16 df = 0; |
677 | __u8 tos, ttl; | 696 | __u8 tos, ttl; |
678 | int err; | 697 | int err; |
@@ -695,7 +714,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
695 | if (tos == 1) | 714 | if (tos == 1) |
696 | tos = vxlan_get_dsfield(old_iph, skb); | 715 | tos = vxlan_get_dsfield(old_iph, skb); |
697 | 716 | ||
698 | hash = skb_get_rxhash(skb); | 717 | src_port = vxlan_src_port(vxlan, skb); |
699 | 718 | ||
700 | memset(&fl4, 0, sizeof(fl4)); | 719 | memset(&fl4, 0, sizeof(fl4)); |
701 | fl4.flowi4_oif = vxlan->link; | 720 | fl4.flowi4_oif = vxlan->link; |
@@ -732,7 +751,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) | |||
732 | uh = udp_hdr(skb); | 751 | uh = udp_hdr(skb); |
733 | 752 | ||
734 | uh->dest = htons(vxlan_port); | 753 | uh->dest = htons(vxlan_port); |
735 | uh->source = hash ? :random32(); | 754 | uh->source = htons(src_port); |
736 | 755 | ||
737 | uh->len = htons(skb->len); | 756 | uh->len = htons(skb->len); |
738 | uh->check = 0; | 757 | uh->check = 0; |
@@ -960,6 +979,7 @@ static void vxlan_setup(struct net_device *dev) | |||
960 | { | 979 | { |
961 | struct vxlan_dev *vxlan = netdev_priv(dev); | 980 | struct vxlan_dev *vxlan = netdev_priv(dev); |
962 | unsigned h; | 981 | unsigned h; |
982 | int low, high; | ||
963 | 983 | ||
964 | eth_hw_addr_random(dev); | 984 | eth_hw_addr_random(dev); |
965 | ether_setup(dev); | 985 | ether_setup(dev); |
@@ -979,6 +999,10 @@ static void vxlan_setup(struct net_device *dev) | |||
979 | vxlan->age_timer.function = vxlan_cleanup; | 999 | vxlan->age_timer.function = vxlan_cleanup; |
980 | vxlan->age_timer.data = (unsigned long) vxlan; | 1000 | vxlan->age_timer.data = (unsigned long) vxlan; |
981 | 1001 | ||
1002 | inet_get_local_port_range(&low, &high); | ||
1003 | vxlan->port_min = low; | ||
1004 | vxlan->port_max = high; | ||
1005 | |||
982 | vxlan->dev = dev; | 1006 | vxlan->dev = dev; |
983 | 1007 | ||
984 | for (h = 0; h < FDB_HASH_SIZE; ++h) | 1008 | for (h = 0; h < FDB_HASH_SIZE; ++h) |
@@ -995,6 +1019,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { | |||
995 | [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, | 1019 | [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, |
996 | [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, | 1020 | [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, |
997 | [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, | 1021 | [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, |
1022 | [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) }, | ||
998 | }; | 1023 | }; |
999 | 1024 | ||
1000 | static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) | 1025 | static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) |
@@ -1027,6 +1052,18 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) | |||
1027 | return -EADDRNOTAVAIL; | 1052 | return -EADDRNOTAVAIL; |
1028 | } | 1053 | } |
1029 | } | 1054 | } |
1055 | |||
1056 | if (data[IFLA_VXLAN_PORT_RANGE]) { | ||
1057 | const struct ifla_vxlan_port_range *p | ||
1058 | = nla_data(data[IFLA_VXLAN_PORT_RANGE]); | ||
1059 | |||
1060 | if (ntohs(p->high) < ntohs(p->low)) { | ||
1061 | pr_debug("port range %u .. %u not valid\n", | ||
1062 | ntohs(p->low), ntohs(p->high)); | ||
1063 | return -EINVAL; | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1030 | return 0; | 1067 | return 0; |
1031 | } | 1068 | } |
1032 | 1069 | ||
@@ -1077,6 +1114,13 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, | |||
1077 | if (data[IFLA_VXLAN_LIMIT]) | 1114 | if (data[IFLA_VXLAN_LIMIT]) |
1078 | vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); | 1115 | vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); |
1079 | 1116 | ||
1117 | if (data[IFLA_VXLAN_PORT_RANGE]) { | ||
1118 | const struct ifla_vxlan_port_range *p | ||
1119 | = nla_data(data[IFLA_VXLAN_PORT_RANGE]); | ||
1120 | vxlan->port_min = ntohs(p->low); | ||
1121 | vxlan->port_max = ntohs(p->high); | ||
1122 | } | ||
1123 | |||
1080 | err = register_netdevice(dev); | 1124 | err = register_netdevice(dev); |
1081 | if (!err) | 1125 | if (!err) |
1082 | hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni)); | 1126 | hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni)); |
@@ -1105,12 +1149,17 @@ static size_t vxlan_get_size(const struct net_device *dev) | |||
1105 | nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ | 1149 | nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ |
1106 | nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ | 1150 | nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ |
1107 | nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ | 1151 | nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ |
1152 | nla_total_size(sizeof(struct ifla_vxlan_port_range)) + | ||
1108 | 0; | 1153 | 0; |
1109 | } | 1154 | } |
1110 | 1155 | ||
1111 | static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) | 1156 | static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) |
1112 | { | 1157 | { |
1113 | const struct vxlan_dev *vxlan = netdev_priv(dev); | 1158 | const struct vxlan_dev *vxlan = netdev_priv(dev); |
1159 | struct ifla_vxlan_port_range ports = { | ||
1160 | .low = htons(vxlan->port_min), | ||
1161 | .high = htons(vxlan->port_max), | ||
1162 | }; | ||
1114 | 1163 | ||
1115 | if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) | 1164 | if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) |
1116 | goto nla_put_failure; | 1165 | goto nla_put_failure; |
@@ -1131,6 +1180,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) | |||
1131 | nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) | 1180 | nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) |
1132 | goto nla_put_failure; | 1181 | goto nla_put_failure; |
1133 | 1182 | ||
1183 | if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) | ||
1184 | goto nla_put_failure; | ||
1185 | |||
1134 | return 0; | 1186 | return 0; |
1135 | 1187 | ||
1136 | nla_put_failure: | 1188 | nla_put_failure: |