diff options
author | David S. Miller <davem@davemloft.net> | 2013-06-19 21:07:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-06-19 21:07:49 -0400 |
commit | dc3d807d6fd983603c82e7bcdbaa49cdb4239691 (patch) | |
tree | f426945de6694203f2c34218b4e4b06913b8f58c | |
parent | ac8025a643a0e0beb81f3f37ca693364c6b77858 (diff) | |
parent | aa310701e787087dbfbccf1409982a96e16c57a6 (diff) |
openvswitch: gre tunneling support.
Pravin B Shelar says:
====================
The following patch series adds support for GRE tunneling.
The first six patches extend the kernel gre and ip_tunnel module
APIs so that more code is shared between the gre modules
and ovs. The rest of the patches add the ovs tunneling infrastructure
and the gre protocol vport.
V2 fixes two patches according to comments from Jesse.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/vxlan.c | 32 | ||||
-rw-r--r-- | include/net/gre.h | 27 | ||||
-rw-r--r-- | include/net/ip_tunnels.h | 28 | ||||
-rw-r--r-- | include/uapi/linux/openvswitch.h | 19 | ||||
-rw-r--r-- | net/ipv4/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/gre.c | 323 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 247 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 68 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel_core.c | 122 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 6 | ||||
-rw-r--r-- | net/ipv6/sit.c | 46 | ||||
-rw-r--r-- | net/openvswitch/Kconfig | 2 | ||||
-rw-r--r-- | net/openvswitch/Makefile | 3 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 4 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 356 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 4 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 184 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 45 | ||||
-rw-r--r-- | net/openvswitch/vport-gre.c | 274 | ||||
-rw-r--r-- | net/openvswitch/vport-internal_dev.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 23 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 10 |
23 files changed, 1373 insertions, 456 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f6dce13c8f89..284c6c00c353 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c | |||
@@ -1021,7 +1021,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1021 | struct vxlan_dev *vxlan = netdev_priv(dev); | 1021 | struct vxlan_dev *vxlan = netdev_priv(dev); |
1022 | struct rtable *rt; | 1022 | struct rtable *rt; |
1023 | const struct iphdr *old_iph; | 1023 | const struct iphdr *old_iph; |
1024 | struct iphdr *iph; | ||
1025 | struct vxlanhdr *vxh; | 1024 | struct vxlanhdr *vxh; |
1026 | struct udphdr *uh; | 1025 | struct udphdr *uh; |
1027 | struct flowi4 fl4; | 1026 | struct flowi4 fl4; |
@@ -1030,6 +1029,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1030 | u32 vni; | 1029 | u32 vni; |
1031 | __be16 df = 0; | 1030 | __be16 df = 0; |
1032 | __u8 tos, ttl; | 1031 | __u8 tos, ttl; |
1032 | int err; | ||
1033 | 1033 | ||
1034 | dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; | 1034 | dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; |
1035 | vni = rdst->remote_vni; | 1035 | vni = rdst->remote_vni; |
@@ -1097,13 +1097,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1097 | vxlan_encap_bypass(skb, vxlan, dst_vxlan); | 1097 | vxlan_encap_bypass(skb, vxlan, dst_vxlan); |
1098 | return NETDEV_TX_OK; | 1098 | return NETDEV_TX_OK; |
1099 | } | 1099 | } |
1100 | |||
1101 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | ||
1102 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | ||
1103 | IPSKB_REROUTED); | ||
1104 | skb_dst_drop(skb); | ||
1105 | skb_dst_set(skb, &rt->dst); | ||
1106 | |||
1107 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); | 1100 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); |
1108 | vxh->vx_flags = htonl(VXLAN_FLAGS); | 1101 | vxh->vx_flags = htonl(VXLAN_FLAGS); |
1109 | vxh->vx_vni = htonl(vni << 8); | 1102 | vxh->vx_vni = htonl(vni << 8); |
@@ -1118,27 +1111,18 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, | |||
1118 | uh->len = htons(skb->len); | 1111 | uh->len = htons(skb->len); |
1119 | uh->check = 0; | 1112 | uh->check = 0; |
1120 | 1113 | ||
1121 | __skb_push(skb, sizeof(*iph)); | ||
1122 | skb_reset_network_header(skb); | ||
1123 | iph = ip_hdr(skb); | ||
1124 | iph->version = 4; | ||
1125 | iph->ihl = sizeof(struct iphdr) >> 2; | ||
1126 | iph->frag_off = df; | ||
1127 | iph->protocol = IPPROTO_UDP; | ||
1128 | iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb); | ||
1129 | iph->daddr = dst; | ||
1130 | iph->saddr = fl4.saddr; | ||
1131 | iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); | ||
1132 | tunnel_ip_select_ident(skb, old_iph, &rt->dst); | ||
1133 | |||
1134 | nf_reset(skb); | ||
1135 | |||
1136 | vxlan_set_owner(dev, skb); | 1114 | vxlan_set_owner(dev, skb); |
1137 | 1115 | ||
1138 | if (handle_offloads(skb)) | 1116 | if (handle_offloads(skb)) |
1139 | goto drop; | 1117 | goto drop; |
1140 | 1118 | ||
1141 | iptunnel_xmit(skb, dev); | 1119 | tos = ip_tunnel_ecn_encap(tos, old_iph, skb); |
1120 | ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); | ||
1121 | |||
1122 | err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst, | ||
1123 | IPPROTO_UDP, tos, ttl, df); | ||
1124 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); | ||
1125 | |||
1142 | return NETDEV_TX_OK; | 1126 | return NETDEV_TX_OK; |
1143 | 1127 | ||
1144 | drop: | 1128 | drop: |
diff --git a/include/net/gre.h b/include/net/gre.h index 9f03a390c826..a5a4ddf05300 100644 --- a/include/net/gre.h +++ b/include/net/gre.h | |||
@@ -7,6 +7,7 @@ | |||
7 | #define GREPROTO_CISCO 0 | 7 | #define GREPROTO_CISCO 0 |
8 | #define GREPROTO_PPTP 1 | 8 | #define GREPROTO_PPTP 1 |
9 | #define GREPROTO_MAX 2 | 9 | #define GREPROTO_MAX 2 |
10 | #define GRE_IP_PROTO_MAX 2 | ||
10 | 11 | ||
11 | struct gre_protocol { | 12 | struct gre_protocol { |
12 | int (*handler)(struct sk_buff *skb); | 13 | int (*handler)(struct sk_buff *skb); |
@@ -22,6 +23,32 @@ struct gre_base_hdr { | |||
22 | int gre_add_protocol(const struct gre_protocol *proto, u8 version); | 23 | int gre_add_protocol(const struct gre_protocol *proto, u8 version); |
23 | int gre_del_protocol(const struct gre_protocol *proto, u8 version); | 24 | int gre_del_protocol(const struct gre_protocol *proto, u8 version); |
24 | 25 | ||
26 | struct gre_cisco_protocol { | ||
27 | int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); | ||
28 | int (*err_handler)(struct sk_buff *skb, u32 info, | ||
29 | const struct tnl_ptk_info *tpi); | ||
30 | u8 priority; | ||
31 | }; | ||
32 | |||
33 | int gre_cisco_register(struct gre_cisco_protocol *proto); | ||
34 | int gre_cisco_unregister(struct gre_cisco_protocol *proto); | ||
35 | void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, | ||
36 | int hdr_len); | ||
37 | struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum); | ||
38 | |||
39 | static inline int ip_gre_calc_hlen(__be16 o_flags) | ||
40 | { | ||
41 | int addend = 4; | ||
42 | |||
43 | if (o_flags&TUNNEL_CSUM) | ||
44 | addend += 4; | ||
45 | if (o_flags&TUNNEL_KEY) | ||
46 | addend += 4; | ||
47 | if (o_flags&TUNNEL_SEQ) | ||
48 | addend += 4; | ||
49 | return addend; | ||
50 | } | ||
51 | |||
25 | static inline __be16 gre_flags_to_tnl_flags(__be16 flags) | 52 | static inline __be16 gre_flags_to_tnl_flags(__be16 flags) |
26 | { | 53 | { |
27 | __be16 tflags = 0; | 54 | __be16 tflags = 0; |
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1be442f89406..10bbb4273f7d 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h | |||
@@ -73,6 +73,7 @@ struct ip_tunnel { | |||
73 | #define TUNNEL_REC __cpu_to_be16(0x20) | 73 | #define TUNNEL_REC __cpu_to_be16(0x20) |
74 | #define TUNNEL_VERSION __cpu_to_be16(0x40) | 74 | #define TUNNEL_VERSION __cpu_to_be16(0x40) |
75 | #define TUNNEL_NO_KEY __cpu_to_be16(0x80) | 75 | #define TUNNEL_NO_KEY __cpu_to_be16(0x80) |
76 | #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) | ||
76 | 77 | ||
77 | struct tnl_ptk_info { | 78 | struct tnl_ptk_info { |
78 | __be16 flags; | 79 | __be16 flags; |
@@ -155,23 +156,28 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, | |||
155 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 156 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); |
156 | } | 157 | } |
157 | 158 | ||
158 | static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 159 | int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); |
159 | { | 160 | int iptunnel_xmit(struct net *net, struct rtable *rt, |
160 | int err; | 161 | struct sk_buff *skb, |
161 | int pkt_len = skb->len - skb_transport_offset(skb); | 162 | __be32 src, __be32 dst, __u8 proto, |
162 | struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); | 163 | __u8 tos, __u8 ttl, __be16 df); |
163 | 164 | ||
164 | nf_reset(skb); | 165 | static inline void iptunnel_xmit_stats(int err, |
166 | struct net_device_stats *err_stats, | ||
167 | struct pcpu_tstats __percpu *stats) | ||
168 | { | ||
169 | if (err > 0) { | ||
170 | struct pcpu_tstats *tstats = this_cpu_ptr(stats); | ||
165 | 171 | ||
166 | err = ip_local_out(skb); | ||
167 | if (likely(net_xmit_eval(err) == 0)) { | ||
168 | u64_stats_update_begin(&tstats->syncp); | 172 | u64_stats_update_begin(&tstats->syncp); |
169 | tstats->tx_bytes += pkt_len; | 173 | tstats->tx_bytes += err; |
170 | tstats->tx_packets++; | 174 | tstats->tx_packets++; |
171 | u64_stats_update_end(&tstats->syncp); | 175 | u64_stats_update_end(&tstats->syncp); |
176 | } else if (err < 0) { | ||
177 | err_stats->tx_errors++; | ||
178 | err_stats->tx_aborted_errors++; | ||
172 | } else { | 179 | } else { |
173 | dev->stats.tx_errors++; | 180 | err_stats->tx_dropped++; |
174 | dev->stats.tx_aborted_errors++; | ||
175 | } | 181 | } |
176 | } | 182 | } |
177 | #endif /* __NET_IP_TUNNELS_H */ | 183 | #endif /* __NET_IP_TUNNELS_H */ |
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 424672db7f12..c55efaaa9bb4 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h | |||
@@ -164,6 +164,7 @@ enum ovs_vport_type { | |||
164 | OVS_VPORT_TYPE_UNSPEC, | 164 | OVS_VPORT_TYPE_UNSPEC, |
165 | OVS_VPORT_TYPE_NETDEV, /* network device */ | 165 | OVS_VPORT_TYPE_NETDEV, /* network device */ |
166 | OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ | 166 | OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ |
167 | OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ | ||
167 | __OVS_VPORT_TYPE_MAX | 168 | __OVS_VPORT_TYPE_MAX |
168 | }; | 169 | }; |
169 | 170 | ||
@@ -246,11 +247,29 @@ enum ovs_key_attr { | |||
246 | OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ | 247 | OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ |
247 | OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ | 248 | OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ |
248 | OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ | 249 | OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ |
250 | OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ | ||
251 | |||
252 | #ifdef __KERNEL__ | ||
253 | OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ | ||
254 | #endif | ||
249 | __OVS_KEY_ATTR_MAX | 255 | __OVS_KEY_ATTR_MAX |
250 | }; | 256 | }; |
251 | 257 | ||
252 | #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) | 258 | #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) |
253 | 259 | ||
260 | enum ovs_tunnel_key_attr { | ||
261 | OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ | ||
262 | OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ | ||
263 | OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ | ||
264 | OVS_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ | ||
265 | OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ | ||
266 | OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ | ||
267 | OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ | ||
268 | __OVS_TUNNEL_KEY_ATTR_MAX | ||
269 | }; | ||
270 | |||
271 | #define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) | ||
272 | |||
254 | /** | 273 | /** |
255 | * enum ovs_frag_type - IPv4 and IPv6 fragment type | 274 | * enum ovs_frag_type - IPv4 and IPv6 fragment type |
256 | * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. | 275 | * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7fcf8101d85f..86ded0bac9c7 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \ | |||
11 | tcp_offload.o datagram.o raw.o udp.o udplite.o \ | 11 | tcp_offload.o datagram.o raw.o udp.o udplite.o \ |
12 | udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ | 12 | udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ |
13 | fib_frontend.o fib_semantics.o fib_trie.o \ | 13 | fib_frontend.o fib_semantics.o fib_trie.o \ |
14 | inet_fragment.o ping.o | 14 | inet_fragment.o ping.o ip_tunnel_core.o |
15 | 15 | ||
16 | obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o | 16 | obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o |
17 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o | 17 | obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o |
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index b2e805af9b87..ba4803e609b5 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | 13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
14 | 14 | ||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/if.h> | ||
17 | #include <linux/icmp.h> | ||
16 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
17 | #include <linux/kmod.h> | 19 | #include <linux/kmod.h> |
18 | #include <linux/skbuff.h> | 20 | #include <linux/skbuff.h> |
@@ -24,51 +26,270 @@ | |||
24 | #include <net/protocol.h> | 26 | #include <net/protocol.h> |
25 | #include <net/gre.h> | 27 | #include <net/gre.h> |
26 | 28 | ||
29 | #include <net/icmp.h> | ||
30 | #include <net/route.h> | ||
31 | #include <net/xfrm.h> | ||
27 | 32 | ||
28 | static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; | 33 | static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; |
29 | static DEFINE_SPINLOCK(gre_proto_lock); | 34 | static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; |
30 | 35 | ||
31 | int gre_add_protocol(const struct gre_protocol *proto, u8 version) | 36 | int gre_add_protocol(const struct gre_protocol *proto, u8 version) |
32 | { | 37 | { |
33 | if (version >= GREPROTO_MAX) | 38 | if (version >= GREPROTO_MAX) |
34 | goto err_out; | 39 | return -EINVAL; |
35 | |||
36 | spin_lock(&gre_proto_lock); | ||
37 | if (gre_proto[version]) | ||
38 | goto err_out_unlock; | ||
39 | |||
40 | RCU_INIT_POINTER(gre_proto[version], proto); | ||
41 | spin_unlock(&gre_proto_lock); | ||
42 | return 0; | ||
43 | 40 | ||
44 | err_out_unlock: | 41 | return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? |
45 | spin_unlock(&gre_proto_lock); | 42 | 0 : -EBUSY; |
46 | err_out: | ||
47 | return -1; | ||
48 | } | 43 | } |
49 | EXPORT_SYMBOL_GPL(gre_add_protocol); | 44 | EXPORT_SYMBOL_GPL(gre_add_protocol); |
50 | 45 | ||
51 | int gre_del_protocol(const struct gre_protocol *proto, u8 version) | 46 | int gre_del_protocol(const struct gre_protocol *proto, u8 version) |
52 | { | 47 | { |
48 | int ret; | ||
49 | |||
53 | if (version >= GREPROTO_MAX) | 50 | if (version >= GREPROTO_MAX) |
54 | goto err_out; | 51 | return -EINVAL; |
55 | 52 | ||
56 | spin_lock(&gre_proto_lock); | 53 | ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? |
57 | if (rcu_dereference_protected(gre_proto[version], | 54 | 0 : -EBUSY; |
58 | lockdep_is_held(&gre_proto_lock)) != proto) | 55 | |
59 | goto err_out_unlock; | 56 | if (ret) |
60 | RCU_INIT_POINTER(gre_proto[version], NULL); | 57 | return ret; |
61 | spin_unlock(&gre_proto_lock); | 58 | |
62 | synchronize_rcu(); | 59 | synchronize_rcu(); |
63 | return 0; | 60 | return 0; |
64 | |||
65 | err_out_unlock: | ||
66 | spin_unlock(&gre_proto_lock); | ||
67 | err_out: | ||
68 | return -1; | ||
69 | } | 61 | } |
70 | EXPORT_SYMBOL_GPL(gre_del_protocol); | 62 | EXPORT_SYMBOL_GPL(gre_del_protocol); |
71 | 63 | ||
64 | void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, | ||
65 | int hdr_len) | ||
66 | { | ||
67 | struct gre_base_hdr *greh; | ||
68 | |||
69 | skb_push(skb, hdr_len); | ||
70 | |||
71 | greh = (struct gre_base_hdr *)skb->data; | ||
72 | greh->flags = tnl_flags_to_gre_flags(tpi->flags); | ||
73 | greh->protocol = tpi->proto; | ||
74 | |||
75 | if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { | ||
76 | __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); | ||
77 | |||
78 | if (tpi->flags&TUNNEL_SEQ) { | ||
79 | *ptr = tpi->seq; | ||
80 | ptr--; | ||
81 | } | ||
82 | if (tpi->flags&TUNNEL_KEY) { | ||
83 | *ptr = tpi->key; | ||
84 | ptr--; | ||
85 | } | ||
86 | if (tpi->flags&TUNNEL_CSUM && | ||
87 | !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { | ||
88 | *ptr = 0; | ||
89 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, | ||
90 | skb->len, 0)); | ||
91 | } | ||
92 | } | ||
93 | } | ||
94 | EXPORT_SYMBOL_GPL(gre_build_header); | ||
95 | |||
96 | struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) | ||
97 | { | ||
98 | int err; | ||
99 | |||
100 | if (likely(!skb->encapsulation)) { | ||
101 | skb_reset_inner_headers(skb); | ||
102 | skb->encapsulation = 1; | ||
103 | } | ||
104 | |||
105 | if (skb_is_gso(skb)) { | ||
106 | err = skb_unclone(skb, GFP_ATOMIC); | ||
107 | if (unlikely(err)) | ||
108 | goto error; | ||
109 | skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; | ||
110 | return skb; | ||
111 | } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { | ||
112 | err = skb_checksum_help(skb); | ||
113 | if (unlikely(err)) | ||
114 | goto error; | ||
115 | } else if (skb->ip_summed != CHECKSUM_PARTIAL) | ||
116 | skb->ip_summed = CHECKSUM_NONE; | ||
117 | |||
118 | return skb; | ||
119 | error: | ||
120 | kfree_skb(skb); | ||
121 | return ERR_PTR(err); | ||
122 | } | ||
123 | EXPORT_SYMBOL_GPL(gre_handle_offloads); | ||
124 | |||
125 | static __sum16 check_checksum(struct sk_buff *skb) | ||
126 | { | ||
127 | __sum16 csum = 0; | ||
128 | |||
129 | switch (skb->ip_summed) { | ||
130 | case CHECKSUM_COMPLETE: | ||
131 | csum = csum_fold(skb->csum); | ||
132 | |||
133 | if (!csum) | ||
134 | break; | ||
135 | /* Fall through. */ | ||
136 | |||
137 | case CHECKSUM_NONE: | ||
138 | skb->csum = 0; | ||
139 | csum = __skb_checksum_complete(skb); | ||
140 | skb->ip_summed = CHECKSUM_COMPLETE; | ||
141 | break; | ||
142 | } | ||
143 | |||
144 | return csum; | ||
145 | } | ||
146 | |||
147 | static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, | ||
148 | bool *csum_err) | ||
149 | { | ||
150 | unsigned int ip_hlen = ip_hdrlen(skb); | ||
151 | const struct gre_base_hdr *greh; | ||
152 | __be32 *options; | ||
153 | int hdr_len; | ||
154 | |||
155 | if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) | ||
156 | return -EINVAL; | ||
157 | |||
158 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); | ||
159 | if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) | ||
160 | return -EINVAL; | ||
161 | |||
162 | tpi->flags = gre_flags_to_tnl_flags(greh->flags); | ||
163 | hdr_len = ip_gre_calc_hlen(tpi->flags); | ||
164 | |||
165 | if (!pskb_may_pull(skb, hdr_len)) | ||
166 | return -EINVAL; | ||
167 | |||
168 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); | ||
169 | tpi->proto = greh->protocol; | ||
170 | |||
171 | options = (__be32 *)(greh + 1); | ||
172 | if (greh->flags & GRE_CSUM) { | ||
173 | if (check_checksum(skb)) { | ||
174 | *csum_err = true; | ||
175 | return -EINVAL; | ||
176 | } | ||
177 | options++; | ||
178 | } | ||
179 | |||
180 | if (greh->flags & GRE_KEY) { | ||
181 | tpi->key = *options; | ||
182 | options++; | ||
183 | } else | ||
184 | tpi->key = 0; | ||
185 | |||
186 | if (unlikely(greh->flags & GRE_SEQ)) { | ||
187 | tpi->seq = *options; | ||
188 | options++; | ||
189 | } else | ||
190 | tpi->seq = 0; | ||
191 | |||
192 | /* WCCP version 1 and 2 protocol decoding. | ||
193 | * - Change protocol to IP | ||
194 | * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header | ||
195 | */ | ||
196 | if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { | ||
197 | tpi->proto = htons(ETH_P_IP); | ||
198 | if ((*(u8 *)options & 0xF0) != 0x40) { | ||
199 | hdr_len += 4; | ||
200 | if (!pskb_may_pull(skb, hdr_len)) | ||
201 | return -EINVAL; | ||
202 | } | ||
203 | } | ||
204 | |||
205 | return iptunnel_pull_header(skb, hdr_len, tpi->proto); | ||
206 | } | ||
207 | |||
208 | static int gre_cisco_rcv(struct sk_buff *skb) | ||
209 | { | ||
210 | struct tnl_ptk_info tpi; | ||
211 | int i; | ||
212 | bool csum_err = false; | ||
213 | |||
214 | if (parse_gre_header(skb, &tpi, &csum_err) < 0) | ||
215 | goto drop; | ||
216 | |||
217 | rcu_read_lock(); | ||
218 | for (i = 0; i < GRE_IP_PROTO_MAX; i++) { | ||
219 | struct gre_cisco_protocol *proto; | ||
220 | int ret; | ||
221 | |||
222 | proto = rcu_dereference(gre_cisco_proto_list[i]); | ||
223 | if (!proto) | ||
224 | continue; | ||
225 | ret = proto->handler(skb, &tpi); | ||
226 | if (ret == PACKET_RCVD) { | ||
227 | rcu_read_unlock(); | ||
228 | return 0; | ||
229 | } | ||
230 | } | ||
231 | rcu_read_unlock(); | ||
232 | |||
233 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | ||
234 | drop: | ||
235 | kfree_skb(skb); | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | static void gre_cisco_err(struct sk_buff *skb, u32 info) | ||
240 | { | ||
241 | /* All the routers (except for Linux) return only | ||
242 | * 8 bytes of packet payload. It means, that precise relaying of | ||
243 | * ICMP in the real Internet is absolutely infeasible. | ||
244 | * | ||
245 | * Moreover, Cisco "wise men" put GRE key to the third word | ||
246 | * in GRE header. It makes impossible maintaining even soft | ||
247 | * state for keyed | ||
248 | * GRE tunnels with enabled checksum. Tell them "thank you". | ||
249 | * | ||
250 | * Well, I wonder, rfc1812 was written by Cisco employee, | ||
251 | * what the hell these idiots break standards established | ||
252 | * by themselves??? | ||
253 | */ | ||
254 | |||
255 | const int type = icmp_hdr(skb)->type; | ||
256 | const int code = icmp_hdr(skb)->code; | ||
257 | struct tnl_ptk_info tpi; | ||
258 | bool csum_err = false; | ||
259 | int i; | ||
260 | |||
261 | if (parse_gre_header(skb, &tpi, &csum_err)) { | ||
262 | if (!csum_err) /* ignore csum errors. */ | ||
263 | return; | ||
264 | } | ||
265 | |||
266 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | ||
267 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | ||
268 | skb->dev->ifindex, 0, IPPROTO_GRE, 0); | ||
269 | return; | ||
270 | } | ||
271 | if (type == ICMP_REDIRECT) { | ||
272 | ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, | ||
273 | IPPROTO_GRE, 0); | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | rcu_read_lock(); | ||
278 | for (i = 0; i < GRE_IP_PROTO_MAX; i++) { | ||
279 | struct gre_cisco_protocol *proto; | ||
280 | |||
281 | proto = rcu_dereference(gre_cisco_proto_list[i]); | ||
282 | if (!proto) | ||
283 | continue; | ||
284 | |||
285 | if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) | ||
286 | goto out; | ||
287 | |||
288 | } | ||
289 | out: | ||
290 | rcu_read_unlock(); | ||
291 | } | ||
292 | |||
72 | static int gre_rcv(struct sk_buff *skb) | 293 | static int gre_rcv(struct sk_buff *skb) |
73 | { | 294 | { |
74 | const struct gre_protocol *proto; | 295 | const struct gre_protocol *proto; |
@@ -220,27 +441,68 @@ static const struct net_offload gre_offload = { | |||
220 | }, | 441 | }, |
221 | }; | 442 | }; |
222 | 443 | ||
444 | static const struct gre_protocol ipgre_protocol = { | ||
445 | .handler = gre_cisco_rcv, | ||
446 | .err_handler = gre_cisco_err, | ||
447 | }; | ||
448 | |||
449 | int gre_cisco_register(struct gre_cisco_protocol *newp) | ||
450 | { | ||
451 | struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) | ||
452 | &gre_cisco_proto_list[newp->priority]; | ||
453 | |||
454 | return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; | ||
455 | } | ||
456 | EXPORT_SYMBOL_GPL(gre_cisco_register); | ||
457 | |||
458 | int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) | ||
459 | { | ||
460 | struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) | ||
461 | &gre_cisco_proto_list[del_proto->priority]; | ||
462 | int ret; | ||
463 | |||
464 | ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; | ||
465 | |||
466 | if (ret) | ||
467 | return ret; | ||
468 | |||
469 | synchronize_net(); | ||
470 | return 0; | ||
471 | } | ||
472 | EXPORT_SYMBOL_GPL(gre_cisco_unregister); | ||
473 | |||
223 | static int __init gre_init(void) | 474 | static int __init gre_init(void) |
224 | { | 475 | { |
225 | pr_info("GRE over IPv4 demultiplexor driver\n"); | 476 | pr_info("GRE over IPv4 demultiplexor driver\n"); |
226 | 477 | ||
227 | if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { | 478 | if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { |
228 | pr_err("can't add protocol\n"); | 479 | pr_err("can't add protocol\n"); |
229 | return -EAGAIN; | 480 | goto err; |
481 | } | ||
482 | |||
483 | if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { | ||
484 | pr_info("%s: can't add ipgre handler\n", __func__); | ||
485 | goto err_gre; | ||
230 | } | 486 | } |
231 | 487 | ||
232 | if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { | 488 | if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { |
233 | pr_err("can't add protocol offload\n"); | 489 | pr_err("can't add protocol offload\n"); |
234 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); | 490 | goto err_gso; |
235 | return -EAGAIN; | ||
236 | } | 491 | } |
237 | 492 | ||
238 | return 0; | 493 | return 0; |
494 | err_gso: | ||
495 | gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); | ||
496 | err_gre: | ||
497 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); | ||
498 | err: | ||
499 | return -EAGAIN; | ||
239 | } | 500 | } |
240 | 501 | ||
241 | static void __exit gre_exit(void) | 502 | static void __exit gre_exit(void) |
242 | { | 503 | { |
243 | inet_del_offload(&gre_offload, IPPROTO_GRE); | 504 | inet_del_offload(&gre_offload, IPPROTO_GRE); |
505 | gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); | ||
244 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); | 506 | inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); |
245 | } | 507 | } |
246 | 508 | ||
@@ -250,4 +512,3 @@ module_exit(gre_exit); | |||
250 | MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); | 512 | MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); |
251 | MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); | 513 | MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); |
252 | MODULE_LICENSE("GPL"); | 514 | MODULE_LICENSE("GPL"); |
253 | |||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a982657d05e7..c326e869993a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev); | |||
121 | static int ipgre_net_id __read_mostly; | 121 | static int ipgre_net_id __read_mostly; |
122 | static int gre_tap_net_id __read_mostly; | 122 | static int gre_tap_net_id __read_mostly; |
123 | 123 | ||
124 | static __sum16 check_checksum(struct sk_buff *skb) | 124 | static int ipgre_err(struct sk_buff *skb, u32 info, |
125 | { | 125 | const struct tnl_ptk_info *tpi) |
126 | __sum16 csum = 0; | ||
127 | |||
128 | switch (skb->ip_summed) { | ||
129 | case CHECKSUM_COMPLETE: | ||
130 | csum = csum_fold(skb->csum); | ||
131 | |||
132 | if (!csum) | ||
133 | break; | ||
134 | /* Fall through. */ | ||
135 | |||
136 | case CHECKSUM_NONE: | ||
137 | skb->csum = 0; | ||
138 | csum = __skb_checksum_complete(skb); | ||
139 | skb->ip_summed = CHECKSUM_COMPLETE; | ||
140 | break; | ||
141 | } | ||
142 | |||
143 | return csum; | ||
144 | } | ||
145 | |||
146 | static int ip_gre_calc_hlen(__be16 o_flags) | ||
147 | { | ||
148 | int addend = 4; | ||
149 | |||
150 | if (o_flags&TUNNEL_CSUM) | ||
151 | addend += 4; | ||
152 | if (o_flags&TUNNEL_KEY) | ||
153 | addend += 4; | ||
154 | if (o_flags&TUNNEL_SEQ) | ||
155 | addend += 4; | ||
156 | return addend; | ||
157 | } | ||
158 | |||
159 | static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, | ||
160 | bool *csum_err, int *hdr_len) | ||
161 | { | ||
162 | unsigned int ip_hlen = ip_hdrlen(skb); | ||
163 | const struct gre_base_hdr *greh; | ||
164 | __be32 *options; | ||
165 | |||
166 | if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) | ||
167 | return -EINVAL; | ||
168 | |||
169 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); | ||
170 | if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) | ||
171 | return -EINVAL; | ||
172 | |||
173 | tpi->flags = gre_flags_to_tnl_flags(greh->flags); | ||
174 | *hdr_len = ip_gre_calc_hlen(tpi->flags); | ||
175 | |||
176 | if (!pskb_may_pull(skb, *hdr_len)) | ||
177 | return -EINVAL; | ||
178 | |||
179 | greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); | ||
180 | |||
181 | tpi->proto = greh->protocol; | ||
182 | |||
183 | options = (__be32 *)(greh + 1); | ||
184 | if (greh->flags & GRE_CSUM) { | ||
185 | if (check_checksum(skb)) { | ||
186 | *csum_err = true; | ||
187 | return -EINVAL; | ||
188 | } | ||
189 | options++; | ||
190 | } | ||
191 | |||
192 | if (greh->flags & GRE_KEY) { | ||
193 | tpi->key = *options; | ||
194 | options++; | ||
195 | } else | ||
196 | tpi->key = 0; | ||
197 | |||
198 | if (unlikely(greh->flags & GRE_SEQ)) { | ||
199 | tpi->seq = *options; | ||
200 | options++; | ||
201 | } else | ||
202 | tpi->seq = 0; | ||
203 | |||
204 | /* WCCP version 1 and 2 protocol decoding. | ||
205 | * - Change protocol to IP | ||
206 | * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header | ||
207 | */ | ||
208 | if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { | ||
209 | tpi->proto = htons(ETH_P_IP); | ||
210 | if ((*(u8 *)options & 0xF0) != 0x40) { | ||
211 | *hdr_len += 4; | ||
212 | if (!pskb_may_pull(skb, *hdr_len)) | ||
213 | return -EINVAL; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static void ipgre_err(struct sk_buff *skb, u32 info) | ||
221 | { | 126 | { |
222 | 127 | ||
223 | /* All the routers (except for Linux) return only | 128 | /* All the routers (except for Linux) return only |
@@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
239 | const int type = icmp_hdr(skb)->type; | 144 | const int type = icmp_hdr(skb)->type; |
240 | const int code = icmp_hdr(skb)->code; | 145 | const int code = icmp_hdr(skb)->code; |
241 | struct ip_tunnel *t; | 146 | struct ip_tunnel *t; |
242 | struct tnl_ptk_info tpi; | ||
243 | int hdr_len; | ||
244 | bool csum_err = false; | ||
245 | |||
246 | if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { | ||
247 | if (!csum_err) /* ignore csum errors. */ | ||
248 | return; | ||
249 | } | ||
250 | 147 | ||
251 | switch (type) { | 148 | switch (type) { |
252 | default: | 149 | default: |
253 | case ICMP_PARAMETERPROB: | 150 | case ICMP_PARAMETERPROB: |
254 | return; | 151 | return PACKET_RCVD; |
255 | 152 | ||
256 | case ICMP_DEST_UNREACH: | 153 | case ICMP_DEST_UNREACH: |
257 | switch (code) { | 154 | switch (code) { |
258 | case ICMP_SR_FAILED: | 155 | case ICMP_SR_FAILED: |
259 | case ICMP_PORT_UNREACH: | 156 | case ICMP_PORT_UNREACH: |
260 | /* Impossible event. */ | 157 | /* Impossible event. */ |
261 | return; | 158 | return PACKET_RCVD; |
262 | default: | 159 | default: |
263 | /* All others are translated to HOST_UNREACH. | 160 | /* All others are translated to HOST_UNREACH. |
264 | rfc2003 contains "deep thoughts" about NET_UNREACH, | 161 | rfc2003 contains "deep thoughts" about NET_UNREACH, |
@@ -269,138 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
269 | break; | 166 | break; |
270 | case ICMP_TIME_EXCEEDED: | 167 | case ICMP_TIME_EXCEEDED: |
271 | if (code != ICMP_EXC_TTL) | 168 | if (code != ICMP_EXC_TTL) |
272 | return; | 169 | return PACKET_RCVD; |
273 | break; | 170 | break; |
274 | 171 | ||
275 | case ICMP_REDIRECT: | 172 | case ICMP_REDIRECT: |
276 | break; | 173 | break; |
277 | } | 174 | } |
278 | 175 | ||
279 | if (tpi.proto == htons(ETH_P_TEB)) | 176 | if (tpi->proto == htons(ETH_P_TEB)) |
280 | itn = net_generic(net, gre_tap_net_id); | 177 | itn = net_generic(net, gre_tap_net_id); |
281 | else | 178 | else |
282 | itn = net_generic(net, ipgre_net_id); | 179 | itn = net_generic(net, ipgre_net_id); |
283 | 180 | ||
284 | iph = (const struct iphdr *)skb->data; | 181 | iph = (const struct iphdr *)skb->data; |
285 | t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, | 182 | t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, |
286 | iph->daddr, iph->saddr, tpi.key); | 183 | iph->daddr, iph->saddr, tpi->key); |
287 | 184 | ||
288 | if (t == NULL) | 185 | if (t == NULL) |
289 | return; | 186 | return PACKET_REJECT; |
290 | 187 | ||
291 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { | ||
292 | ipv4_update_pmtu(skb, dev_net(skb->dev), info, | ||
293 | t->parms.link, 0, IPPROTO_GRE, 0); | ||
294 | return; | ||
295 | } | ||
296 | if (type == ICMP_REDIRECT) { | ||
297 | ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, | ||
298 | IPPROTO_GRE, 0); | ||
299 | return; | ||
300 | } | ||
301 | if (t->parms.iph.daddr == 0 || | 188 | if (t->parms.iph.daddr == 0 || |
302 | ipv4_is_multicast(t->parms.iph.daddr)) | 189 | ipv4_is_multicast(t->parms.iph.daddr)) |
303 | return; | 190 | return PACKET_RCVD; |
304 | 191 | ||
305 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 192 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
306 | return; | 193 | return PACKET_RCVD; |
307 | 194 | ||
308 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) | 195 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
309 | t->err_count++; | 196 | t->err_count++; |
310 | else | 197 | else |
311 | t->err_count = 1; | 198 | t->err_count = 1; |
312 | t->err_time = jiffies; | 199 | t->err_time = jiffies; |
200 | return PACKET_RCVD; | ||
313 | } | 201 | } |
314 | 202 | ||
315 | static int ipgre_rcv(struct sk_buff *skb) | 203 | static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) |
316 | { | 204 | { |
317 | struct net *net = dev_net(skb->dev); | 205 | struct net *net = dev_net(skb->dev); |
318 | struct ip_tunnel_net *itn; | 206 | struct ip_tunnel_net *itn; |
319 | const struct iphdr *iph; | 207 | const struct iphdr *iph; |
320 | struct ip_tunnel *tunnel; | 208 | struct ip_tunnel *tunnel; |
321 | struct tnl_ptk_info tpi; | ||
322 | int hdr_len; | ||
323 | bool csum_err = false; | ||
324 | |||
325 | if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) | ||
326 | goto drop; | ||
327 | 209 | ||
328 | if (tpi.proto == htons(ETH_P_TEB)) | 210 | if (tpi->proto == htons(ETH_P_TEB)) |
329 | itn = net_generic(net, gre_tap_net_id); | 211 | itn = net_generic(net, gre_tap_net_id); |
330 | else | 212 | else |
331 | itn = net_generic(net, ipgre_net_id); | 213 | itn = net_generic(net, ipgre_net_id); |
332 | 214 | ||
333 | iph = ip_hdr(skb); | 215 | iph = ip_hdr(skb); |
334 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, | 216 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, |
335 | iph->saddr, iph->daddr, tpi.key); | 217 | iph->saddr, iph->daddr, tpi->key); |
336 | 218 | ||
337 | if (tunnel) { | 219 | if (tunnel) { |
338 | ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); | 220 | ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); |
339 | return 0; | 221 | return PACKET_RCVD; |
340 | } | 222 | } |
341 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | 223 | return PACKET_REJECT; |
342 | drop: | ||
343 | kfree_skb(skb); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) | ||
348 | { | ||
349 | int err; | ||
350 | |||
351 | if (skb_is_gso(skb)) { | ||
352 | err = skb_unclone(skb, GFP_ATOMIC); | ||
353 | if (unlikely(err)) | ||
354 | goto error; | ||
355 | skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; | ||
356 | return skb; | ||
357 | } else if (skb->ip_summed == CHECKSUM_PARTIAL && | ||
358 | tunnel->parms.o_flags&TUNNEL_CSUM) { | ||
359 | err = skb_checksum_help(skb); | ||
360 | if (unlikely(err)) | ||
361 | goto error; | ||
362 | } else if (skb->ip_summed != CHECKSUM_PARTIAL) | ||
363 | skb->ip_summed = CHECKSUM_NONE; | ||
364 | |||
365 | return skb; | ||
366 | |||
367 | error: | ||
368 | kfree_skb(skb); | ||
369 | return ERR_PTR(err); | ||
370 | } | ||
371 | |||
372 | static struct sk_buff *gre_build_header(struct sk_buff *skb, | ||
373 | const struct tnl_ptk_info *tpi, | ||
374 | int hdr_len) | ||
375 | { | ||
376 | struct gre_base_hdr *greh; | ||
377 | |||
378 | skb_push(skb, hdr_len); | ||
379 | |||
380 | greh = (struct gre_base_hdr *)skb->data; | ||
381 | greh->flags = tnl_flags_to_gre_flags(tpi->flags); | ||
382 | greh->protocol = tpi->proto; | ||
383 | |||
384 | if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { | ||
385 | __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); | ||
386 | |||
387 | if (tpi->flags&TUNNEL_SEQ) { | ||
388 | *ptr = tpi->seq; | ||
389 | ptr--; | ||
390 | } | ||
391 | if (tpi->flags&TUNNEL_KEY) { | ||
392 | *ptr = tpi->key; | ||
393 | ptr--; | ||
394 | } | ||
395 | if (tpi->flags&TUNNEL_CSUM && | ||
396 | !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { | ||
397 | *(__sum16 *)ptr = 0; | ||
398 | *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, | ||
399 | skb->len, 0)); | ||
400 | } | ||
401 | } | ||
402 | |||
403 | return skb; | ||
404 | } | 224 | } |
405 | 225 | ||
406 | static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, | 226 | static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, |
@@ -410,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, | |||
410 | struct ip_tunnel *tunnel = netdev_priv(dev); | 230 | struct ip_tunnel *tunnel = netdev_priv(dev); |
411 | struct tnl_ptk_info tpi; | 231 | struct tnl_ptk_info tpi; |
412 | 232 | ||
413 | if (likely(!skb->encapsulation)) { | ||
414 | skb_reset_inner_headers(skb); | ||
415 | skb->encapsulation = 1; | ||
416 | } | ||
417 | |||
418 | tpi.flags = tunnel->parms.o_flags; | 233 | tpi.flags = tunnel->parms.o_flags; |
419 | tpi.proto = proto; | 234 | tpi.proto = proto; |
420 | tpi.key = tunnel->parms.o_key; | 235 | tpi.key = tunnel->parms.o_key; |
@@ -423,11 +238,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, | |||
423 | tpi.seq = htonl(tunnel->o_seqno); | 238 | tpi.seq = htonl(tunnel->o_seqno); |
424 | 239 | ||
425 | /* Push GRE header. */ | 240 | /* Push GRE header. */ |
426 | skb = gre_build_header(skb, &tpi, tunnel->hlen); | 241 | gre_build_header(skb, &tpi, tunnel->hlen); |
427 | if (unlikely(!skb)) { | ||
428 | dev->stats.tx_dropped++; | ||
429 | return; | ||
430 | } | ||
431 | 242 | ||
432 | ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); | 243 | ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); |
433 | } | 244 | } |
@@ -438,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, | |||
438 | struct ip_tunnel *tunnel = netdev_priv(dev); | 249 | struct ip_tunnel *tunnel = netdev_priv(dev); |
439 | const struct iphdr *tnl_params; | 250 | const struct iphdr *tnl_params; |
440 | 251 | ||
441 | skb = handle_offloads(tunnel, skb); | 252 | skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); |
442 | if (IS_ERR(skb)) | 253 | if (IS_ERR(skb)) |
443 | goto out; | 254 | goto out; |
444 | 255 | ||
@@ -477,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, | |||
477 | { | 288 | { |
478 | struct ip_tunnel *tunnel = netdev_priv(dev); | 289 | struct ip_tunnel *tunnel = netdev_priv(dev); |
479 | 290 | ||
480 | skb = handle_offloads(tunnel, skb); | 291 | skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); |
481 | if (IS_ERR(skb)) | 292 | if (IS_ERR(skb)) |
482 | goto out; | 293 | goto out; |
483 | 294 | ||
@@ -708,9 +519,10 @@ static int ipgre_tunnel_init(struct net_device *dev) | |||
708 | return ip_tunnel_init(dev); | 519 | return ip_tunnel_init(dev); |
709 | } | 520 | } |
710 | 521 | ||
711 | static const struct gre_protocol ipgre_protocol = { | 522 | static struct gre_cisco_protocol ipgre_protocol = { |
712 | .handler = ipgre_rcv, | 523 | .handler = ipgre_rcv, |
713 | .err_handler = ipgre_err, | 524 | .err_handler = ipgre_err, |
525 | .priority = 0, | ||
714 | }; | 526 | }; |
715 | 527 | ||
716 | static int __net_init ipgre_init_net(struct net *net) | 528 | static int __net_init ipgre_init_net(struct net *net) |
@@ -978,7 +790,7 @@ static int __init ipgre_init(void) | |||
978 | if (err < 0) | 790 | if (err < 0) |
979 | goto pnet_tap_faied; | 791 | goto pnet_tap_faied; |
980 | 792 | ||
981 | err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); | 793 | err = gre_cisco_register(&ipgre_protocol); |
982 | if (err < 0) { | 794 | if (err < 0) { |
983 | pr_info("%s: can't add protocol\n", __func__); | 795 | pr_info("%s: can't add protocol\n", __func__); |
984 | goto add_proto_failed; | 796 | goto add_proto_failed; |
@@ -997,7 +809,7 @@ static int __init ipgre_init(void) | |||
997 | tap_ops_failed: | 809 | tap_ops_failed: |
998 | rtnl_link_unregister(&ipgre_link_ops); | 810 | rtnl_link_unregister(&ipgre_link_ops); |
999 | rtnl_link_failed: | 811 | rtnl_link_failed: |
1000 | gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); | 812 | gre_cisco_unregister(&ipgre_protocol); |
1001 | add_proto_failed: | 813 | add_proto_failed: |
1002 | unregister_pernet_device(&ipgre_tap_net_ops); | 814 | unregister_pernet_device(&ipgre_tap_net_ops); |
1003 | pnet_tap_faied: | 815 | pnet_tap_faied: |
@@ -1009,8 +821,7 @@ static void __exit ipgre_fini(void) | |||
1009 | { | 821 | { |
1010 | rtnl_link_unregister(&ipgre_tap_ops); | 822 | rtnl_link_unregister(&ipgre_tap_ops); |
1011 | rtnl_link_unregister(&ipgre_link_ops); | 823 | rtnl_link_unregister(&ipgre_link_ops); |
1012 | if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) | 824 | gre_cisco_unregister(&ipgre_protocol); |
1013 | pr_info("%s: can't remove protocol\n", __func__); | ||
1014 | unregister_pernet_device(&ipgre_tap_net_ops); | 825 | unregister_pernet_device(&ipgre_tap_net_ops); |
1015 | unregister_pernet_device(&ipgre_net_ops); | 826 | unregister_pernet_device(&ipgre_net_ops); |
1016 | } | 827 | } |
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e189db409b0e..bd227e5ea9da 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c | |||
@@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, | |||
408 | const struct iphdr *iph = ip_hdr(skb); | 408 | const struct iphdr *iph = ip_hdr(skb); |
409 | int err; | 409 | int err; |
410 | 410 | ||
411 | secpath_reset(skb); | ||
412 | |||
413 | skb->protocol = tpi->proto; | ||
414 | |||
415 | skb->mac_header = skb->network_header; | ||
416 | __pskb_pull(skb, tunnel->hlen); | ||
417 | skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); | ||
418 | #ifdef CONFIG_NET_IPGRE_BROADCAST | 411 | #ifdef CONFIG_NET_IPGRE_BROADCAST |
419 | if (ipv4_is_multicast(iph->daddr)) { | 412 | if (ipv4_is_multicast(iph->daddr)) { |
420 | /* Looped back packet, drop it! */ | 413 | /* Looped back packet, drop it! */ |
@@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, | |||
442 | tunnel->i_seqno = ntohl(tpi->seq) + 1; | 435 | tunnel->i_seqno = ntohl(tpi->seq) + 1; |
443 | } | 436 | } |
444 | 437 | ||
445 | /* Warning: All skb pointers will be invalidated! */ | ||
446 | if (tunnel->dev->type == ARPHRD_ETHER) { | ||
447 | if (!pskb_may_pull(skb, ETH_HLEN)) { | ||
448 | tunnel->dev->stats.rx_length_errors++; | ||
449 | tunnel->dev->stats.rx_errors++; | ||
450 | goto drop; | ||
451 | } | ||
452 | |||
453 | iph = ip_hdr(skb); | ||
454 | skb->protocol = eth_type_trans(skb, tunnel->dev); | ||
455 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | ||
456 | } | ||
457 | |||
458 | skb->pkt_type = PACKET_HOST; | ||
459 | __skb_tunnel_rx(skb, tunnel->dev); | ||
460 | |||
461 | skb_reset_network_header(skb); | ||
462 | err = IP_ECN_decapsulate(iph, skb); | 438 | err = IP_ECN_decapsulate(iph, skb); |
463 | if (unlikely(err)) { | 439 | if (unlikely(err)) { |
464 | if (log_ecn_error) | 440 | if (log_ecn_error) |
@@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, | |||
477 | tstats->rx_bytes += skb->len; | 453 | tstats->rx_bytes += skb->len; |
478 | u64_stats_update_end(&tstats->syncp); | 454 | u64_stats_update_end(&tstats->syncp); |
479 | 455 | ||
456 | if (tunnel->dev->type == ARPHRD_ETHER) { | ||
457 | skb->protocol = eth_type_trans(skb, tunnel->dev); | ||
458 | skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); | ||
459 | } else { | ||
460 | skb->dev = tunnel->dev; | ||
461 | } | ||
480 | gro_cells_receive(&tunnel->gro_cells, skb); | 462 | gro_cells_receive(&tunnel->gro_cells, skb); |
481 | return 0; | 463 | return 0; |
482 | 464 | ||
@@ -491,19 +473,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
491 | { | 473 | { |
492 | struct ip_tunnel *tunnel = netdev_priv(dev); | 474 | struct ip_tunnel *tunnel = netdev_priv(dev); |
493 | const struct iphdr *inner_iph; | 475 | const struct iphdr *inner_iph; |
494 | struct iphdr *iph; | ||
495 | struct flowi4 fl4; | 476 | struct flowi4 fl4; |
496 | u8 tos, ttl; | 477 | u8 tos, ttl; |
497 | __be16 df; | 478 | __be16 df; |
498 | struct rtable *rt; /* Route to the other host */ | 479 | struct rtable *rt; /* Route to the other host */ |
499 | struct net_device *tdev; /* Device to other host */ | ||
500 | unsigned int max_headroom; /* The extra header space needed */ | 480 | unsigned int max_headroom; /* The extra header space needed */ |
501 | __be32 dst; | 481 | __be32 dst; |
502 | int mtu; | 482 | int mtu; |
483 | int err; | ||
503 | 484 | ||
504 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | 485 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); |
505 | 486 | ||
506 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | ||
507 | dst = tnl_params->daddr; | 487 | dst = tnl_params->daddr; |
508 | if (dst == 0) { | 488 | if (dst == 0) { |
509 | /* NBMA tunnel */ | 489 | /* NBMA tunnel */ |
@@ -571,14 +551,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
571 | dev->stats.tx_carrier_errors++; | 551 | dev->stats.tx_carrier_errors++; |
572 | goto tx_error; | 552 | goto tx_error; |
573 | } | 553 | } |
574 | tdev = rt->dst.dev; | 554 | if (rt->dst.dev == dev) { |
575 | |||
576 | if (tdev == dev) { | ||
577 | ip_rt_put(rt); | 555 | ip_rt_put(rt); |
578 | dev->stats.collisions++; | 556 | dev->stats.collisions++; |
579 | goto tx_error; | 557 | goto tx_error; |
580 | } | 558 | } |
581 | |||
582 | df = tnl_params->frag_off; | 559 | df = tnl_params->frag_off; |
583 | 560 | ||
584 | if (df) | 561 | if (df) |
@@ -596,6 +573,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
596 | if (!skb_is_gso(skb) && | 573 | if (!skb_is_gso(skb) && |
597 | (inner_iph->frag_off&htons(IP_DF)) && | 574 | (inner_iph->frag_off&htons(IP_DF)) && |
598 | mtu < ntohs(inner_iph->tot_len)) { | 575 | mtu < ntohs(inner_iph->tot_len)) { |
576 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | ||
599 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | 577 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); |
600 | ip_rt_put(rt); | 578 | ip_rt_put(rt); |
601 | goto tx_error; | 579 | goto tx_error; |
@@ -646,8 +624,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
646 | ttl = ip4_dst_hoplimit(&rt->dst); | 624 | ttl = ip4_dst_hoplimit(&rt->dst); |
647 | } | 625 | } |
648 | 626 | ||
649 | max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) | 627 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
650 | + rt->dst.header_len; | 628 | + rt->dst.header_len; |
651 | if (max_headroom > dev->needed_headroom) { | 629 | if (max_headroom > dev->needed_headroom) { |
652 | dev->needed_headroom = max_headroom; | 630 | dev->needed_headroom = max_headroom; |
653 | if (skb_cow_head(skb, dev->needed_headroom)) { | 631 | if (skb_cow_head(skb, dev->needed_headroom)) { |
@@ -657,27 +635,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
657 | } | 635 | } |
658 | } | 636 | } |
659 | 637 | ||
660 | skb_dst_drop(skb); | 638 | err = iptunnel_xmit(dev_net(dev), rt, skb, |
661 | skb_dst_set(skb, &rt->dst); | 639 | fl4.saddr, fl4.daddr, protocol, |
662 | 640 | ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); | |
663 | /* Push down and install the IP header. */ | 641 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); |
664 | skb_push(skb, sizeof(struct iphdr)); | ||
665 | skb_reset_network_header(skb); | ||
666 | |||
667 | iph = ip_hdr(skb); | ||
668 | inner_iph = (const struct iphdr *)skb_inner_network_header(skb); | ||
669 | 642 | ||
670 | iph->version = 4; | ||
671 | iph->ihl = sizeof(struct iphdr) >> 2; | ||
672 | iph->frag_off = df; | ||
673 | iph->protocol = protocol; | ||
674 | iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); | ||
675 | iph->daddr = fl4.daddr; | ||
676 | iph->saddr = fl4.saddr; | ||
677 | iph->ttl = ttl; | ||
678 | tunnel_ip_select_ident(skb, inner_iph, &rt->dst); | ||
679 | |||
680 | iptunnel_xmit(skb, dev); | ||
681 | return; | 643 | return; |
682 | 644 | ||
683 | #if IS_ENABLED(CONFIG_IPV6) | 645 | #if IS_ENABLED(CONFIG_IPV6) |
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c new file mode 100644 index 000000000000..7167b08977df --- /dev/null +++ b/net/ipv4/ip_tunnel_core.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2013 Nicira, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of version 2 of the GNU General Public | ||
6 | * License as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
16 | * 02110-1301, USA | ||
17 | */ | ||
18 | |||
19 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/netdevice.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/if_arp.h> | ||
27 | #include <linux/mroute.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/in6.h> | ||
30 | #include <linux/inetdevice.h> | ||
31 | #include <linux/netfilter_ipv4.h> | ||
32 | #include <linux/etherdevice.h> | ||
33 | #include <linux/if_ether.h> | ||
34 | #include <linux/if_vlan.h> | ||
35 | |||
36 | #include <net/ip.h> | ||
37 | #include <net/icmp.h> | ||
38 | #include <net/protocol.h> | ||
39 | #include <net/ip_tunnels.h> | ||
40 | #include <net/arp.h> | ||
41 | #include <net/checksum.h> | ||
42 | #include <net/dsfield.h> | ||
43 | #include <net/inet_ecn.h> | ||
44 | #include <net/xfrm.h> | ||
45 | #include <net/net_namespace.h> | ||
46 | #include <net/netns/generic.h> | ||
47 | #include <net/rtnetlink.h> | ||
48 | |||
49 | int iptunnel_xmit(struct net *net, struct rtable *rt, | ||
50 | struct sk_buff *skb, | ||
51 | __be32 src, __be32 dst, __u8 proto, | ||
52 | __u8 tos, __u8 ttl, __be16 df) | ||
53 | { | ||
54 | int pkt_len = skb->len; | ||
55 | struct iphdr *iph; | ||
56 | int err; | ||
57 | |||
58 | nf_reset(skb); | ||
59 | secpath_reset(skb); | ||
60 | skb->rxhash = 0; | ||
61 | skb_dst_drop(skb); | ||
62 | skb_dst_set(skb, &rt->dst); | ||
63 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | ||
64 | |||
65 | /* Push down and install the IP header. */ | ||
66 | __skb_push(skb, sizeof(struct iphdr)); | ||
67 | skb_reset_network_header(skb); | ||
68 | |||
69 | iph = ip_hdr(skb); | ||
70 | |||
71 | iph->version = 4; | ||
72 | iph->ihl = sizeof(struct iphdr) >> 2; | ||
73 | iph->frag_off = df; | ||
74 | iph->protocol = proto; | ||
75 | iph->tos = tos; | ||
76 | iph->daddr = dst; | ||
77 | iph->saddr = src; | ||
78 | iph->ttl = ttl; | ||
79 | tunnel_ip_select_ident(skb, | ||
80 | (const struct iphdr *)skb_inner_network_header(skb), | ||
81 | &rt->dst); | ||
82 | |||
83 | err = ip_local_out(skb); | ||
84 | if (unlikely(net_xmit_eval(err))) | ||
85 | pkt_len = 0; | ||
86 | return pkt_len; | ||
87 | } | ||
88 | EXPORT_SYMBOL_GPL(iptunnel_xmit); | ||
89 | |||
90 | int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) | ||
91 | { | ||
92 | if (unlikely(!pskb_may_pull(skb, hdr_len))) | ||
93 | return -ENOMEM; | ||
94 | |||
95 | skb_pull_rcsum(skb, hdr_len); | ||
96 | |||
97 | if (inner_proto == htons(ETH_P_TEB)) { | ||
98 | struct ethhdr *eh = (struct ethhdr *)skb->data; | ||
99 | |||
100 | if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) | ||
104 | skb->protocol = eh->h_proto; | ||
105 | else | ||
106 | skb->protocol = htons(ETH_P_802_2); | ||
107 | |||
108 | } else { | ||
109 | skb->protocol = inner_proto; | ||
110 | } | ||
111 | |||
112 | nf_reset(skb); | ||
113 | secpath_reset(skb); | ||
114 | if (!skb->l4_rxhash) | ||
115 | skb->rxhash = 0; | ||
116 | skb_dst_drop(skb); | ||
117 | skb->vlan_tci = 0; | ||
118 | skb_set_queue_mapping(skb, 0); | ||
119 | skb->pkt_type = PACKET_HOST; | ||
120 | return 0; | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(iptunnel_pull_header); | ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 9df7ecd393f2..e6905fbda2a2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb) | |||
188 | struct net *net = dev_net(skb->dev); | 188 | struct net *net = dev_net(skb->dev); |
189 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); | 189 | struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); |
190 | struct ip_tunnel *tunnel; | 190 | struct ip_tunnel *tunnel; |
191 | const struct iphdr *iph = ip_hdr(skb); | 191 | const struct iphdr *iph; |
192 | 192 | ||
193 | if (iptunnel_pull_header(skb, 0, tpi.proto)) | ||
194 | goto drop; | ||
195 | |||
196 | iph = ip_hdr(skb); | ||
193 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, | 197 | tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, |
194 | iph->saddr, iph->daddr, 0); | 198 | iph->saddr, iph->daddr, 0); |
195 | if (tunnel) { | 199 | if (tunnel) { |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6b9c1f128eaf..6cee844678e2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -640,9 +640,14 @@ static const struct tnl_ptk_info tpi = { | |||
640 | 640 | ||
641 | static int ipip_rcv(struct sk_buff *skb) | 641 | static int ipip_rcv(struct sk_buff *skb) |
642 | { | 642 | { |
643 | const struct iphdr *iph = ip_hdr(skb); | 643 | const struct iphdr *iph; |
644 | struct ip_tunnel *tunnel; | 644 | struct ip_tunnel *tunnel; |
645 | 645 | ||
646 | if (iptunnel_pull_header(skb, 0, tpi.proto)) | ||
647 | goto drop; | ||
648 | |||
649 | iph = ip_hdr(skb); | ||
650 | |||
646 | tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, | 651 | tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, |
647 | iph->saddr, iph->daddr); | 652 | iph->saddr, iph->daddr); |
648 | if (tunnel != NULL) { | 653 | if (tunnel != NULL) { |
@@ -723,13 +728,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |||
723 | __be16 df = tiph->frag_off; | 728 | __be16 df = tiph->frag_off; |
724 | struct rtable *rt; /* Route to the other host */ | 729 | struct rtable *rt; /* Route to the other host */ |
725 | struct net_device *tdev; /* Device to other host */ | 730 | struct net_device *tdev; /* Device to other host */ |
726 | struct iphdr *iph; /* Our new IP header */ | ||
727 | unsigned int max_headroom; /* The extra header space needed */ | 731 | unsigned int max_headroom; /* The extra header space needed */ |
728 | __be32 dst = tiph->daddr; | 732 | __be32 dst = tiph->daddr; |
729 | struct flowi4 fl4; | 733 | struct flowi4 fl4; |
730 | int mtu; | 734 | int mtu; |
731 | const struct in6_addr *addr6; | 735 | const struct in6_addr *addr6; |
732 | int addr_type; | 736 | int addr_type; |
737 | u8 ttl; | ||
738 | int err; | ||
733 | 739 | ||
734 | if (skb->protocol != htons(ETH_P_IPV6)) | 740 | if (skb->protocol != htons(ETH_P_IPV6)) |
735 | goto tx_error; | 741 | goto tx_error; |
@@ -872,34 +878,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |||
872 | skb = new_skb; | 878 | skb = new_skb; |
873 | iph6 = ipv6_hdr(skb); | 879 | iph6 = ipv6_hdr(skb); |
874 | } | 880 | } |
875 | 881 | ttl = tiph->ttl; | |
876 | skb->transport_header = skb->network_header; | 882 | if (ttl == 0) |
877 | skb_push(skb, sizeof(struct iphdr)); | 883 | ttl = iph6->hop_limit; |
878 | skb_reset_network_header(skb); | 884 | tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); |
879 | memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | 885 | |
880 | IPCB(skb)->flags = 0; | 886 | err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, |
881 | skb_dst_drop(skb); | 887 | IPPROTO_IPV6, tos, ttl, df); |
882 | skb_dst_set(skb, &rt->dst); | 888 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); |
883 | |||
884 | /* | ||
885 | * Push down and install the IPIP header. | ||
886 | */ | ||
887 | |||
888 | iph = ip_hdr(skb); | ||
889 | iph->version = 4; | ||
890 | iph->ihl = sizeof(struct iphdr)>>2; | ||
891 | iph->frag_off = df; | ||
892 | iph->protocol = IPPROTO_IPV6; | ||
893 | iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); | ||
894 | iph->daddr = fl4.daddr; | ||
895 | iph->saddr = fl4.saddr; | ||
896 | |||
897 | if ((iph->ttl = tiph->ttl) == 0) | ||
898 | iph->ttl = iph6->hop_limit; | ||
899 | |||
900 | skb->ip_summed = CHECKSUM_NONE; | ||
901 | ip_select_ident(iph, skb_dst(skb), NULL); | ||
902 | iptunnel_xmit(skb, dev); | ||
903 | return NETDEV_TX_OK; | 889 | return NETDEV_TX_OK; |
904 | 890 | ||
905 | tx_error_icmp: | 891 | tx_error_icmp: |
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361be..9fbc04a31ed6 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig | |||
@@ -19,6 +19,8 @@ config OPENVSWITCH | |||
19 | which is able to accept configuration from a variety of sources and | 19 | which is able to accept configuration from a variety of sources and |
20 | translate it into packet processing rules. | 20 | translate it into packet processing rules. |
21 | 21 | ||
22 | Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. | ||
23 | |||
22 | See http://openvswitch.org for more information and userspace | 24 | See http://openvswitch.org for more information and userspace |
23 | utilities. | 25 | utilities. |
24 | 26 | ||
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c1..01bddb2991e3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile | |||
@@ -10,5 +10,6 @@ openvswitch-y := \ | |||
10 | dp_notify.o \ | 10 | dp_notify.o \ |
11 | flow.o \ | 11 | flow.o \ |
12 | vport.o \ | 12 | vport.o \ |
13 | vport-gre.o \ | ||
13 | vport-internal_dev.o \ | 14 | vport-internal_dev.o \ |
14 | vport-netdev.o \ | 15 | vport-netdev.o |
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 596d6373399d..22c5f399f1cf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c | |||
@@ -436,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb, | |||
436 | skb->mark = nla_get_u32(nested_attr); | 436 | skb->mark = nla_get_u32(nested_attr); |
437 | break; | 437 | break; |
438 | 438 | ||
439 | case OVS_KEY_ATTR_IPV4_TUNNEL: | ||
440 | OVS_CB(skb)->tun_key = nla_data(nested_attr); | ||
441 | break; | ||
442 | |||
439 | case OVS_KEY_ATTR_ETHERNET: | 443 | case OVS_KEY_ATTR_ETHERNET: |
440 | err = set_eth_addr(skb, nla_data(nested_attr)); | 444 | err = set_eth_addr(skb, nla_data(nested_attr)); |
441 | break; | 445 | break; |
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 0f783d9fa00d..f7e3a0d84c40 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c | |||
@@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, | |||
362 | static size_t key_attr_size(void) | 362 | static size_t key_attr_size(void) |
363 | { | 363 | { |
364 | return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ | 364 | return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ |
365 | + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ | ||
366 | + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ | ||
367 | + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ | ||
368 | + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ | ||
369 | + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ | ||
370 | + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ | ||
371 | + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ | ||
372 | + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ | ||
365 | + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ | 373 | + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ |
366 | + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ | 374 | + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ |
367 | + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ | 375 | + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ |
@@ -464,16 +472,89 @@ static int flush_flows(struct datapath *dp) | |||
464 | return 0; | 472 | return 0; |
465 | } | 473 | } |
466 | 474 | ||
467 | static int validate_actions(const struct nlattr *attr, | 475 | static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) |
468 | const struct sw_flow_key *key, int depth); | 476 | { |
477 | |||
478 | struct sw_flow_actions *acts; | ||
479 | int new_acts_size; | ||
480 | int req_size = NLA_ALIGN(attr_len); | ||
481 | int next_offset = offsetof(struct sw_flow_actions, actions) + | ||
482 | (*sfa)->actions_len; | ||
483 | |||
484 | if (req_size <= (ksize(*sfa) - next_offset)) | ||
485 | goto out; | ||
486 | |||
487 | new_acts_size = ksize(*sfa) * 2; | ||
488 | |||
489 | if (new_acts_size > MAX_ACTIONS_BUFSIZE) { | ||
490 | if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) | ||
491 | return ERR_PTR(-EMSGSIZE); | ||
492 | new_acts_size = MAX_ACTIONS_BUFSIZE; | ||
493 | } | ||
494 | |||
495 | acts = ovs_flow_actions_alloc(new_acts_size); | ||
496 | if (IS_ERR(acts)) | ||
497 | return (void *)acts; | ||
469 | 498 | ||
470 | static int validate_sample(const struct nlattr *attr, | 499 | memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); |
471 | const struct sw_flow_key *key, int depth) | 500 | acts->actions_len = (*sfa)->actions_len; |
501 | kfree(*sfa); | ||
502 | *sfa = acts; | ||
503 | |||
504 | out: | ||
505 | (*sfa)->actions_len += req_size; | ||
506 | return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); | ||
507 | } | ||
508 | |||
509 | static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) | ||
510 | { | ||
511 | struct nlattr *a; | ||
512 | |||
513 | a = reserve_sfa_size(sfa, nla_attr_size(len)); | ||
514 | if (IS_ERR(a)) | ||
515 | return PTR_ERR(a); | ||
516 | |||
517 | a->nla_type = attrtype; | ||
518 | a->nla_len = nla_attr_size(len); | ||
519 | |||
520 | if (data) | ||
521 | memcpy(nla_data(a), data, len); | ||
522 | memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) | ||
528 | { | ||
529 | int used = (*sfa)->actions_len; | ||
530 | int err; | ||
531 | |||
532 | err = add_action(sfa, attrtype, NULL, 0); | ||
533 | if (err) | ||
534 | return err; | ||
535 | |||
536 | return used; | ||
537 | } | ||
538 | |||
539 | static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) | ||
540 | { | ||
541 | struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); | ||
542 | |||
543 | a->nla_len = sfa->actions_len - st_offset; | ||
544 | } | ||
545 | |||
546 | static int validate_and_copy_actions(const struct nlattr *attr, | ||
547 | const struct sw_flow_key *key, int depth, | ||
548 | struct sw_flow_actions **sfa); | ||
549 | |||
550 | static int validate_and_copy_sample(const struct nlattr *attr, | ||
551 | const struct sw_flow_key *key, int depth, | ||
552 | struct sw_flow_actions **sfa) | ||
472 | { | 553 | { |
473 | const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; | 554 | const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; |
474 | const struct nlattr *probability, *actions; | 555 | const struct nlattr *probability, *actions; |
475 | const struct nlattr *a; | 556 | const struct nlattr *a; |
476 | int rem; | 557 | int rem, start, err, st_acts; |
477 | 558 | ||
478 | memset(attrs, 0, sizeof(attrs)); | 559 | memset(attrs, 0, sizeof(attrs)); |
479 | nla_for_each_nested(a, attr, rem) { | 560 | nla_for_each_nested(a, attr, rem) { |
@@ -492,7 +573,26 @@ static int validate_sample(const struct nlattr *attr, | |||
492 | actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; | 573 | actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; |
493 | if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) | 574 | if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) |
494 | return -EINVAL; | 575 | return -EINVAL; |
495 | return validate_actions(actions, key, depth + 1); | 576 | |
577 | /* validation done, copy sample action. */ | ||
578 | start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); | ||
579 | if (start < 0) | ||
580 | return start; | ||
581 | err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); | ||
582 | if (err) | ||
583 | return err; | ||
584 | st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); | ||
585 | if (st_acts < 0) | ||
586 | return st_acts; | ||
587 | |||
588 | err = validate_and_copy_actions(actions, key, depth + 1, sfa); | ||
589 | if (err) | ||
590 | return err; | ||
591 | |||
592 | add_nested_action_end(*sfa, st_acts); | ||
593 | add_nested_action_end(*sfa, start); | ||
594 | |||
595 | return 0; | ||
496 | } | 596 | } |
497 | 597 | ||
498 | static int validate_tp_port(const struct sw_flow_key *flow_key) | 598 | static int validate_tp_port(const struct sw_flow_key *flow_key) |
@@ -508,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) | |||
508 | return -EINVAL; | 608 | return -EINVAL; |
509 | } | 609 | } |
510 | 610 | ||
611 | static int validate_and_copy_set_tun(const struct nlattr *attr, | ||
612 | struct sw_flow_actions **sfa) | ||
613 | { | ||
614 | struct ovs_key_ipv4_tunnel tun_key; | ||
615 | int err, start; | ||
616 | |||
617 | err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); | ||
618 | if (err) | ||
619 | return err; | ||
620 | |||
621 | start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); | ||
622 | if (start < 0) | ||
623 | return start; | ||
624 | |||
625 | err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); | ||
626 | add_nested_action_end(*sfa, start); | ||
627 | |||
628 | return err; | ||
629 | } | ||
630 | |||
511 | static int validate_set(const struct nlattr *a, | 631 | static int validate_set(const struct nlattr *a, |
512 | const struct sw_flow_key *flow_key) | 632 | const struct sw_flow_key *flow_key, |
633 | struct sw_flow_actions **sfa, | ||
634 | bool *set_tun) | ||
513 | { | 635 | { |
514 | const struct nlattr *ovs_key = nla_data(a); | 636 | const struct nlattr *ovs_key = nla_data(a); |
515 | int key_type = nla_type(ovs_key); | 637 | int key_type = nla_type(ovs_key); |
@@ -519,18 +641,27 @@ static int validate_set(const struct nlattr *a, | |||
519 | return -EINVAL; | 641 | return -EINVAL; |
520 | 642 | ||
521 | if (key_type > OVS_KEY_ATTR_MAX || | 643 | if (key_type > OVS_KEY_ATTR_MAX || |
522 | nla_len(ovs_key) != ovs_key_lens[key_type]) | 644 | (ovs_key_lens[key_type] != nla_len(ovs_key) && |
645 | ovs_key_lens[key_type] != -1)) | ||
523 | return -EINVAL; | 646 | return -EINVAL; |
524 | 647 | ||
525 | switch (key_type) { | 648 | switch (key_type) { |
526 | const struct ovs_key_ipv4 *ipv4_key; | 649 | const struct ovs_key_ipv4 *ipv4_key; |
527 | const struct ovs_key_ipv6 *ipv6_key; | 650 | const struct ovs_key_ipv6 *ipv6_key; |
651 | int err; | ||
528 | 652 | ||
529 | case OVS_KEY_ATTR_PRIORITY: | 653 | case OVS_KEY_ATTR_PRIORITY: |
530 | case OVS_KEY_ATTR_SKB_MARK: | 654 | case OVS_KEY_ATTR_SKB_MARK: |
531 | case OVS_KEY_ATTR_ETHERNET: | 655 | case OVS_KEY_ATTR_ETHERNET: |
532 | break; | 656 | break; |
533 | 657 | ||
658 | case OVS_KEY_ATTR_TUNNEL: | ||
659 | *set_tun = true; | ||
660 | err = validate_and_copy_set_tun(a, sfa); | ||
661 | if (err) | ||
662 | return err; | ||
663 | break; | ||
664 | |||
534 | case OVS_KEY_ATTR_IPV4: | 665 | case OVS_KEY_ATTR_IPV4: |
535 | if (flow_key->eth.type != htons(ETH_P_IP)) | 666 | if (flow_key->eth.type != htons(ETH_P_IP)) |
536 | return -EINVAL; | 667 | return -EINVAL; |
@@ -606,8 +737,24 @@ static int validate_userspace(const struct nlattr *attr) | |||
606 | return 0; | 737 | return 0; |
607 | } | 738 | } |
608 | 739 | ||
609 | static int validate_actions(const struct nlattr *attr, | 740 | static int copy_action(const struct nlattr *from, |
610 | const struct sw_flow_key *key, int depth) | 741 | struct sw_flow_actions **sfa) |
742 | { | ||
743 | int totlen = NLA_ALIGN(from->nla_len); | ||
744 | struct nlattr *to; | ||
745 | |||
746 | to = reserve_sfa_size(sfa, from->nla_len); | ||
747 | if (IS_ERR(to)) | ||
748 | return PTR_ERR(to); | ||
749 | |||
750 | memcpy(to, from, totlen); | ||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | static int validate_and_copy_actions(const struct nlattr *attr, | ||
755 | const struct sw_flow_key *key, | ||
756 | int depth, | ||
757 | struct sw_flow_actions **sfa) | ||
611 | { | 758 | { |
612 | const struct nlattr *a; | 759 | const struct nlattr *a; |
613 | int rem, err; | 760 | int rem, err; |
@@ -627,12 +774,14 @@ static int validate_actions(const struct nlattr *attr, | |||
627 | }; | 774 | }; |
628 | const struct ovs_action_push_vlan *vlan; | 775 | const struct ovs_action_push_vlan *vlan; |
629 | int type = nla_type(a); | 776 | int type = nla_type(a); |
777 | bool skip_copy; | ||
630 | 778 | ||
631 | if (type > OVS_ACTION_ATTR_MAX || | 779 | if (type > OVS_ACTION_ATTR_MAX || |
632 | (action_lens[type] != nla_len(a) && | 780 | (action_lens[type] != nla_len(a) && |
633 | action_lens[type] != (u32)-1)) | 781 | action_lens[type] != (u32)-1)) |
634 | return -EINVAL; | 782 | return -EINVAL; |
635 | 783 | ||
784 | skip_copy = false; | ||
636 | switch (type) { | 785 | switch (type) { |
637 | case OVS_ACTION_ATTR_UNSPEC: | 786 | case OVS_ACTION_ATTR_UNSPEC: |
638 | return -EINVAL; | 787 | return -EINVAL; |
@@ -661,20 +810,26 @@ static int validate_actions(const struct nlattr *attr, | |||
661 | break; | 810 | break; |
662 | 811 | ||
663 | case OVS_ACTION_ATTR_SET: | 812 | case OVS_ACTION_ATTR_SET: |
664 | err = validate_set(a, key); | 813 | err = validate_set(a, key, sfa, &skip_copy); |
665 | if (err) | 814 | if (err) |
666 | return err; | 815 | return err; |
667 | break; | 816 | break; |
668 | 817 | ||
669 | case OVS_ACTION_ATTR_SAMPLE: | 818 | case OVS_ACTION_ATTR_SAMPLE: |
670 | err = validate_sample(a, key, depth); | 819 | err = validate_and_copy_sample(a, key, depth, sfa); |
671 | if (err) | 820 | if (err) |
672 | return err; | 821 | return err; |
822 | skip_copy = true; | ||
673 | break; | 823 | break; |
674 | 824 | ||
675 | default: | 825 | default: |
676 | return -EINVAL; | 826 | return -EINVAL; |
677 | } | 827 | } |
828 | if (!skip_copy) { | ||
829 | err = copy_action(a, sfa); | ||
830 | if (err) | ||
831 | return err; | ||
832 | } | ||
678 | } | 833 | } |
679 | 834 | ||
680 | if (rem > 0) | 835 | if (rem > 0) |
@@ -739,21 +894,18 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) | |||
739 | if (err) | 894 | if (err) |
740 | goto err_flow_free; | 895 | goto err_flow_free; |
741 | 896 | ||
742 | err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); | 897 | err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); |
743 | if (err) | 898 | if (err) |
744 | goto err_flow_free; | 899 | goto err_flow_free; |
745 | 900 | acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); | |
746 | err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); | ||
747 | if (err) | ||
748 | goto err_flow_free; | ||
749 | |||
750 | flow->hash = ovs_flow_hash(&flow->key, key_len); | ||
751 | |||
752 | acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); | ||
753 | err = PTR_ERR(acts); | 901 | err = PTR_ERR(acts); |
754 | if (IS_ERR(acts)) | 902 | if (IS_ERR(acts)) |
755 | goto err_flow_free; | 903 | goto err_flow_free; |
904 | |||
905 | err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); | ||
756 | rcu_assign_pointer(flow->sf_acts, acts); | 906 | rcu_assign_pointer(flow->sf_acts, acts); |
907 | if (err) | ||
908 | goto err_flow_free; | ||
757 | 909 | ||
758 | OVS_CB(packet)->flow = flow; | 910 | OVS_CB(packet)->flow = flow; |
759 | packet->priority = flow->key.phy.priority; | 911 | packet->priority = flow->key.phy.priority; |
@@ -843,6 +995,99 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { | |||
843 | .name = OVS_FLOW_MCGROUP | 995 | .name = OVS_FLOW_MCGROUP |
844 | }; | 996 | }; |
845 | 997 | ||
998 | static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); | ||
999 | static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) | ||
1000 | { | ||
1001 | const struct nlattr *a; | ||
1002 | struct nlattr *start; | ||
1003 | int err = 0, rem; | ||
1004 | |||
1005 | start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); | ||
1006 | if (!start) | ||
1007 | return -EMSGSIZE; | ||
1008 | |||
1009 | nla_for_each_nested(a, attr, rem) { | ||
1010 | int type = nla_type(a); | ||
1011 | struct nlattr *st_sample; | ||
1012 | |||
1013 | switch (type) { | ||
1014 | case OVS_SAMPLE_ATTR_PROBABILITY: | ||
1015 | if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) | ||
1016 | return -EMSGSIZE; | ||
1017 | break; | ||
1018 | case OVS_SAMPLE_ATTR_ACTIONS: | ||
1019 | st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); | ||
1020 | if (!st_sample) | ||
1021 | return -EMSGSIZE; | ||
1022 | err = actions_to_attr(nla_data(a), nla_len(a), skb); | ||
1023 | if (err) | ||
1024 | return err; | ||
1025 | nla_nest_end(skb, st_sample); | ||
1026 | break; | ||
1027 | } | ||
1028 | } | ||
1029 | |||
1030 | nla_nest_end(skb, start); | ||
1031 | return err; | ||
1032 | } | ||
1033 | |||
1034 | static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) | ||
1035 | { | ||
1036 | const struct nlattr *ovs_key = nla_data(a); | ||
1037 | int key_type = nla_type(ovs_key); | ||
1038 | struct nlattr *start; | ||
1039 | int err; | ||
1040 | |||
1041 | switch (key_type) { | ||
1042 | case OVS_KEY_ATTR_IPV4_TUNNEL: | ||
1043 | start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); | ||
1044 | if (!start) | ||
1045 | return -EMSGSIZE; | ||
1046 | |||
1047 | err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); | ||
1048 | if (err) | ||
1049 | return err; | ||
1050 | nla_nest_end(skb, start); | ||
1051 | break; | ||
1052 | default: | ||
1053 | if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) | ||
1054 | return -EMSGSIZE; | ||
1055 | break; | ||
1056 | } | ||
1057 | |||
1058 | return 0; | ||
1059 | } | ||
1060 | |||
1061 | static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) | ||
1062 | { | ||
1063 | const struct nlattr *a; | ||
1064 | int rem, err; | ||
1065 | |||
1066 | nla_for_each_attr(a, attr, len, rem) { | ||
1067 | int type = nla_type(a); | ||
1068 | |||
1069 | switch (type) { | ||
1070 | case OVS_ACTION_ATTR_SET: | ||
1071 | err = set_action_to_attr(a, skb); | ||
1072 | if (err) | ||
1073 | return err; | ||
1074 | break; | ||
1075 | |||
1076 | case OVS_ACTION_ATTR_SAMPLE: | ||
1077 | err = sample_action_to_attr(a, skb); | ||
1078 | if (err) | ||
1079 | return err; | ||
1080 | break; | ||
1081 | default: | ||
1082 | if (nla_put(skb, type, nla_len(a), nla_data(a))) | ||
1083 | return -EMSGSIZE; | ||
1084 | break; | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1088 | return 0; | ||
1089 | } | ||
1090 | |||
846 | static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) | 1091 | static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) |
847 | { | 1092 | { |
848 | return NLMSG_ALIGN(sizeof(struct ovs_header)) | 1093 | return NLMSG_ALIGN(sizeof(struct ovs_header)) |
@@ -860,6 +1105,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, | |||
860 | { | 1105 | { |
861 | const int skb_orig_len = skb->len; | 1106 | const int skb_orig_len = skb->len; |
862 | const struct sw_flow_actions *sf_acts; | 1107 | const struct sw_flow_actions *sf_acts; |
1108 | struct nlattr *start; | ||
863 | struct ovs_flow_stats stats; | 1109 | struct ovs_flow_stats stats; |
864 | struct ovs_header *ovs_header; | 1110 | struct ovs_header *ovs_header; |
865 | struct nlattr *nla; | 1111 | struct nlattr *nla; |
@@ -913,10 +1159,19 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, | |||
913 | * This can only fail for dump operations because the skb is always | 1159 | * This can only fail for dump operations because the skb is always |
914 | * properly sized for single flows. | 1160 | * properly sized for single flows. |
915 | */ | 1161 | */ |
916 | err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, | 1162 | start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); |
917 | sf_acts->actions); | 1163 | if (start) { |
918 | if (err < 0 && skb_orig_len) | 1164 | err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); |
919 | goto error; | 1165 | if (!err) |
1166 | nla_nest_end(skb, start); | ||
1167 | else { | ||
1168 | if (skb_orig_len) | ||
1169 | goto error; | ||
1170 | |||
1171 | nla_nest_cancel(skb, start); | ||
1172 | } | ||
1173 | } else if (skb_orig_len) | ||
1174 | goto nla_put_failure; | ||
920 | 1175 | ||
921 | return genlmsg_end(skb, ovs_header); | 1176 | return genlmsg_end(skb, ovs_header); |
922 | 1177 | ||
@@ -961,6 +1216,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
961 | struct sk_buff *reply; | 1216 | struct sk_buff *reply; |
962 | struct datapath *dp; | 1217 | struct datapath *dp; |
963 | struct flow_table *table; | 1218 | struct flow_table *table; |
1219 | struct sw_flow_actions *acts = NULL; | ||
964 | int error; | 1220 | int error; |
965 | int key_len; | 1221 | int key_len; |
966 | 1222 | ||
@@ -974,9 +1230,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
974 | 1230 | ||
975 | /* Validate actions. */ | 1231 | /* Validate actions. */ |
976 | if (a[OVS_FLOW_ATTR_ACTIONS]) { | 1232 | if (a[OVS_FLOW_ATTR_ACTIONS]) { |
977 | error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); | 1233 | acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); |
978 | if (error) | 1234 | error = PTR_ERR(acts); |
1235 | if (IS_ERR(acts)) | ||
979 | goto error; | 1236 | goto error; |
1237 | |||
1238 | error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); | ||
1239 | if (error) | ||
1240 | goto err_kfree; | ||
980 | } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { | 1241 | } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { |
981 | error = -EINVAL; | 1242 | error = -EINVAL; |
982 | goto error; | 1243 | goto error; |
@@ -991,8 +1252,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
991 | table = ovsl_dereference(dp->table); | 1252 | table = ovsl_dereference(dp->table); |
992 | flow = ovs_flow_tbl_lookup(table, &key, key_len); | 1253 | flow = ovs_flow_tbl_lookup(table, &key, key_len); |
993 | if (!flow) { | 1254 | if (!flow) { |
994 | struct sw_flow_actions *acts; | ||
995 | |||
996 | /* Bail out if we're not allowed to create a new flow. */ | 1255 | /* Bail out if we're not allowed to create a new flow. */ |
997 | error = -ENOENT; | 1256 | error = -ENOENT; |
998 | if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) | 1257 | if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) |
@@ -1016,19 +1275,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
1016 | error = PTR_ERR(flow); | 1275 | error = PTR_ERR(flow); |
1017 | goto err_unlock_ovs; | 1276 | goto err_unlock_ovs; |
1018 | } | 1277 | } |
1019 | flow->key = key; | ||
1020 | clear_stats(flow); | 1278 | clear_stats(flow); |
1021 | 1279 | ||
1022 | /* Obtain actions. */ | ||
1023 | acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); | ||
1024 | error = PTR_ERR(acts); | ||
1025 | if (IS_ERR(acts)) | ||
1026 | goto error_free_flow; | ||
1027 | rcu_assign_pointer(flow->sf_acts, acts); | 1280 | rcu_assign_pointer(flow->sf_acts, acts); |
1028 | 1281 | ||
1029 | /* Put flow in bucket. */ | 1282 | /* Put flow in bucket. */ |
1030 | flow->hash = ovs_flow_hash(&key, key_len); | 1283 | ovs_flow_tbl_insert(table, flow, &key, key_len); |
1031 | ovs_flow_tbl_insert(table, flow); | ||
1032 | 1284 | ||
1033 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, | 1285 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, |
1034 | info->snd_seq, | 1286 | info->snd_seq, |
@@ -1036,7 +1288,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
1036 | } else { | 1288 | } else { |
1037 | /* We found a matching flow. */ | 1289 | /* We found a matching flow. */ |
1038 | struct sw_flow_actions *old_acts; | 1290 | struct sw_flow_actions *old_acts; |
1039 | struct nlattr *acts_attrs; | ||
1040 | 1291 | ||
1041 | /* Bail out if we're not allowed to modify an existing flow. | 1292 | /* Bail out if we're not allowed to modify an existing flow. |
1042 | * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL | 1293 | * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL |
@@ -1051,21 +1302,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
1051 | 1302 | ||
1052 | /* Update actions. */ | 1303 | /* Update actions. */ |
1053 | old_acts = ovsl_dereference(flow->sf_acts); | 1304 | old_acts = ovsl_dereference(flow->sf_acts); |
1054 | acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; | 1305 | rcu_assign_pointer(flow->sf_acts, acts); |
1055 | if (acts_attrs && | 1306 | ovs_flow_deferred_free_acts(old_acts); |
1056 | (old_acts->actions_len != nla_len(acts_attrs) || | ||
1057 | memcmp(old_acts->actions, nla_data(acts_attrs), | ||
1058 | old_acts->actions_len))) { | ||
1059 | struct sw_flow_actions *new_acts; | ||
1060 | |||
1061 | new_acts = ovs_flow_actions_alloc(acts_attrs); | ||
1062 | error = PTR_ERR(new_acts); | ||
1063 | if (IS_ERR(new_acts)) | ||
1064 | goto err_unlock_ovs; | ||
1065 | |||
1066 | rcu_assign_pointer(flow->sf_acts, new_acts); | ||
1067 | ovs_flow_deferred_free_acts(old_acts); | ||
1068 | } | ||
1069 | 1307 | ||
1070 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, | 1308 | reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, |
1071 | info->snd_seq, OVS_FLOW_CMD_NEW); | 1309 | info->snd_seq, OVS_FLOW_CMD_NEW); |
@@ -1086,10 +1324,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) | |||
1086 | ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); | 1324 | ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); |
1087 | return 0; | 1325 | return 0; |
1088 | 1326 | ||
1089 | error_free_flow: | ||
1090 | ovs_flow_free(flow); | ||
1091 | err_unlock_ovs: | 1327 | err_unlock_ovs: |
1092 | ovs_unlock(); | 1328 | ovs_unlock(); |
1329 | err_kfree: | ||
1330 | kfree(acts); | ||
1093 | error: | 1331 | error: |
1094 | return error; | 1332 | return error; |
1095 | } | 1333 | } |
@@ -1866,8 +2104,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) | |||
1866 | goto exit_unlock; | 2104 | goto exit_unlock; |
1867 | } | 2105 | } |
1868 | 2106 | ||
1869 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 2107 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, |
1870 | OVS_VPORT_CMD_DEL); | 2108 | info->snd_seq, OVS_VPORT_CMD_DEL); |
1871 | err = PTR_ERR(reply); | 2109 | err = PTR_ERR(reply); |
1872 | if (IS_ERR(reply)) | 2110 | if (IS_ERR(reply)) |
1873 | goto exit_unlock; | 2111 | goto exit_unlock; |
@@ -1896,8 +2134,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) | |||
1896 | if (IS_ERR(vport)) | 2134 | if (IS_ERR(vport)) |
1897 | goto exit_unlock; | 2135 | goto exit_unlock; |
1898 | 2136 | ||
1899 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, | 2137 | reply = ovs_vport_cmd_build_info(vport, info->snd_portid, |
1900 | OVS_VPORT_CMD_NEW); | 2138 | info->snd_seq, OVS_VPORT_CMD_NEW); |
1901 | err = PTR_ERR(reply); | 2139 | err = PTR_ERR(reply); |
1902 | if (IS_ERR(reply)) | 2140 | if (IS_ERR(reply)) |
1903 | goto exit_unlock; | 2141 | goto exit_unlock; |
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 16b840695216..a91486484916 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h | |||
@@ -88,9 +88,12 @@ struct datapath { | |||
88 | /** | 88 | /** |
89 | * struct ovs_skb_cb - OVS data in skb CB | 89 | * struct ovs_skb_cb - OVS data in skb CB |
90 | * @flow: The flow associated with this packet. May be %NULL if no flow. | 90 | * @flow: The flow associated with this packet. May be %NULL if no flow. |
91 | * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the | ||
92 | * packet is not being tunneled. | ||
91 | */ | 93 | */ |
92 | struct ovs_skb_cb { | 94 | struct ovs_skb_cb { |
93 | struct sw_flow *flow; | 95 | struct sw_flow *flow; |
96 | struct ovs_key_ipv4_tunnel *tun_key; | ||
94 | }; | 97 | }; |
95 | #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) | 98 | #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) |
96 | 99 | ||
@@ -119,6 +122,7 @@ struct dp_upcall_info { | |||
119 | struct ovs_net { | 122 | struct ovs_net { |
120 | struct list_head dps; | 123 | struct list_head dps; |
121 | struct work_struct dp_notify_work; | 124 | struct work_struct dp_notify_work; |
125 | struct vport_net vport_net; | ||
122 | }; | 126 | }; |
123 | 127 | ||
124 | extern int ovs_net_id; | 128 | extern int ovs_net_id; |
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 093c191d4fc2..5c519b121e1b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/icmpv6.h> | 40 | #include <linux/icmpv6.h> |
41 | #include <linux/rculist.h> | 41 | #include <linux/rculist.h> |
42 | #include <net/ip.h> | 42 | #include <net/ip.h> |
43 | #include <net/ip_tunnels.h> | ||
43 | #include <net/ipv6.h> | 44 | #include <net/ipv6.h> |
44 | #include <net/ndisc.h> | 45 | #include <net/ndisc.h> |
45 | 46 | ||
@@ -198,20 +199,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) | |||
198 | spin_unlock(&flow->lock); | 199 | spin_unlock(&flow->lock); |
199 | } | 200 | } |
200 | 201 | ||
201 | struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) | 202 | struct sw_flow_actions *ovs_flow_actions_alloc(int size) |
202 | { | 203 | { |
203 | int actions_len = nla_len(actions); | ||
204 | struct sw_flow_actions *sfa; | 204 | struct sw_flow_actions *sfa; |
205 | 205 | ||
206 | if (actions_len > MAX_ACTIONS_BUFSIZE) | 206 | if (size > MAX_ACTIONS_BUFSIZE) |
207 | return ERR_PTR(-EINVAL); | 207 | return ERR_PTR(-EINVAL); |
208 | 208 | ||
209 | sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); | 209 | sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); |
210 | if (!sfa) | 210 | if (!sfa) |
211 | return ERR_PTR(-ENOMEM); | 211 | return ERR_PTR(-ENOMEM); |
212 | 212 | ||
213 | sfa->actions_len = actions_len; | 213 | sfa->actions_len = 0; |
214 | nla_memcpy(sfa->actions, actions, actions_len); | ||
215 | return sfa; | 214 | return sfa; |
216 | } | 215 | } |
217 | 216 | ||
@@ -354,6 +353,14 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la | |||
354 | return NULL; | 353 | return NULL; |
355 | } | 354 | } |
356 | 355 | ||
356 | static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) | ||
357 | { | ||
358 | struct hlist_head *head; | ||
359 | head = find_bucket(table, flow->hash); | ||
360 | hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); | ||
361 | table->count++; | ||
362 | } | ||
363 | |||
357 | static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) | 364 | static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) |
358 | { | 365 | { |
359 | int old_ver; | 366 | int old_ver; |
@@ -370,7 +377,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new | |||
370 | head = flex_array_get(old->buckets, i); | 377 | head = flex_array_get(old->buckets, i); |
371 | 378 | ||
372 | hlist_for_each_entry(flow, head, hash_node[old_ver]) | 379 | hlist_for_each_entry(flow, head, hash_node[old_ver]) |
373 | ovs_flow_tbl_insert(new, flow); | 380 | __flow_tbl_insert(new, flow); |
374 | } | 381 | } |
375 | old->keep_flows = true; | 382 | old->keep_flows = true; |
376 | } | 383 | } |
@@ -605,6 +612,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, | |||
605 | memset(key, 0, sizeof(*key)); | 612 | memset(key, 0, sizeof(*key)); |
606 | 613 | ||
607 | key->phy.priority = skb->priority; | 614 | key->phy.priority = skb->priority; |
615 | if (OVS_CB(skb)->tun_key) | ||
616 | memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); | ||
608 | key->phy.in_port = in_port; | 617 | key->phy.in_port = in_port; |
609 | key->phy.skb_mark = skb->mark; | 618 | key->phy.skb_mark = skb->mark; |
610 | 619 | ||
@@ -762,9 +771,18 @@ out: | |||
762 | return error; | 771 | return error; |
763 | } | 772 | } |
764 | 773 | ||
765 | u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) | 774 | static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) |
766 | { | 775 | { |
767 | return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); | 776 | return jhash2((u32 *)((u8 *)key + key_start), |
777 | DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); | ||
778 | } | ||
779 | |||
780 | static int flow_key_start(struct sw_flow_key *key) | ||
781 | { | ||
782 | if (key->tun_key.ipv4_dst) | ||
783 | return 0; | ||
784 | else | ||
785 | return offsetof(struct sw_flow_key, phy); | ||
768 | } | 786 | } |
769 | 787 | ||
770 | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, | 788 | struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, |
@@ -772,28 +790,31 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, | |||
772 | { | 790 | { |
773 | struct sw_flow *flow; | 791 | struct sw_flow *flow; |
774 | struct hlist_head *head; | 792 | struct hlist_head *head; |
793 | u8 *_key; | ||
794 | int key_start; | ||
775 | u32 hash; | 795 | u32 hash; |
776 | 796 | ||
777 | hash = ovs_flow_hash(key, key_len); | 797 | key_start = flow_key_start(key); |
798 | hash = ovs_flow_hash(key, key_start, key_len); | ||
778 | 799 | ||
800 | _key = (u8 *) key + key_start; | ||
779 | head = find_bucket(table, hash); | 801 | head = find_bucket(table, hash); |
780 | hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { | 802 | hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { |
781 | 803 | ||
782 | if (flow->hash == hash && | 804 | if (flow->hash == hash && |
783 | !memcmp(&flow->key, key, key_len)) { | 805 | !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { |
784 | return flow; | 806 | return flow; |
785 | } | 807 | } |
786 | } | 808 | } |
787 | return NULL; | 809 | return NULL; |
788 | } | 810 | } |
789 | 811 | ||
790 | void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) | 812 | void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, |
813 | struct sw_flow_key *key, int key_len) | ||
791 | { | 814 | { |
792 | struct hlist_head *head; | 815 | flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); |
793 | 816 | memcpy(&flow->key, key, sizeof(flow->key)); | |
794 | head = find_bucket(table, flow->hash); | 817 | __flow_tbl_insert(table, flow); |
795 | hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); | ||
796 | table->count++; | ||
797 | } | 818 | } |
798 | 819 | ||
799 | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) | 820 | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) |
@@ -820,6 +841,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { | |||
820 | [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), | 841 | [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), |
821 | [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), | 842 | [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), |
822 | [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), | 843 | [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), |
844 | [OVS_KEY_ATTR_TUNNEL] = -1, | ||
823 | }; | 845 | }; |
824 | 846 | ||
825 | static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, | 847 | static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, |
@@ -957,6 +979,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr, | |||
957 | return 0; | 979 | return 0; |
958 | } | 980 | } |
959 | 981 | ||
982 | int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, | ||
983 | struct ovs_key_ipv4_tunnel *tun_key) | ||
984 | { | ||
985 | struct nlattr *a; | ||
986 | int rem; | ||
987 | bool ttl = false; | ||
988 | |||
989 | memset(tun_key, 0, sizeof(*tun_key)); | ||
990 | |||
991 | nla_for_each_nested(a, attr, rem) { | ||
992 | int type = nla_type(a); | ||
993 | static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { | ||
994 | [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), | ||
995 | [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), | ||
996 | [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), | ||
997 | [OVS_TUNNEL_KEY_ATTR_TOS] = 1, | ||
998 | [OVS_TUNNEL_KEY_ATTR_TTL] = 1, | ||
999 | [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, | ||
1000 | [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, | ||
1001 | }; | ||
1002 | |||
1003 | if (type > OVS_TUNNEL_KEY_ATTR_MAX || | ||
1004 | ovs_tunnel_key_lens[type] != nla_len(a)) | ||
1005 | return -EINVAL; | ||
1006 | |||
1007 | switch (type) { | ||
1008 | case OVS_TUNNEL_KEY_ATTR_ID: | ||
1009 | tun_key->tun_id = nla_get_be64(a); | ||
1010 | tun_key->tun_flags |= TUNNEL_KEY; | ||
1011 | break; | ||
1012 | case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: | ||
1013 | tun_key->ipv4_src = nla_get_be32(a); | ||
1014 | break; | ||
1015 | case OVS_TUNNEL_KEY_ATTR_IPV4_DST: | ||
1016 | tun_key->ipv4_dst = nla_get_be32(a); | ||
1017 | break; | ||
1018 | case OVS_TUNNEL_KEY_ATTR_TOS: | ||
1019 | tun_key->ipv4_tos = nla_get_u8(a); | ||
1020 | break; | ||
1021 | case OVS_TUNNEL_KEY_ATTR_TTL: | ||
1022 | tun_key->ipv4_ttl = nla_get_u8(a); | ||
1023 | ttl = true; | ||
1024 | break; | ||
1025 | case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: | ||
1026 | tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; | ||
1027 | break; | ||
1028 | case OVS_TUNNEL_KEY_ATTR_CSUM: | ||
1029 | tun_key->tun_flags |= TUNNEL_CSUM; | ||
1030 | break; | ||
1031 | default: | ||
1032 | return -EINVAL; | ||
1033 | |||
1034 | } | ||
1035 | } | ||
1036 | if (rem > 0) | ||
1037 | return -EINVAL; | ||
1038 | |||
1039 | if (!tun_key->ipv4_dst) | ||
1040 | return -EINVAL; | ||
1041 | |||
1042 | if (!ttl) | ||
1043 | return -EINVAL; | ||
1044 | |||
1045 | return 0; | ||
1046 | } | ||
1047 | |||
1048 | int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, | ||
1049 | const struct ovs_key_ipv4_tunnel *tun_key) | ||
1050 | { | ||
1051 | struct nlattr *nla; | ||
1052 | |||
1053 | nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); | ||
1054 | if (!nla) | ||
1055 | return -EMSGSIZE; | ||
1056 | |||
1057 | if (tun_key->tun_flags & TUNNEL_KEY && | ||
1058 | nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) | ||
1059 | return -EMSGSIZE; | ||
1060 | if (tun_key->ipv4_src && | ||
1061 | nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) | ||
1062 | return -EMSGSIZE; | ||
1063 | if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) | ||
1064 | return -EMSGSIZE; | ||
1065 | if (tun_key->ipv4_tos && | ||
1066 | nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) | ||
1067 | return -EMSGSIZE; | ||
1068 | if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) | ||
1069 | return -EMSGSIZE; | ||
1070 | if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && | ||
1071 | nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) | ||
1072 | return -EMSGSIZE; | ||
1073 | if ((tun_key->tun_flags & TUNNEL_CSUM) && | ||
1074 | nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) | ||
1075 | return -EMSGSIZE; | ||
1076 | |||
1077 | nla_nest_end(skb, nla); | ||
1078 | return 0; | ||
1079 | } | ||
1080 | |||
960 | /** | 1081 | /** |
961 | * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. | 1082 | * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. |
962 | * @swkey: receives the extracted flow key. | 1083 | * @swkey: receives the extracted flow key. |
@@ -999,6 +1120,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, | |||
999 | attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); | 1120 | attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); |
1000 | } | 1121 | } |
1001 | 1122 | ||
1123 | if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { | ||
1124 | err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); | ||
1125 | if (err) | ||
1126 | return err; | ||
1127 | |||
1128 | attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); | ||
1129 | } | ||
1130 | |||
1002 | /* Data attributes. */ | 1131 | /* Data attributes. */ |
1003 | if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) | 1132 | if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) |
1004 | return -EINVAL; | 1133 | return -EINVAL; |
@@ -1126,6 +1255,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, | |||
1126 | /** | 1255 | /** |
1127 | * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. | 1256 | * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. |
1128 | * @flow: Receives extracted in_port, priority, tun_key and skb_mark. | 1257 | * @flow: Receives extracted in_port, priority, tun_key and skb_mark. |
1258 | * @key_len: Length of key in @flow. Used for calculating flow hash. | ||
1129 | * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute | 1259 | * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute |
1130 | * sequence. | 1260 | * sequence. |
1131 | * | 1261 | * |
@@ -1134,20 +1264,24 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, | |||
1134 | * get the metadata, that is, the parts of the flow key that cannot be | 1264 | * get the metadata, that is, the parts of the flow key that cannot be |
1135 | * extracted from the packet itself. | 1265 | * extracted from the packet itself. |
1136 | */ | 1266 | */ |
1137 | int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, | 1267 | int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, |
1138 | const struct nlattr *attr) | 1268 | const struct nlattr *attr) |
1139 | { | 1269 | { |
1270 | struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; | ||
1140 | const struct nlattr *nla; | 1271 | const struct nlattr *nla; |
1141 | int rem; | 1272 | int rem; |
1142 | 1273 | ||
1143 | flow->key.phy.in_port = DP_MAX_PORTS; | 1274 | flow->key.phy.in_port = DP_MAX_PORTS; |
1144 | flow->key.phy.priority = 0; | 1275 | flow->key.phy.priority = 0; |
1145 | flow->key.phy.skb_mark = 0; | 1276 | flow->key.phy.skb_mark = 0; |
1277 | memset(tun_key, 0, sizeof(flow->key.tun_key)); | ||
1146 | 1278 | ||
1147 | nla_for_each_nested(nla, attr, rem) { | 1279 | nla_for_each_nested(nla, attr, rem) { |
1148 | int type = nla_type(nla); | 1280 | int type = nla_type(nla); |
1149 | 1281 | ||
1150 | if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { | 1282 | if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { |
1283 | int err; | ||
1284 | |||
1151 | if (nla_len(nla) != ovs_key_lens[type]) | 1285 | if (nla_len(nla) != ovs_key_lens[type]) |
1152 | return -EINVAL; | 1286 | return -EINVAL; |
1153 | 1287 | ||
@@ -1156,6 +1290,12 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, | |||
1156 | flow->key.phy.priority = nla_get_u32(nla); | 1290 | flow->key.phy.priority = nla_get_u32(nla); |
1157 | break; | 1291 | break; |
1158 | 1292 | ||
1293 | case OVS_KEY_ATTR_TUNNEL: | ||
1294 | err = ovs_ipv4_tun_from_nlattr(nla, tun_key); | ||
1295 | if (err) | ||
1296 | return err; | ||
1297 | break; | ||
1298 | |||
1159 | case OVS_KEY_ATTR_IN_PORT: | 1299 | case OVS_KEY_ATTR_IN_PORT: |
1160 | if (nla_get_u32(nla) >= DP_MAX_PORTS) | 1300 | if (nla_get_u32(nla) >= DP_MAX_PORTS) |
1161 | return -EINVAL; | 1301 | return -EINVAL; |
@@ -1170,6 +1310,10 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, | |||
1170 | } | 1310 | } |
1171 | if (rem) | 1311 | if (rem) |
1172 | return -EINVAL; | 1312 | return -EINVAL; |
1313 | |||
1314 | flow->hash = ovs_flow_hash(&flow->key, | ||
1315 | flow_key_start(&flow->key), key_len); | ||
1316 | |||
1173 | return 0; | 1317 | return 0; |
1174 | } | 1318 | } |
1175 | 1319 | ||
@@ -1182,6 +1326,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) | |||
1182 | nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) | 1326 | nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) |
1183 | goto nla_put_failure; | 1327 | goto nla_put_failure; |
1184 | 1328 | ||
1329 | if (swkey->tun_key.ipv4_dst && | ||
1330 | ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) | ||
1331 | goto nla_put_failure; | ||
1332 | |||
1185 | if (swkey->phy.in_port != DP_MAX_PORTS && | 1333 | if (swkey->phy.in_port != DP_MAX_PORTS && |
1186 | nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) | 1334 | nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) |
1187 | goto nla_put_failure; | 1335 | goto nla_put_failure; |
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2a83e2141f08..66ef7220293e 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h | |||
@@ -40,7 +40,38 @@ struct sw_flow_actions { | |||
40 | struct nlattr actions[]; | 40 | struct nlattr actions[]; |
41 | }; | 41 | }; |
42 | 42 | ||
/*
 * OVS_TUNNEL_KEY_SIZE is the byte offset just past ipv4_ttl, the last member
 * of struct ovs_key_ipv4_tunnel.  ovs_flow_tun_key_init() zeroes the bytes
 * from this offset up to sizeof(struct ovs_key_ipv4_tunnel).
 */
43 | /* Used to memset ovs_key_ipv4_tunnel padding. */ | ||
44 | #define OVS_TUNNEL_KEY_SIZE \ | ||
45 | (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ | ||
46 | FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) | ||
47 | |||
/*
 * Tunnel metadata carried in a flow key.  ipv4_ttl must stay the last member:
 * OVS_TUNNEL_KEY_SIZE is computed from its offset and size.
 */
48 | struct ovs_key_ipv4_tunnel { | ||
/* Tunnel id, big-endian. */
49 | __be64 tun_id; | ||
/* Outer IPv4 source address (iphdr saddr). */
50 | __be32 ipv4_src; | ||
/* Outer IPv4 destination address (iphdr daddr). */
51 | __be32 ipv4_dst; | ||
/* TUNNEL_* flag bits, e.g. TUNNEL_KEY, TUNNEL_CSUM, TUNNEL_DONT_FRAGMENT. */
52 | __be16 tun_flags; | ||
/* Outer IPv4 TOS. */
53 | u8 ipv4_tos; | ||
/* Outer IPv4 TTL. */
54 | u8 ipv4_ttl; | ||
55 | }; | ||
56 | |||
57 | static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, | ||
58 | const struct iphdr *iph, __be64 tun_id, | ||
59 | __be16 tun_flags) | ||
60 | { | ||
61 | tun_key->tun_id = tun_id; | ||
62 | tun_key->ipv4_src = iph->saddr; | ||
63 | tun_key->ipv4_dst = iph->daddr; | ||
64 | tun_key->ipv4_tos = iph->tos; | ||
65 | tun_key->ipv4_ttl = iph->ttl; | ||
66 | tun_key->tun_flags = tun_flags; | ||
67 | |||
68 | /* clear struct padding. */ | ||
69 | memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, | ||
70 | sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); | ||
71 | } | ||
72 | |||
43 | struct sw_flow_key { | 73 | struct sw_flow_key { |
74 | struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ | ||
44 | struct { | 75 | struct { |
45 | u32 priority; /* Packet QoS priority. */ | 76 | u32 priority; /* Packet QoS priority. */ |
46 | u32 skb_mark; /* SKB mark. */ | 77 | u32 skb_mark; /* SKB mark. */ |
@@ -130,7 +161,7 @@ struct sw_flow *ovs_flow_alloc(void); | |||
130 | void ovs_flow_deferred_free(struct sw_flow *); | 161 | void ovs_flow_deferred_free(struct sw_flow *); |
131 | void ovs_flow_free(struct sw_flow *flow); | 162 | void ovs_flow_free(struct sw_flow *flow); |
132 | 163 | ||
133 | struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); | 164 | struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); |
134 | void ovs_flow_deferred_free_acts(struct sw_flow_actions *); | 165 | void ovs_flow_deferred_free_acts(struct sw_flow_actions *); |
135 | 166 | ||
136 | int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, | 167 | int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, |
@@ -141,10 +172,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); | |||
141 | int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); | 172 | int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); |
142 | int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, | 173 | int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, |
143 | const struct nlattr *); | 174 | const struct nlattr *); |
144 | int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, | 175 | int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, |
145 | const struct nlattr *attr); | 176 | const struct nlattr *attr); |
146 | 177 | ||
147 | #define MAX_ACTIONS_BUFSIZE (16 * 1024) | 178 | #define MAX_ACTIONS_BUFSIZE (32 * 1024) |
148 | #define TBL_MIN_BUCKETS 1024 | 179 | #define TBL_MIN_BUCKETS 1024 |
149 | 180 | ||
150 | struct flow_table { | 181 | struct flow_table { |
@@ -173,11 +204,15 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table); | |||
173 | struct flow_table *ovs_flow_tbl_alloc(int new_size); | 204 | struct flow_table *ovs_flow_tbl_alloc(int new_size); |
174 | struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); | 205 | struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); |
175 | struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); | 206 | struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); |
176 | void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); | 207 | void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, |
208 | struct sw_flow_key *key, int key_len); | ||
177 | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); | 209 | void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); |
178 | u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); | ||
179 | 210 | ||
180 | struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); | 211 | struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); |
181 | extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; | 212 | extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; |
213 | int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, | ||
214 | struct ovs_key_ipv4_tunnel *tun_key); | ||
215 | int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, | ||
216 | const struct ovs_key_ipv4_tunnel *tun_key); | ||
182 | 217 | ||
183 | #endif /* flow.h */ | 218 | #endif /* flow.h */ |
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 000000000000..3a8d1900aa78 --- /dev/null +++ b/net/openvswitch/vport-gre.c | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007-2013 Nicira, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of version 2 of the GNU General Public | ||
6 | * License as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software | ||
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
16 | * 02110-1301, USA | ||
17 | */ | ||
18 | |||
19 | #ifdef CONFIG_NET_IPGRE_DEMUX | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
21 | |||
22 | #include <linux/if.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/ip.h> | ||
25 | #include <linux/if_tunnel.h> | ||
26 | #include <linux/if_vlan.h> | ||
27 | #include <linux/in.h> | ||
28 | #include <linux/if_vlan.h> | ||
29 | #include <linux/in.h> | ||
30 | #include <linux/in_route.h> | ||
31 | #include <linux/inetdevice.h> | ||
32 | #include <linux/jhash.h> | ||
33 | #include <linux/list.h> | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/workqueue.h> | ||
36 | #include <linux/rculist.h> | ||
37 | #include <net/route.h> | ||
38 | #include <net/xfrm.h> | ||
39 | |||
40 | #include <net/icmp.h> | ||
41 | #include <net/ip.h> | ||
42 | #include <net/ip_tunnels.h> | ||
43 | #include <net/gre.h> | ||
44 | #include <net/net_namespace.h> | ||
45 | #include <net/netns/generic.h> | ||
46 | #include <net/protocol.h> | ||
47 | |||
48 | #include "datapath.h" | ||
49 | #include "vport.h" | ||
50 | |||
51 | /* Returns the least-significant 32 bits of a __be64. */ | ||
52 | static __be32 be64_get_low32(__be64 x) | ||
53 | { | ||
54 | #ifdef __BIG_ENDIAN | ||
55 | return (__force __be32)x; | ||
56 | #else | ||
57 | return (__force __be32)((__force u64)x >> 32); | ||
58 | #endif | ||
59 | } | ||
60 | |||
61 | static __be16 filter_tnl_flags(__be16 flags) | ||
62 | { | ||
63 | return flags & (TUNNEL_CSUM | TUNNEL_KEY); | ||
64 | } | ||
65 | |||
66 | static struct sk_buff *__build_header(struct sk_buff *skb, | ||
67 | int tunnel_hlen) | ||
68 | { | ||
69 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | ||
70 | struct tnl_ptk_info tpi; | ||
71 | |||
72 | skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); | ||
73 | if (IS_ERR(skb)) | ||
74 | return NULL; | ||
75 | |||
76 | tpi.flags = filter_tnl_flags(tun_key->tun_flags); | ||
77 | tpi.proto = htons(ETH_P_TEB); | ||
78 | tpi.key = be64_get_low32(tun_key->tun_id); | ||
79 | tpi.seq = 0; | ||
80 | gre_build_header(skb, &tpi, tunnel_hlen); | ||
81 | |||
82 | return skb; | ||
83 | } | ||
84 | |||
85 | static __be64 key_to_tunnel_id(__be32 key, __be32 seq) | ||
86 | { | ||
87 | #ifdef __BIG_ENDIAN | ||
88 | return (__force __be64)((__force u64)seq << 32 | (__force u32)key); | ||
89 | #else | ||
90 | return (__force __be64)((__force u64)key << 32 | (__force u32)seq); | ||
91 | #endif | ||
92 | } | ||
93 | |||
94 | /* Called with rcu_read_lock and BH disabled. */ | ||
95 | static int gre_rcv(struct sk_buff *skb, | ||
96 | const struct tnl_ptk_info *tpi) | ||
97 | { | ||
98 | struct ovs_key_ipv4_tunnel tun_key; | ||
99 | struct ovs_net *ovs_net; | ||
100 | struct vport *vport; | ||
101 | __be64 key; | ||
102 | |||
103 | ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); | ||
104 | vport = rcu_dereference(ovs_net->vport_net.gre_vport); | ||
105 | if (unlikely(!vport)) | ||
106 | return PACKET_REJECT; | ||
107 | |||
108 | key = key_to_tunnel_id(tpi->key, tpi->seq); | ||
109 | ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, | ||
110 | filter_tnl_flags(tpi->flags)); | ||
111 | |||
112 | ovs_vport_receive(vport, skb, &tun_key); | ||
113 | return PACKET_RCVD; | ||
114 | } | ||
115 | |||
116 | static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) | ||
117 | { | ||
118 | struct net *net = ovs_dp_get_net(vport->dp); | ||
119 | struct flowi4 fl; | ||
120 | struct rtable *rt; | ||
121 | int min_headroom; | ||
122 | int tunnel_hlen; | ||
123 | __be16 df; | ||
124 | int err; | ||
125 | |||
126 | if (unlikely(!OVS_CB(skb)->tun_key)) { | ||
127 | err = -EINVAL; | ||
128 | goto error; | ||
129 | } | ||
130 | |||
131 | /* Route lookup */ | ||
132 | memset(&fl, 0, sizeof(fl)); | ||
133 | fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; | ||
134 | fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; | ||
135 | fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); | ||
136 | fl.flowi4_mark = skb->mark; | ||
137 | fl.flowi4_proto = IPPROTO_GRE; | ||
138 | |||
139 | rt = ip_route_output_key(net, &fl); | ||
140 | if (IS_ERR(rt)) | ||
141 | return PTR_ERR(rt); | ||
142 | |||
143 | tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); | ||
144 | |||
145 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len | ||
146 | + tunnel_hlen + sizeof(struct iphdr) | ||
147 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | ||
148 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | ||
149 | int head_delta = SKB_DATA_ALIGN(min_headroom - | ||
150 | skb_headroom(skb) + | ||
151 | 16); | ||
152 | err = pskb_expand_head(skb, max_t(int, head_delta, 0), | ||
153 | 0, GFP_ATOMIC); | ||
154 | if (unlikely(err)) | ||
155 | goto err_free_rt; | ||
156 | } | ||
157 | |||
158 | if (vlan_tx_tag_present(skb)) { | ||
159 | if (unlikely(!__vlan_put_tag(skb, | ||
160 | skb->vlan_proto, | ||
161 | vlan_tx_tag_get(skb)))) { | ||
162 | err = -ENOMEM; | ||
163 | goto err_free_rt; | ||
164 | } | ||
165 | skb->vlan_tci = 0; | ||
166 | } | ||
167 | |||
168 | /* Push Tunnel header. */ | ||
169 | skb = __build_header(skb, tunnel_hlen); | ||
170 | if (unlikely(!skb)) { | ||
171 | err = 0; | ||
172 | goto err_free_rt; | ||
173 | } | ||
174 | |||
175 | df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? | ||
176 | htons(IP_DF) : 0; | ||
177 | |||
178 | skb->local_df = 1; | ||
179 | |||
180 | return iptunnel_xmit(net, rt, skb, fl.saddr, | ||
181 | OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, | ||
182 | OVS_CB(skb)->tun_key->ipv4_tos, | ||
183 | OVS_CB(skb)->tun_key->ipv4_ttl, df); | ||
184 | err_free_rt: | ||
185 | ip_rt_put(rt); | ||
186 | error: | ||
187 | return err; | ||
188 | } | ||
189 | |||
190 | static struct gre_cisco_protocol gre_protocol = { | ||
191 | .handler = gre_rcv, | ||
192 | .priority = 1, | ||
193 | }; | ||
194 | |||
195 | static int gre_ports; | ||
196 | static int gre_init(void) | ||
197 | { | ||
198 | int err; | ||
199 | |||
200 | gre_ports++; | ||
201 | if (gre_ports > 1) | ||
202 | return 0; | ||
203 | |||
204 | err = gre_cisco_register(&gre_protocol); | ||
205 | if (err) | ||
206 | pr_warn("cannot register gre protocol handler\n"); | ||
207 | |||
208 | return err; | ||
209 | } | ||
210 | |||
211 | static void gre_exit(void) | ||
212 | { | ||
213 | gre_ports--; | ||
214 | if (gre_ports > 0) | ||
215 | return; | ||
216 | |||
217 | gre_cisco_unregister(&gre_protocol); | ||
218 | } | ||
219 | |||
/* The vport's private area holds its name (copied there by gre_create()). */
static const char *gre_get_name(const struct vport *vport)
{
	const char *name = vport_priv(vport);

	return name;
}
224 | |||
225 | static struct vport *gre_create(const struct vport_parms *parms) | ||
226 | { | ||
227 | struct net *net = ovs_dp_get_net(parms->dp); | ||
228 | struct ovs_net *ovs_net; | ||
229 | struct vport *vport; | ||
230 | int err; | ||
231 | |||
232 | err = gre_init(); | ||
233 | if (err) | ||
234 | return ERR_PTR(err); | ||
235 | |||
236 | ovs_net = net_generic(net, ovs_net_id); | ||
237 | if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { | ||
238 | vport = ERR_PTR(-EEXIST); | ||
239 | goto error; | ||
240 | } | ||
241 | |||
242 | vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); | ||
243 | if (IS_ERR(vport)) | ||
244 | goto error; | ||
245 | |||
246 | strncpy(vport_priv(vport), parms->name, IFNAMSIZ); | ||
247 | rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); | ||
248 | return vport; | ||
249 | |||
250 | error: | ||
251 | gre_exit(); | ||
252 | return vport; | ||
253 | } | ||
254 | |||
255 | static void gre_tnl_destroy(struct vport *vport) | ||
256 | { | ||
257 | struct net *net = ovs_dp_get_net(vport->dp); | ||
258 | struct ovs_net *ovs_net; | ||
259 | |||
260 | ovs_net = net_generic(net, ovs_net_id); | ||
261 | |||
262 | rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); | ||
263 | ovs_vport_deferred_free(vport); | ||
264 | gre_exit(); | ||
265 | } | ||
266 | |||
267 | const struct vport_ops ovs_gre_vport_ops = { | ||
268 | .type = OVS_VPORT_TYPE_GRE, | ||
269 | .create = gre_create, | ||
270 | .destroy = gre_tnl_destroy, | ||
271 | .get_name = gre_get_name, | ||
272 | .send = gre_tnl_send, | ||
273 | }; | ||
274 | #endif | ||
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index e284c7e1fec4..98d3edbbc235 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c | |||
@@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde | |||
67 | static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) | 67 | static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) |
68 | { | 68 | { |
69 | rcu_read_lock(); | 69 | rcu_read_lock(); |
70 | ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); | 70 | ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); |
71 | rcu_read_unlock(); | 71 | rcu_read_unlock(); |
72 | return 0; | 72 | return 0; |
73 | } | 73 | } |
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 40de815b4213..5982f3f62835 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c | |||
@@ -51,7 +51,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) | |||
51 | skb_push(skb, ETH_HLEN); | 51 | skb_push(skb, ETH_HLEN); |
52 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); | 52 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
53 | 53 | ||
54 | ovs_vport_receive(vport, skb); | 54 | ovs_vport_receive(vport, skb, NULL); |
55 | return; | 55 | return; |
56 | 56 | ||
57 | error: | 57 | error: |
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 176d449351eb..f52dfb9cb5a7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c | |||
@@ -38,6 +38,10 @@ | |||
38 | static const struct vport_ops *vport_ops_list[] = { | 38 | static const struct vport_ops *vport_ops_list[] = { |
39 | &ovs_netdev_vport_ops, | 39 | &ovs_netdev_vport_ops, |
40 | &ovs_internal_vport_ops, | 40 | &ovs_internal_vport_ops, |
41 | |||
42 | #ifdef CONFIG_NET_IPGRE_DEMUX | ||
43 | &ovs_gre_vport_ops, | ||
44 | #endif | ||
41 | }; | 45 | }; |
42 | 46 | ||
43 | /* Protected by RCU read lock for reading, ovs_mutex for writing. */ | 47 | /* Protected by RCU read lock for reading, ovs_mutex for writing. */ |
@@ -325,7 +329,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) | |||
325 | * Must be called with rcu_read_lock. The packet cannot be shared and | 329 | * Must be called with rcu_read_lock. The packet cannot be shared and |
326 | * skb->data should point to the Ethernet header. | 330 | * skb->data should point to the Ethernet header. |
327 | */ | 331 | */ |
328 | void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) | 332 | void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, |
333 | struct ovs_key_ipv4_tunnel *tun_key) | ||
329 | { | 334 | { |
330 | struct pcpu_tstats *stats; | 335 | struct pcpu_tstats *stats; |
331 | 336 | ||
@@ -335,6 +340,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) | |||
335 | stats->rx_bytes += skb->len; | 340 | stats->rx_bytes += skb->len; |
336 | u64_stats_update_end(&stats->syncp); | 341 | u64_stats_update_end(&stats->syncp); |
337 | 342 | ||
343 | OVS_CB(skb)->tun_key = tun_key; | ||
338 | ovs_dp_process_received_packet(vport, skb); | 344 | ovs_dp_process_received_packet(vport, skb); |
339 | } | 345 | } |
340 | 346 | ||
@@ -402,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) | |||
402 | 408 | ||
403 | spin_unlock(&vport->stats_lock); | 409 | spin_unlock(&vport->stats_lock); |
404 | } | 410 | } |
411 | |||
412 | static void free_vport_rcu(struct rcu_head *rcu) | ||
413 | { | ||
414 | struct vport *vport = container_of(rcu, struct vport, rcu); | ||
415 | |||
416 | ovs_vport_free(vport); | ||
417 | } | ||
418 | |||
419 | void ovs_vport_deferred_free(struct vport *vport) | ||
420 | { | ||
421 | if (!vport) | ||
422 | return; | ||
423 | |||
424 | call_rcu(&vport->rcu, free_vport_rcu); | ||
425 | } | ||
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 293278c4c2df..376045c42f8b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h | |||
@@ -34,6 +34,11 @@ struct vport_parms; | |||
34 | 34 | ||
35 | /* The following definitions are for users of the vport subsytem: */ | 35 | /* The following definitions are for users of the vport subsytem: */ |
36 | 36 | ||
37 | /* The following definitions are for users of the vport subsystem: */ | ||
38 | struct vport_net { | ||
39 | struct vport __rcu *gre_vport; | ||
40 | }; | ||
41 | |||
37 | int ovs_vport_init(void); | 42 | int ovs_vport_init(void); |
38 | void ovs_vport_exit(void); | 43 | void ovs_vport_exit(void); |
39 | 44 | ||
@@ -152,6 +157,7 @@ enum vport_err_type { | |||
152 | struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, | 157 | struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, |
153 | const struct vport_parms *); | 158 | const struct vport_parms *); |
154 | void ovs_vport_free(struct vport *); | 159 | void ovs_vport_free(struct vport *); |
160 | void ovs_vport_deferred_free(struct vport *vport); | ||
155 | 161 | ||
156 | #define VPORT_ALIGN 8 | 162 | #define VPORT_ALIGN 8 |
157 | 163 | ||
@@ -184,13 +190,15 @@ static inline struct vport *vport_from_priv(const void *priv) | |||
184 | return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); | 190 | return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); |
185 | } | 191 | } |
186 | 192 | ||
187 | void ovs_vport_receive(struct vport *, struct sk_buff *); | 193 | void ovs_vport_receive(struct vport *, struct sk_buff *, |
194 | struct ovs_key_ipv4_tunnel *); | ||
188 | void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); | 195 | void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); |
189 | 196 | ||
190 | /* List of statically compiled vport implementations. Don't forget to also | 197 | /* List of statically compiled vport implementations. Don't forget to also |
191 | * add yours to the list at the top of vport.c. */ | 198 | * add yours to the list at the top of vport.c. */ |
192 | extern const struct vport_ops ovs_netdev_vport_ops; | 199 | extern const struct vport_ops ovs_netdev_vport_ops; |
193 | extern const struct vport_ops ovs_internal_vport_ops; | 200 | extern const struct vport_ops ovs_internal_vport_ops; |
201 | extern const struct vport_ops ovs_gre_vport_ops; | ||
194 | 202 | ||
195 | static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, | 203 | static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, |
196 | const void *start, unsigned int len) | 204 | const void *start, unsigned int len) |