author    David S. Miller <davem@davemloft.net>  2013-06-19 21:07:49 -0400
committer David S. Miller <davem@davemloft.net>  2013-06-19 21:07:49 -0400
commit    dc3d807d6fd983603c82e7bcdbaa49cdb4239691 (patch)
tree      f426945de6694203f2c34218b4e4b06913b8f58c
parent    ac8025a643a0e0beb81f3f37ca693364c6b77858 (diff)
parent    aa310701e787087dbfbccf1409982a96e16c57a6 (diff)
openvswitch: gre tunneling support.
Pravin B Shelar says:

====================
This patch series adds support for GRE tunneling. The first six patches
extend the kernel gre and ip_tunnel modules' API so that there is more
code sharing between the gre modules and OVS. The rest of the patches
add the OVS tunneling infrastructure and the GRE protocol vport.

V2 fixes two patches according to comments from Jesse.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
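The core ip_tunnel API change running through the diff below: iptunnel_xmit() now installs the outer IPv4 header itself and returns the transmitted byte count (zero when the packet was dropped at the IP layer), while the new iptunnel_xmit_stats() helper folds that result into the device counters. A minimal sketch of the resulting transmit pattern, assuming the route and outer-header fields (rt, saddr, daddr, tos, ttl, df) were derived earlier as a real driver would; the wrapper function itself is illustrative, not part of this series:

	/* Hypothetical driver using the new helpers (sketch only). */
	static netdev_tx_t example_tunnel_xmit(struct sk_buff *skb,
					       struct net_device *dev,
					       struct rtable *rt,
					       __be32 saddr, __be32 daddr,
					       __u8 tos, __u8 ttl, __be16 df)
	{
		int err;

		/* Pushes the outer iphdr, attaches rt and calls
		 * ip_local_out(); returns the packet length, or 0 if
		 * the packet was dropped at the IP layer. */
		err = iptunnel_xmit(dev_net(dev), rt, skb, saddr, daddr,
				    IPPROTO_UDP, tos, ttl, df);

		/* Accounts err > 0 as tx_bytes/tx_packets, err < 0 as
		 * tx_errors, and 0 as tx_dropped. */
		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
		return NETDEV_TX_OK;
	}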
-rw-r--r--  drivers/net/vxlan.c                   |  32
-rw-r--r--  include/net/gre.h                     |  27
-rw-r--r--  include/net/ip_tunnels.h              |  28
-rw-r--r--  include/uapi/linux/openvswitch.h      |  19
-rw-r--r--  net/ipv4/Makefile                     |   2
-rw-r--r--  net/ipv4/gre.c                        | 323
-rw-r--r--  net/ipv4/ip_gre.c                     | 247
-rw-r--r--  net/ipv4/ip_tunnel.c                  |  68
-rw-r--r--  net/ipv4/ip_tunnel_core.c             | 122
-rw-r--r--  net/ipv4/ipip.c                       |   6
-rw-r--r--  net/ipv6/sit.c                        |  46
-rw-r--r--  net/openvswitch/Kconfig               |   2
-rw-r--r--  net/openvswitch/Makefile              |   3
-rw-r--r--  net/openvswitch/actions.c             |   4
-rw-r--r--  net/openvswitch/datapath.c            | 356
-rw-r--r--  net/openvswitch/datapath.h            |   4
-rw-r--r--  net/openvswitch/flow.c                | 184
-rw-r--r--  net/openvswitch/flow.h                |  45
-rw-r--r--  net/openvswitch/vport-gre.c           | 274
-rw-r--r--  net/openvswitch/vport-internal_dev.c  |   2
-rw-r--r--  net/openvswitch/vport-netdev.c        |   2
-rw-r--r--  net/openvswitch/vport.c               |  23
-rw-r--r--  net/openvswitch/vport.h               |  10
23 files changed, 1373 insertions(+), 456 deletions(-)
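The series also adds a small demultiplexer for Cisco-style GRE: consumers register a struct gre_cisco_protocol and receive the pre-parsed GRE fields in a struct tnl_ptk_info, which is how both ip_gre and the new OVS vport-gre hook in below. A hypothetical third consumer (all names here are illustrative) would look roughly like:

	static int my_gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
	{
		/* tpi->flags/key/seq carry the parsed GRE header; the IP and
		 * GRE headers have already been pulled off the skb. */
		return PACKET_RCVD;	/* anything else lets the next handler try */
	}

	static int my_gre_err(struct sk_buff *skb, u32 info,
			      const struct tnl_ptk_info *tpi)
	{
		return PACKET_RCVD;	/* claim the ICMP error */
	}

	static struct gre_cisco_protocol my_gre_proto = {
		.handler     = my_gre_rcv,
		.err_handler = my_gre_err,
		.priority    = 1,	/* slot in gre_cisco_proto_list[], < GRE_IP_PROTO_MAX */
	};

	static int __init my_init(void)
	{
		/* 0 on success, -EBUSY if the priority slot is taken. */
		return gre_cisco_register(&my_gre_proto);
	}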
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f6dce13c8f89..284c6c00c353 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1021,7 +1021,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct rtable *rt;
 	const struct iphdr *old_iph;
-	struct iphdr *iph;
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	struct flowi4 fl4;
@@ -1030,6 +1029,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	u32 vni;
 	__be16 df = 0;
 	__u8 tos, ttl;
+	int err;
 
 	dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
 	vni = rdst->remote_vni;
@@ -1097,13 +1097,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
 		return NETDEV_TX_OK;
 	}
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = htonl(vni << 8);
@@ -1118,27 +1111,18 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	uh->len = htons(skb->len);
 	uh->check = 0;
 
-	__skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-	iph->version = 4;
-	iph->ihl = sizeof(struct iphdr) >> 2;
-	iph->frag_off = df;
-	iph->protocol = IPPROTO_UDP;
-	iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
-	iph->daddr = dst;
-	iph->saddr = fl4.saddr;
-	iph->ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
-	tunnel_ip_select_ident(skb, old_iph, &rt->dst);
-
-	nf_reset(skb);
-
 	vxlan_set_owner(dev, skb);
 
 	if (handle_offloads(skb))
 		goto drop;
 
-	iptunnel_xmit(skb, dev);
+	tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+	ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+
+	err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst,
+			    IPPROTO_UDP, tos, ttl, df);
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+
 	return NETDEV_TX_OK;
 
 drop:
diff --git a/include/net/gre.h b/include/net/gre.h
index 9f03a390c826..a5a4ddf05300 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,6 +7,7 @@
 #define GREPROTO_CISCO 0
 #define GREPROTO_PPTP 1
 #define GREPROTO_MAX 2
+#define GRE_IP_PROTO_MAX 2
 
 struct gre_protocol {
 	int (*handler)(struct sk_buff *skb);
@@ -22,6 +23,32 @@ struct gre_base_hdr {
 int gre_add_protocol(const struct gre_protocol *proto, u8 version);
 int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 
+struct gre_cisco_protocol {
+	int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi);
+	int (*err_handler)(struct sk_buff *skb, u32 info,
+			   const struct tnl_ptk_info *tpi);
+	u8 priority;
+};
+
+int gre_cisco_register(struct gre_cisco_protocol *proto);
+int gre_cisco_unregister(struct gre_cisco_protocol *proto);
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len);
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum);
+
+static inline int ip_gre_calc_hlen(__be16 o_flags)
+{
+	int addend = 4;
+
+	if (o_flags&TUNNEL_CSUM)
+		addend += 4;
+	if (o_flags&TUNNEL_KEY)
+		addend += 4;
+	if (o_flags&TUNNEL_SEQ)
+		addend += 4;
+	return addend;
+}
+
 static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
 {
 	__be16 tflags = 0;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 1be442f89406..10bbb4273f7d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -73,6 +73,7 @@ struct ip_tunnel {
 #define TUNNEL_REC	__cpu_to_be16(0x20)
 #define TUNNEL_VERSION	__cpu_to_be16(0x40)
 #define TUNNEL_NO_KEY	__cpu_to_be16(0x80)
+#define TUNNEL_DONT_FRAGMENT	__cpu_to_be16(0x0100)
 
 struct tnl_ptk_info {
 	__be16 flags;
@@ -155,23 +156,28 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb,
 			(skb_shinfo(skb)->gso_segs ?: 1) - 1);
 }
 
-static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	int err;
-	int pkt_len = skb->len - skb_transport_offset(skb);
-	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
-
-	nf_reset(skb);
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df);
+
+static inline void iptunnel_xmit_stats(int err,
+				       struct net_device_stats *err_stats,
+				       struct pcpu_tstats __percpu *stats)
+{
+	if (err > 0) {
+		struct pcpu_tstats *tstats = this_cpu_ptr(stats);
 
-	err = ip_local_out(skb);
-	if (likely(net_xmit_eval(err) == 0)) {
 		u64_stats_update_begin(&tstats->syncp);
-		tstats->tx_bytes += pkt_len;
+		tstats->tx_bytes += err;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+	} else if (err < 0) {
+		err_stats->tx_errors++;
+		err_stats->tx_aborted_errors++;
 	} else {
-		dev->stats.tx_errors++;
-		dev->stats.tx_aborted_errors++;
+		err_stats->tx_dropped++;
 	}
 }
 #endif /* __NET_IP_TUNNELS_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 424672db7f12..c55efaaa9bb4 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -164,6 +164,7 @@ enum ovs_vport_type {
 	OVS_VPORT_TYPE_UNSPEC,
 	OVS_VPORT_TYPE_NETDEV,   /* network device */
 	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
+	OVS_VPORT_TYPE_GRE,      /* GRE tunnel. */
 	__OVS_VPORT_TYPE_MAX
 };
 
@@ -246,11 +247,29 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_ARP,      /* struct ovs_key_arp */
 	OVS_KEY_ATTR_ND,       /* struct ovs_key_nd */
 	OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
+	OVS_KEY_ATTR_TUNNEL,   /* Nested set of ovs_tunnel attributes */
+
+#ifdef __KERNEL__
+	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
+#endif
 	__OVS_KEY_ATTR_MAX
 };
 
 #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
 
+enum ovs_tunnel_key_attr {
+	OVS_TUNNEL_KEY_ATTR_ID,            /* be64 Tunnel ID */
+	OVS_TUNNEL_KEY_ATTR_IPV4_SRC,      /* be32 src IP address. */
+	OVS_TUNNEL_KEY_ATTR_IPV4_DST,      /* be32 dst IP address. */
+	OVS_TUNNEL_KEY_ATTR_TOS,           /* u8 Tunnel IP ToS. */
+	OVS_TUNNEL_KEY_ATTR_TTL,           /* u8 Tunnel IP TTL. */
+	OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */
+	OVS_TUNNEL_KEY_ATTR_CSUM,          /* No argument. CSUM packet. */
+	__OVS_TUNNEL_KEY_ATTR_MAX
+};
+
+#define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1)
+
 /**
  * enum ovs_frag_type - IPv4 and IPv6 fragment type
  * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7fcf8101d85f..86ded0bac9c7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
-	     inet_fragment.o ping.o
+	     inet_fragment.o ping.o ip_tunnel_core.o
 
 obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
 obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index b2e805af9b87..ba4803e609b5 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -13,6 +13,8 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/if.h>
+#include <linux/icmp.h>
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/skbuff.h>
@@ -24,51 +26,270 @@
 #include <net/protocol.h>
 #include <net/gre.h>
 
+#include <net/icmp.h>
+#include <net/route.h>
+#include <net/xfrm.h>
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static DEFINE_SPINLOCK(gre_proto_lock);
+static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
 	if (version >= GREPROTO_MAX)
-		goto err_out;
-
-	spin_lock(&gre_proto_lock);
-	if (gre_proto[version])
-		goto err_out_unlock;
-
-	RCU_INIT_POINTER(gre_proto[version], proto);
-	spin_unlock(&gre_proto_lock);
-	return 0;
+		return -EINVAL;
 
-err_out_unlock:
-	spin_unlock(&gre_proto_lock);
-err_out:
-	return -1;
+	return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
+		0 : -EBUSY;
 }
 EXPORT_SYMBOL_GPL(gre_add_protocol);
 
 int gre_del_protocol(const struct gre_protocol *proto, u8 version)
 {
+	int ret;
+
 	if (version >= GREPROTO_MAX)
-		goto err_out;
+		return -EINVAL;
 
-	spin_lock(&gre_proto_lock);
-	if (rcu_dereference_protected(gre_proto[version],
-			lockdep_is_held(&gre_proto_lock)) != proto)
-		goto err_out_unlock;
-	RCU_INIT_POINTER(gre_proto[version], NULL);
-	spin_unlock(&gre_proto_lock);
+	ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
+		0 : -EBUSY;
+
+	if (ret)
+		return ret;
+
 	synchronize_rcu();
 	return 0;
-
-err_out_unlock:
-	spin_unlock(&gre_proto_lock);
-err_out:
-	return -1;
 }
 EXPORT_SYMBOL_GPL(gre_del_protocol);
 
+void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+		      int hdr_len)
+{
+	struct gre_base_hdr *greh;
+
+	skb_push(skb, hdr_len);
+
+	greh = (struct gre_base_hdr *)skb->data;
+	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+	greh->protocol = tpi->proto;
+
+	if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+		if (tpi->flags&TUNNEL_SEQ) {
+			*ptr = tpi->seq;
+			ptr--;
+		}
+		if (tpi->flags&TUNNEL_KEY) {
+			*ptr = tpi->key;
+			ptr--;
+		}
+		if (tpi->flags&TUNNEL_CSUM &&
+		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+			*ptr = 0;
+			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+								 skb->len, 0));
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(gre_build_header);
+
+struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum)
+{
+	int err;
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+		return skb;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) {
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(gre_handle_offloads);
+
+static __sum16 check_checksum(struct sk_buff *skb)
+{
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		csum = csum_fold(skb->csum);
+
+		if (!csum)
+			break;
+		/* Fall through. */
+
+	case CHECKSUM_NONE:
+		skb->csum = 0;
+		csum = __skb_checksum_complete(skb);
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		break;
+	}
+
+	return csum;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+			    bool *csum_err)
+{
+	unsigned int ip_hlen = ip_hdrlen(skb);
+	const struct gre_base_hdr *greh;
+	__be32 *options;
+	int hdr_len;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+		return -EINVAL;
+
+	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+	hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+	if (!pskb_may_pull(skb, hdr_len))
+		return -EINVAL;
+
+	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+	tpi->proto = greh->protocol;
+
+	options = (__be32 *)(greh + 1);
+	if (greh->flags & GRE_CSUM) {
+		if (check_checksum(skb)) {
+			*csum_err = true;
+			return -EINVAL;
+		}
+		options++;
+	}
+
+	if (greh->flags & GRE_KEY) {
+		tpi->key = *options;
+		options++;
+	} else
+		tpi->key = 0;
+
+	if (unlikely(greh->flags & GRE_SEQ)) {
+		tpi->seq = *options;
+		options++;
+	} else
+		tpi->seq = 0;
+
+	/* WCCP version 1 and 2 protocol decoding.
+	 * - Change protocol to IP
+	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+	 */
+	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+		tpi->proto = htons(ETH_P_IP);
+		if ((*(u8 *)options & 0xF0) != 0x40) {
+			hdr_len += 4;
+			if (!pskb_may_pull(skb, hdr_len))
+				return -EINVAL;
+		}
+	}
+
+	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static int gre_cisco_rcv(struct sk_buff *skb)
+{
+	struct tnl_ptk_info tpi;
+	int i;
+	bool csum_err = false;
+
+	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+		goto drop;
+
+	rcu_read_lock();
+	for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+		struct gre_cisco_protocol *proto;
+		int ret;
+
+		proto = rcu_dereference(gre_cisco_proto_list[i]);
+		if (!proto)
+			continue;
+		ret = proto->handler(skb, &tpi);
+		if (ret == PACKET_RCVD) {
+			rcu_read_unlock();
+			return 0;
+		}
+	}
+	rcu_read_unlock();
+
+	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+static void gre_cisco_err(struct sk_buff *skb, u32 info)
+{
+	/* All the routers (except for Linux) return only
+	 * 8 bytes of packet payload. It means, that precise relaying of
+	 * ICMP in the real Internet is absolutely infeasible.
+	 *
+	 * Moreover, Cisco "wise men" put GRE key to the third word
+	 * in GRE header. It makes impossible maintaining even soft
+	 * state for keyed
+	 * GRE tunnels with enabled checksum. Tell them "thank you".
+	 *
+	 * Well, I wonder, rfc1812 was written by Cisco employee,
+	 * what the hell these idiots break standards established
+	 * by themselves???
+	 */
+
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
+	struct tnl_ptk_info tpi;
+	bool csum_err = false;
+	int i;
+
+	if (parse_gre_header(skb, &tpi, &csum_err)) {
+		if (!csum_err) /* ignore csum errors. */
+			return;
+	}
+
+	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+		return;
+	}
+	if (type == ICMP_REDIRECT) {
+		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
+			      IPPROTO_GRE, 0);
+		return;
+	}
+
+	rcu_read_lock();
+	for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
+		struct gre_cisco_protocol *proto;
+
+		proto = rcu_dereference(gre_cisco_proto_list[i]);
+		if (!proto)
+			continue;
+
+		if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
+			goto out;
+
+	}
+out:
+	rcu_read_unlock();
+}
+
 static int gre_rcv(struct sk_buff *skb)
 {
 	const struct gre_protocol *proto;
@@ -220,27 +441,68 @@ static const struct net_offload gre_offload = {
 	},
 };
 
+static const struct gre_protocol ipgre_protocol = {
+	.handler = gre_cisco_rcv,
+	.err_handler = gre_cisco_err,
+};
+
+int gre_cisco_register(struct gre_cisco_protocol *newp)
+{
+	struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
+					    &gre_cisco_proto_list[newp->priority];
+
+	return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
+}
+EXPORT_SYMBOL_GPL(gre_cisco_register);
+
+int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
+{
+	struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
+					    &gre_cisco_proto_list[del_proto->priority];
+	int ret;
+
+	ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
+
+	if (ret)
+		return ret;
+
+	synchronize_net();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gre_cisco_unregister);
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
 
 	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
 		pr_err("can't add protocol\n");
-		return -EAGAIN;
+		goto err;
+	}
+
+	if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
+		pr_info("%s: can't add ipgre handler\n", __func__);
+		goto err_gre;
 	}
 
 	if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
 		pr_err("can't add protocol offload\n");
-		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-		return -EAGAIN;
+		goto err_gso;
 	}
 
 	return 0;
+err_gso:
+	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+err_gre:
+	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+err:
+	return -EAGAIN;
 }
 
 static void __exit gre_exit(void)
 {
 	inet_del_offload(&gre_offload, IPPROTO_GRE);
+	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
@@ -250,4 +512,3 @@ module_exit(gre_exit);
 MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
 MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
 MODULE_LICENSE("GPL");
-
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a982657d05e7..c326e869993a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev);
 static int ipgre_net_id __read_mostly;
 static int gre_tap_net_id __read_mostly;
 
-static __sum16 check_checksum(struct sk_buff *skb)
-{
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		csum = csum_fold(skb->csum);
-
-		if (!csum)
-			break;
-		/* Fall through. */
-
-	case CHECKSUM_NONE:
-		skb->csum = 0;
-		csum = __skb_checksum_complete(skb);
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		break;
-	}
-
-	return csum;
-}
-
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
-	int addend = 4;
-
-	if (o_flags&TUNNEL_CSUM)
-		addend += 4;
-	if (o_flags&TUNNEL_KEY)
-		addend += 4;
-	if (o_flags&TUNNEL_SEQ)
-		addend += 4;
-	return addend;
-}
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-			    bool *csum_err, int *hdr_len)
-{
-	unsigned int ip_hlen = ip_hdrlen(skb);
-	const struct gre_base_hdr *greh;
-	__be32 *options;
-
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
-		return -EINVAL;
-
-	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
-	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
-		return -EINVAL;
-
-	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
-	*hdr_len = ip_gre_calc_hlen(tpi->flags);
-
-	if (!pskb_may_pull(skb, *hdr_len))
-		return -EINVAL;
-
-	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
-
-	tpi->proto = greh->protocol;
-
-	options = (__be32 *)(greh + 1);
-	if (greh->flags & GRE_CSUM) {
-		if (check_checksum(skb)) {
-			*csum_err = true;
-			return -EINVAL;
-		}
-		options++;
-	}
-
-	if (greh->flags & GRE_KEY) {
-		tpi->key = *options;
-		options++;
-	} else
-		tpi->key = 0;
-
-	if (unlikely(greh->flags & GRE_SEQ)) {
-		tpi->seq = *options;
-		options++;
-	} else
-		tpi->seq = 0;
-
-	/* WCCP version 1 and 2 protocol decoding.
-	 * - Change protocol to IP
-	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
-	 */
-	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
-		tpi->proto = htons(ETH_P_IP);
-		if ((*(u8 *)options & 0xF0) != 0x40) {
-			*hdr_len += 4;
-			if (!pskb_may_pull(skb, *hdr_len))
-				return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
-static void ipgre_err(struct sk_buff *skb, u32 info)
+static int ipgre_err(struct sk_buff *skb, u32 info,
+		     const struct tnl_ptk_info *tpi)
 {
 
 	/* All the routers (except for Linux) return only
@@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
-	struct tnl_ptk_info tpi;
-	int hdr_len;
-	bool csum_err = false;
-
-	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
-		if (!csum_err) /* ignore csum errors. */
-			return;
-	}
 
 	switch (type) {
 	default:
 	case ICMP_PARAMETERPROB:
-		return;
+		return PACKET_RCVD;
 
 	case ICMP_DEST_UNREACH:
 		switch (code) {
 		case ICMP_SR_FAILED:
 		case ICMP_PORT_UNREACH:
 			/* Impossible event. */
-			return;
+			return PACKET_RCVD;
 		default:
 			/* All others are translated to HOST_UNREACH.
 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -269,138 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 			break;
 	case ICMP_TIME_EXCEEDED:
 		if (code != ICMP_EXC_TTL)
-			return;
+			return PACKET_RCVD;
 		break;
 
 	case ICMP_REDIRECT:
 		break;
 	}
 
-	if (tpi.proto == htons(ETH_P_TEB))
+	if (tpi->proto == htons(ETH_P_TEB))
 		itn = net_generic(net, gre_tap_net_id);
 	else
 		itn = net_generic(net, ipgre_net_id);
 
 	iph = (const struct iphdr *)skb->data;
-	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
-			     iph->daddr, iph->saddr, tpi.key);
+	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+			     iph->daddr, iph->saddr, tpi->key);
 
 	if (t == NULL)
-		return;
+		return PACKET_REJECT;
 
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_GRE, 0);
-		return;
-	}
-	if (type == ICMP_REDIRECT) {
-		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      IPPROTO_GRE, 0);
-		return;
-	}
 	if (t->parms.iph.daddr == 0 ||
 	    ipv4_is_multicast(t->parms.iph.daddr))
-		return;
+		return PACKET_RCVD;
 
 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
-		return;
+		return PACKET_RCVD;
 
 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 		t->err_count++;
 	else
 		t->err_count = 1;
 	t->err_time = jiffies;
+	return PACKET_RCVD;
 }
 
-static int ipgre_rcv(struct sk_buff *skb)
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn;
 	const struct iphdr *iph;
 	struct ip_tunnel *tunnel;
-	struct tnl_ptk_info tpi;
-	int hdr_len;
-	bool csum_err = false;
-
-	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
-		goto drop;
 
-	if (tpi.proto == htons(ETH_P_TEB))
+	if (tpi->proto == htons(ETH_P_TEB))
 		itn = net_generic(net, gre_tap_net_id);
 	else
 		itn = net_generic(net, ipgre_net_id);
 
 	iph = ip_hdr(skb);
-	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
-				  iph->saddr, iph->daddr, tpi.key);
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+				  iph->saddr, iph->daddr, tpi->key);
 
 	if (tunnel) {
-		ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
-		return 0;
+		ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+		return PACKET_RCVD;
 	}
-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-drop:
-	kfree_skb(skb);
-	return 0;
-}
-
-static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
-{
-	int err;
-
-	if (skb_is_gso(skb)) {
-		err = skb_unclone(skb, GFP_ATOMIC);
-		if (unlikely(err))
-			goto error;
-		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
-		return skb;
-	} else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-		   tunnel->parms.o_flags&TUNNEL_CSUM) {
-		err = skb_checksum_help(skb);
-		if (unlikely(err))
-			goto error;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	return skb;
-
-error:
-	kfree_skb(skb);
-	return ERR_PTR(err);
-}
-
-static struct sk_buff *gre_build_header(struct sk_buff *skb,
-					const struct tnl_ptk_info *tpi,
-					int hdr_len)
-{
-	struct gre_base_hdr *greh;
-
-	skb_push(skb, hdr_len);
-
-	greh = (struct gre_base_hdr *)skb->data;
-	greh->flags = tnl_flags_to_gre_flags(tpi->flags);
-	greh->protocol = tpi->proto;
-
-	if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
-		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
-		if (tpi->flags&TUNNEL_SEQ) {
-			*ptr = tpi->seq;
-			ptr--;
-		}
-		if (tpi->flags&TUNNEL_KEY) {
-			*ptr = tpi->key;
-			ptr--;
-		}
-		if (tpi->flags&TUNNEL_CSUM &&
-		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
-			*(__sum16 *)ptr = 0;
-			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
-								 skb->len, 0));
-		}
-	}
-
-	return skb;
+	return PACKET_REJECT;
 }
 
 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -410,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct tnl_ptk_info tpi;
 
-	if (likely(!skb->encapsulation)) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
-
 	tpi.flags = tunnel->parms.o_flags;
 	tpi.proto = proto;
 	tpi.key = tunnel->parms.o_key;
@@ -423,11 +238,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 	tpi.seq = htonl(tunnel->o_seqno);
 
 	/* Push GRE header. */
-	skb = gre_build_header(skb, &tpi, tunnel->hlen);
-	if (unlikely(!skb)) {
-		dev->stats.tx_dropped++;
-		return;
-	}
+	gre_build_header(skb, &tpi, tunnel->hlen);
 
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
@@ -438,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tnl_params;
 
-	skb = handle_offloads(tunnel, skb);
+	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
 	if (IS_ERR(skb))
 		goto out;
 
@@ -477,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
-	skb = handle_offloads(tunnel, skb);
+	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
 	if (IS_ERR(skb))
 		goto out;
 
@@ -708,9 +519,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	return ip_tunnel_init(dev);
 }
 
-static const struct gre_protocol ipgre_protocol = {
+static struct gre_cisco_protocol ipgre_protocol = {
 	.handler = ipgre_rcv,
 	.err_handler = ipgre_err,
+	.priority = 0,
 };
 
 static int __net_init ipgre_init_net(struct net *net)
@@ -978,7 +790,7 @@ static int __init ipgre_init(void)
 	if (err < 0)
 		goto pnet_tap_faied;
 
-	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	err = gre_cisco_register(&ipgre_protocol);
 	if (err < 0) {
 		pr_info("%s: can't add protocol\n", __func__);
 		goto add_proto_failed;
@@ -997,7 +809,7 @@ static int __init ipgre_init(void)
 tap_ops_failed:
 	rtnl_link_unregister(&ipgre_link_ops);
 rtnl_link_failed:
-	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
+	gre_cisco_unregister(&ipgre_protocol);
 add_proto_failed:
 	unregister_pernet_device(&ipgre_tap_net_ops);
 pnet_tap_faied:
@@ -1009,8 +821,7 @@ static void __exit ipgre_fini(void)
 {
 	rtnl_link_unregister(&ipgre_tap_ops);
 	rtnl_link_unregister(&ipgre_link_ops);
-	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
-		pr_info("%s: can't remove protocol\n", __func__);
+	gre_cisco_unregister(&ipgre_protocol);
 	unregister_pernet_device(&ipgre_tap_net_ops);
 	unregister_pernet_device(&ipgre_net_ops);
 }
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e189db409b0e..bd227e5ea9da 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -408,13 +408,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	const struct iphdr *iph = ip_hdr(skb);
 	int err;
 
-	secpath_reset(skb);
-
-	skb->protocol = tpi->proto;
-
-	skb->mac_header = skb->network_header;
-	__pskb_pull(skb, tunnel->hlen);
-	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 	if (ipv4_is_multicast(iph->daddr)) {
 		/* Looped back packet, drop it! */
@@ -442,23 +435,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 	}
 
-	/* Warning: All skb pointers will be invalidated! */
-	if (tunnel->dev->type == ARPHRD_ETHER) {
-		if (!pskb_may_pull(skb, ETH_HLEN)) {
-			tunnel->dev->stats.rx_length_errors++;
-			tunnel->dev->stats.rx_errors++;
-			goto drop;
-		}
-
-		iph = ip_hdr(skb);
-		skb->protocol = eth_type_trans(skb, tunnel->dev);
-		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-	}
-
-	skb->pkt_type = PACKET_HOST;
-	__skb_tunnel_rx(skb, tunnel->dev);
-
-	skb_reset_network_header(skb);
 	err = IP_ECN_decapsulate(iph, skb);
 	if (unlikely(err)) {
 		if (log_ecn_error)
@@ -477,6 +453,12 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 	tstats->rx_bytes += skb->len;
 	u64_stats_update_end(&tstats->syncp);
 
+	if (tunnel->dev->type == ARPHRD_ETHER) {
+		skb->protocol = eth_type_trans(skb, tunnel->dev);
+		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+	} else {
+		skb->dev = tunnel->dev;
+	}
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;
 
@@ -491,19 +473,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *inner_iph;
-	struct iphdr *iph;
 	struct flowi4 fl4;
 	u8 tos, ttl;
 	__be16 df;
 	struct rtable *rt;		/* Route to the other host */
-	struct net_device *tdev;	/* Device to other host */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst;
 	int mtu;
+	int err;
 
 	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
 
-	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 	dst = tnl_params->daddr;
 	if (dst == 0) {
 		/* NBMA tunnel */
@@ -571,14 +551,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		dev->stats.tx_carrier_errors++;
 		goto tx_error;
 	}
-	tdev = rt->dst.dev;
-
-	if (tdev == dev) {
+	if (rt->dst.dev == dev) {
 		ip_rt_put(rt);
 		dev->stats.collisions++;
 		goto tx_error;
 	}
-
 	df = tnl_params->frag_off;
 
 	if (df)
@@ -596,6 +573,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (!skb_is_gso(skb) &&
 	    (inner_iph->frag_off&htons(IP_DF)) &&
 	    mtu < ntohs(inner_iph->tot_len)) {
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
 		goto tx_error;
@@ -646,8 +624,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		ttl = ip4_dst_hoplimit(&rt->dst);
 	}
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
+	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 			+ rt->dst.header_len;
 	if (max_headroom > dev->needed_headroom) {
 		dev->needed_headroom = max_headroom;
 		if (skb_cow_head(skb, dev->needed_headroom)) {
@@ -657,27 +635,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		}
 	}
 
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
-	/* Push down and install the IP header. */
-	skb_push(skb, sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-
-	iph = ip_hdr(skb);
-	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+	err = iptunnel_xmit(dev_net(dev), rt, skb,
+			    fl4.saddr, fl4.daddr, protocol,
+			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df);
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 
-	iph->version = 4;
-	iph->ihl = sizeof(struct iphdr) >> 2;
-	iph->frag_off = df;
-	iph->protocol = protocol;
-	iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
-	iph->daddr = fl4.daddr;
-	iph->saddr = fl4.saddr;
-	iph->ttl = ttl;
-	tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
-
-	iptunnel_xmit(skb, dev);
 	return;
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
new file mode 100644
index 000000000000..7167b08977df
--- /dev/null
+++ b/net/ipv4/ip_tunnel_core.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/ip_tunnels.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+int iptunnel_xmit(struct net *net, struct rtable *rt,
+		  struct sk_buff *skb,
+		  __be32 src, __be32 dst, __u8 proto,
+		  __u8 tos, __u8 ttl, __be16 df)
+{
+	int pkt_len = skb->len;
+	struct iphdr *iph;
+	int err;
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	skb->rxhash = 0;
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->dst);
+	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
+	/* Push down and install the IP header. */
+	__skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb);
+
+	iph->version = 4;
+	iph->ihl = sizeof(struct iphdr) >> 2;
+	iph->frag_off = df;
+	iph->protocol = proto;
+	iph->tos = tos;
+	iph->daddr = dst;
+	iph->saddr = src;
+	iph->ttl = ttl;
+	tunnel_ip_select_ident(skb,
+			       (const struct iphdr *)skb_inner_network_header(skb),
+			       &rt->dst);
+
+	err = ip_local_out(skb);
+	if (unlikely(net_xmit_eval(err)))
+		pkt_len = 0;
+	return pkt_len;
+}
+EXPORT_SYMBOL_GPL(iptunnel_xmit);
+
+int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
+{
+	if (unlikely(!pskb_may_pull(skb, hdr_len)))
+		return -ENOMEM;
+
+	skb_pull_rcsum(skb, hdr_len);
+
+	if (inner_proto == htons(ETH_P_TEB)) {
+		struct ethhdr *eh = (struct ethhdr *)skb->data;
+
+		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+			return -ENOMEM;
+
+		if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+			skb->protocol = eh->h_proto;
+		else
+			skb->protocol = htons(ETH_P_802_2);
+
+	} else {
+		skb->protocol = inner_proto;
+	}
+
+	nf_reset(skb);
+	secpath_reset(skb);
+	if (!skb->l4_rxhash)
+		skb->rxhash = 0;
+	skb_dst_drop(skb);
+	skb->vlan_tci = 0;
+	skb_set_queue_mapping(skb, 0);
+	skb->pkt_type = PACKET_HOST;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iptunnel_pull_header);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9df7ecd393f2..e6905fbda2a2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 	struct ip_tunnel *tunnel;
-	const struct iphdr *iph = ip_hdr(skb);
+	const struct iphdr *iph;
 
+	if (iptunnel_pull_header(skb, 0, tpi.proto))
+		goto drop;
+
+	iph = ip_hdr(skb);
 	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 				  iph->saddr, iph->daddr, 0);
 	if (tunnel) {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 6b9c1f128eaf..6cee844678e2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -640,9 +640,14 @@ static const struct tnl_ptk_info tpi = {
 
 static int ipip_rcv(struct sk_buff *skb)
 {
-	const struct iphdr *iph = ip_hdr(skb);
+	const struct iphdr *iph;
 	struct ip_tunnel *tunnel;
 
+	if (iptunnel_pull_header(skb, 0, tpi.proto))
+		goto drop;
+
+	iph = ip_hdr(skb);
+
 	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 				     iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
@@ -723,13 +728,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;		/* Route to the other host */
 	struct net_device *tdev;	/* Device to other host */
-	struct iphdr *iph;		/* Our new IP header */
 	unsigned int max_headroom;	/* The extra header space needed */
 	__be32 dst = tiph->daddr;
 	struct flowi4 fl4;
 	int mtu;
 	const struct in6_addr *addr6;
 	int addr_type;
+	u8 ttl;
+	int err;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto tx_error;
@@ -872,34 +878,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		skb = new_skb;
 		iph6 = ipv6_hdr(skb);
 	}
-
-	skb->transport_header = skb->network_header;
-	skb_push(skb, sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags = 0;
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
-	/*
-	 *	Push down and install the IPIP header.
-	 */
-
-	iph = ip_hdr(skb);
-	iph->version = 4;
-	iph->ihl = sizeof(struct iphdr)>>2;
-	iph->frag_off = df;
-	iph->protocol = IPPROTO_IPV6;
-	iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
-	iph->daddr = fl4.daddr;
-	iph->saddr = fl4.saddr;
-
-	if ((iph->ttl = tiph->ttl) == 0)
-		iph->ttl = iph6->hop_limit;
-
-	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(iph, skb_dst(skb), NULL);
-	iptunnel_xmit(skb, dev);
+	ttl = tiph->ttl;
+	if (ttl == 0)
+		ttl = iph6->hop_limit;
+	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
+
+	err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr,
+			    IPPROTO_IPV6, tos, ttl, df);
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 	return NETDEV_TX_OK;
 
 tx_error_icmp:
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index d9ea33c361be..9fbc04a31ed6 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -19,6 +19,8 @@ config OPENVSWITCH
 	  which is able to accept configuration from a variety of sources and
 	  translate it into packet processing rules.
 
+	  Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX.
+
 	  See http://openvswitch.org for more information and userspace
 	  utilities.
 
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 15e7384745c1..01bddb2991e3 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -10,5 +10,6 @@ openvswitch-y := \
 	dp_notify.o \
 	flow.o \
 	vport.o \
+	vport-gre.o \
 	vport-internal_dev.o \
-	vport-netdev.o \
+	vport-netdev.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 596d6373399d..22c5f399f1cf 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -436,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb,
 		skb->mark = nla_get_u32(nested_attr);
 		break;
 
+	case OVS_KEY_ATTR_IPV4_TUNNEL:
+		OVS_CB(skb)->tun_key = nla_data(nested_attr);
+		break;
+
 	case OVS_KEY_ATTR_ETHERNET:
 		err = set_eth_addr(skb, nla_data(nested_attr));
 		break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 0f783d9fa00d..f7e3a0d84c40 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 static size_t key_attr_size(void)
 {
 	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
+		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
+		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
+		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
 		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
 		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
@@ -464,16 +472,89 @@ static int flush_flows(struct datapath *dp)
 	return 0;
 }
 
-static int validate_actions(const struct nlattr *attr,
-			    const struct sw_flow_key *key, int depth);
+static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
+{
+
+	struct sw_flow_actions *acts;
+	int new_acts_size;
+	int req_size = NLA_ALIGN(attr_len);
+	int next_offset = offsetof(struct sw_flow_actions, actions) +
+					(*sfa)->actions_len;
+
+	if (req_size <= (ksize(*sfa) - next_offset))
+		goto out;
+
+	new_acts_size = ksize(*sfa) * 2;
+
+	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+			return ERR_PTR(-EMSGSIZE);
+		new_acts_size = MAX_ACTIONS_BUFSIZE;
+	}
+
+	acts = ovs_flow_actions_alloc(new_acts_size);
+	if (IS_ERR(acts))
+		return (void *)acts;
 
-static int validate_sample(const struct nlattr *attr,
-			   const struct sw_flow_key *key, int depth)
+	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
+	acts->actions_len = (*sfa)->actions_len;
+	kfree(*sfa);
+	*sfa = acts;
+
+out:
+	(*sfa)->actions_len += req_size;
+	return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
+}
+
+static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+{
+	struct nlattr *a;
+
+	a = reserve_sfa_size(sfa, nla_attr_size(len));
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	a->nla_type = attrtype;
+	a->nla_len = nla_attr_size(len);
+
+	if (data)
+		memcpy(nla_data(a), data, len);
+	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+
+	return 0;
+}
+
+static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
+{
+	int used = (*sfa)->actions_len;
+	int err;
+
+	err = add_action(sfa, attrtype, NULL, 0);
+	if (err)
+		return err;
+
+	return used;
+}
+
+static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
+{
+	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
+
+	a->nla_len = sfa->actions_len - st_offset;
+}
+
+static int validate_and_copy_actions(const struct nlattr *attr,
+				     const struct sw_flow_key *key, int depth,
+				     struct sw_flow_actions **sfa);
+
+static int validate_and_copy_sample(const struct nlattr *attr,
+				    const struct sw_flow_key *key, int depth,
+				    struct sw_flow_actions **sfa)
 {
 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
 	const struct nlattr *probability, *actions;
 	const struct nlattr *a;
-	int rem;
+	int rem, start, err, st_acts;
 
 	memset(attrs, 0, sizeof(attrs));
 	nla_for_each_nested(a, attr, rem) {
@@ -492,7 +573,26 @@ static int validate_sample(const struct nlattr *attr,
 	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
 	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
 		return -EINVAL;
-	return validate_actions(actions, key, depth + 1);
+
+	/* validation done, copy sample action. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+	if (start < 0)
+		return start;
+	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
+	if (err)
+		return err;
+	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+	if (st_acts < 0)
+		return st_acts;
+
+	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, st_acts);
+	add_nested_action_end(*sfa, start);
+
+	return 0;
 }
 
 static int validate_tp_port(const struct sw_flow_key *flow_key)
@@ -508,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
508 return -EINVAL; 608 return -EINVAL;
509} 609}
510 610
611static int validate_and_copy_set_tun(const struct nlattr *attr,
612 struct sw_flow_actions **sfa)
613{
614 struct ovs_key_ipv4_tunnel tun_key;
615 int err, start;
616
617 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
618 if (err)
619 return err;
620
621 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
622 if (start < 0)
623 return start;
624
625 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
626 add_nested_action_end(*sfa, start);
627
628 return err;
629}
630
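Note that the copied set-tunnel action is stored pre-parsed: a nested OVS_ACTION_ATTR_SET whose only child carries the binary struct ovs_key_ipv4_tunnel, so the packet fast path can cast the payload rather than re-walk netlink attributes per packet. A sketch of that layout, with invented types and attribute numbers:

/* Sketch: binary struct embedded as the sole child of a set action. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct nlattr { uint16_t nla_len, nla_type; };
struct tun { uint32_t dst; uint8_t ttl; uint8_t pad[3]; };

static union {
	unsigned char bytes[64];
	uint32_t align;
} act;

int main(void)
{
	struct nlattr *set = (struct nlattr *)act.bytes;
	struct nlattr *key = set + 1;		/* first (and only) child */
	struct tun t = { .dst = 0x0a000001, .ttl = 64 };

	key->nla_type = 2;			/* OVS_KEY_ATTR_IPV4_TUNNEL */
	key->nla_len = sizeof(*key) + sizeof(t);
	memcpy(key + 1, &t, sizeof(t));
	set->nla_type = 1;			/* OVS_ACTION_ATTR_SET */
	set->nla_len = sizeof(*set) + key->nla_len;

	/* execution path: one cast, no netlink parsing */
	const struct tun *hot = (const struct tun *)(key + 1);
	printf("set-tunnel dst %08x ttl %u\n", (unsigned)hot->dst, hot->ttl);
	return 0;
}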
511static int validate_set(const struct nlattr *a, 631static int validate_set(const struct nlattr *a,
512 const struct sw_flow_key *flow_key) 632 const struct sw_flow_key *flow_key,
633 struct sw_flow_actions **sfa,
634 bool *set_tun)
513{ 635{
514 const struct nlattr *ovs_key = nla_data(a); 636 const struct nlattr *ovs_key = nla_data(a);
515 int key_type = nla_type(ovs_key); 637 int key_type = nla_type(ovs_key);
@@ -519,18 +641,27 @@ static int validate_set(const struct nlattr *a,
519 return -EINVAL; 641 return -EINVAL;
520 642
521 if (key_type > OVS_KEY_ATTR_MAX || 643 if (key_type > OVS_KEY_ATTR_MAX ||
522 nla_len(ovs_key) != ovs_key_lens[key_type]) 644 (ovs_key_lens[key_type] != nla_len(ovs_key) &&
645 ovs_key_lens[key_type] != -1))
523 return -EINVAL; 646 return -EINVAL;
524 647
525 switch (key_type) { 648 switch (key_type) {
526 const struct ovs_key_ipv4 *ipv4_key; 649 const struct ovs_key_ipv4 *ipv4_key;
527 const struct ovs_key_ipv6 *ipv6_key; 650 const struct ovs_key_ipv6 *ipv6_key;
651 int err;
528 652
529 case OVS_KEY_ATTR_PRIORITY: 653 case OVS_KEY_ATTR_PRIORITY:
530 case OVS_KEY_ATTR_SKB_MARK: 654 case OVS_KEY_ATTR_SKB_MARK:
531 case OVS_KEY_ATTR_ETHERNET: 655 case OVS_KEY_ATTR_ETHERNET:
532 break; 656 break;
533 657
658 case OVS_KEY_ATTR_TUNNEL:
659 *set_tun = true;
660 err = validate_and_copy_set_tun(a, sfa);
661 if (err)
662 return err;
663 break;
664
534 case OVS_KEY_ATTR_IPV4: 665 case OVS_KEY_ATTR_IPV4:
535 if (flow_key->eth.type != htons(ETH_P_IP)) 666 if (flow_key->eth.type != htons(ETH_P_IP))
536 return -EINVAL; 667 return -EINVAL;
@@ -606,8 +737,24 @@ static int validate_userspace(const struct nlattr *attr)
606 return 0; 737 return 0;
607} 738}
608 739
609static int validate_actions(const struct nlattr *attr, 740static int copy_action(const struct nlattr *from,
610 const struct sw_flow_key *key, int depth) 741 struct sw_flow_actions **sfa)
742{
743 int totlen = NLA_ALIGN(from->nla_len);
744 struct nlattr *to;
745
746 to = reserve_sfa_size(sfa, from->nla_len);
747 if (IS_ERR(to))
748 return PTR_ERR(to);
749
750 memcpy(to, from, totlen);
751 return 0;
752}
753
754static int validate_and_copy_actions(const struct nlattr *attr,
755 const struct sw_flow_key *key,
756 int depth,
757 struct sw_flow_actions **sfa)
611{ 758{
612 const struct nlattr *a; 759 const struct nlattr *a;
613 int rem, err; 760 int rem, err;
@@ -627,12 +774,14 @@ static int validate_actions(const struct nlattr *attr,
627 }; 774 };
628 const struct ovs_action_push_vlan *vlan; 775 const struct ovs_action_push_vlan *vlan;
629 int type = nla_type(a); 776 int type = nla_type(a);
777 bool skip_copy;
630 778
631 if (type > OVS_ACTION_ATTR_MAX || 779 if (type > OVS_ACTION_ATTR_MAX ||
632 (action_lens[type] != nla_len(a) && 780 (action_lens[type] != nla_len(a) &&
633 action_lens[type] != (u32)-1)) 781 action_lens[type] != (u32)-1))
634 return -EINVAL; 782 return -EINVAL;
635 783
784 skip_copy = false;
636 switch (type) { 785 switch (type) {
637 case OVS_ACTION_ATTR_UNSPEC: 786 case OVS_ACTION_ATTR_UNSPEC:
638 return -EINVAL; 787 return -EINVAL;
@@ -661,20 +810,26 @@ static int validate_actions(const struct nlattr *attr,
661 break; 810 break;
662 811
663 case OVS_ACTION_ATTR_SET: 812 case OVS_ACTION_ATTR_SET:
664 err = validate_set(a, key); 813 err = validate_set(a, key, sfa, &skip_copy);
665 if (err) 814 if (err)
666 return err; 815 return err;
667 break; 816 break;
668 817
669 case OVS_ACTION_ATTR_SAMPLE: 818 case OVS_ACTION_ATTR_SAMPLE:
670 err = validate_sample(a, key, depth); 819 err = validate_and_copy_sample(a, key, depth, sfa);
671 if (err) 820 if (err)
672 return err; 821 return err;
822 skip_copy = true;
673 break; 823 break;
674 824
675 default: 825 default:
676 return -EINVAL; 826 return -EINVAL;
677 } 827 }
828 if (!skip_copy) {
829 err = copy_action(a, sfa);
830 if (err)
831 return err;
832 }
678 } 833 }
679 834
680 if (rem > 0) 835 if (rem > 0)
@@ -739,21 +894,18 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
739 if (err) 894 if (err)
740 goto err_flow_free; 895 goto err_flow_free;
741 896
742 err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); 897 err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
743 if (err) 898 if (err)
744 goto err_flow_free; 899 goto err_flow_free;
745 900 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
746 err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
747 if (err)
748 goto err_flow_free;
749
750 flow->hash = ovs_flow_hash(&flow->key, key_len);
751
752 acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
753 err = PTR_ERR(acts); 901 err = PTR_ERR(acts);
754 if (IS_ERR(acts)) 902 if (IS_ERR(acts))
755 goto err_flow_free; 903 goto err_flow_free;
904
905 err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
756 rcu_assign_pointer(flow->sf_acts, acts); 906 rcu_assign_pointer(flow->sf_acts, acts);
907 if (err)
908 goto err_flow_free;
757 909
758 OVS_CB(packet)->flow = flow; 910 OVS_CB(packet)->flow = flow;
759 packet->priority = flow->key.phy.priority; 911 packet->priority = flow->key.phy.priority;
@@ -843,6 +995,99 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
843 .name = OVS_FLOW_MCGROUP 995 .name = OVS_FLOW_MCGROUP
844}; 996};
845 997
998static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
999static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1000{
1001 const struct nlattr *a;
1002 struct nlattr *start;
1003 int err = 0, rem;
1004
1005 start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
1006 if (!start)
1007 return -EMSGSIZE;
1008
1009 nla_for_each_nested(a, attr, rem) {
1010 int type = nla_type(a);
1011 struct nlattr *st_sample;
1012
1013 switch (type) {
1014 case OVS_SAMPLE_ATTR_PROBABILITY:
1015 if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
1016 return -EMSGSIZE;
1017 break;
1018 case OVS_SAMPLE_ATTR_ACTIONS:
1019 st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
1020 if (!st_sample)
1021 return -EMSGSIZE;
1022 err = actions_to_attr(nla_data(a), nla_len(a), skb);
1023 if (err)
1024 return err;
1025 nla_nest_end(skb, st_sample);
1026 break;
1027 }
1028 }
1029
1030 nla_nest_end(skb, start);
1031 return err;
1032}
1033
1034static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1035{
1036 const struct nlattr *ovs_key = nla_data(a);
1037 int key_type = nla_type(ovs_key);
1038 struct nlattr *start;
1039 int err;
1040
1041 switch (key_type) {
1042 case OVS_KEY_ATTR_IPV4_TUNNEL:
1043 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
1044 if (!start)
1045 return -EMSGSIZE;
1046
1047 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
1048 if (err)
1049 return err;
1050 nla_nest_end(skb, start);
1051 break;
1052 default:
1053 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
1054 return -EMSGSIZE;
1055 break;
1056 }
1057
1058 return 0;
1059}
1060
1061static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
1062{
1063 const struct nlattr *a;
1064 int rem, err;
1065
1066 nla_for_each_attr(a, attr, len, rem) {
1067 int type = nla_type(a);
1068
1069 switch (type) {
1070 case OVS_ACTION_ATTR_SET:
1071 err = set_action_to_attr(a, skb);
1072 if (err)
1073 return err;
1074 break;
1075
1076 case OVS_ACTION_ATTR_SAMPLE:
1077 err = sample_action_to_attr(a, skb);
1078 if (err)
1079 return err;
1080 break;
1081 default:
1082 if (nla_put(skb, type, nla_len(a), nla_data(a)))
1083 return -EMSGSIZE;
1084 break;
1085 }
1086 }
1087
1088 return 0;
1089}
1090
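actions_to_attr() is the inverse translation for flow dumps: it walks the stored attribute stream exactly as nla_for_each_attr() does, advancing by NLA_ALIGN(nla_len) per attribute, and re-emits each action for userspace. A sketch of that walk over hand-built data:

/* Sketch: the advance-by-NLA_ALIGN(nla_len) attribute walk. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NLA_ALIGN(len)	(((len) + 3) & ~3)

struct nlattr { uint16_t nla_len, nla_type; };

static void walk(const unsigned char *p, int rem)
{
	const struct nlattr *a = (const struct nlattr *)p;

	while (rem >= (int)sizeof(*a) &&
	       a->nla_len >= sizeof(*a) && (int)a->nla_len <= rem) {
		printf("type %u, payload %u bytes\n", (unsigned)a->nla_type,
		       (unsigned)(a->nla_len - sizeof(*a)));
		rem -= NLA_ALIGN(a->nla_len);
		a = (const struct nlattr *)((const unsigned char *)a +
					    NLA_ALIGN(a->nla_len));
	}
}

int main(void)
{
	static union { unsigned char b[32]; struct nlattr a; } buf;
	struct nlattr hdr = { .nla_len = 8, .nla_type = 3 };
	uint32_t v = 7;

	memcpy(buf.b, &hdr, sizeof(hdr));	/* attr 1: 4-byte payload */
	memcpy(buf.b + 4, &v, sizeof(v));
	hdr.nla_len = 4; hdr.nla_type = 9;	/* attr 2: empty */
	memcpy(buf.b + 8, &hdr, sizeof(hdr));
	walk(buf.b, 12);
	return 0;
}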
846static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 1091static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
847{ 1092{
848 return NLMSG_ALIGN(sizeof(struct ovs_header)) 1093 return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -860,6 +1105,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
860{ 1105{
861 const int skb_orig_len = skb->len; 1106 const int skb_orig_len = skb->len;
862 const struct sw_flow_actions *sf_acts; 1107 const struct sw_flow_actions *sf_acts;
1108 struct nlattr *start;
863 struct ovs_flow_stats stats; 1109 struct ovs_flow_stats stats;
864 struct ovs_header *ovs_header; 1110 struct ovs_header *ovs_header;
865 struct nlattr *nla; 1111 struct nlattr *nla;
@@ -913,10 +1159,19 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
913 * This can only fail for dump operations because the skb is always 1159 * This can only fail for dump operations because the skb is always
914 * properly sized for single flows. 1160 * properly sized for single flows.
915 */ 1161 */
916 err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, 1162 start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
917 sf_acts->actions); 1163 if (start) {
918 if (err < 0 && skb_orig_len) 1164 err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
919 goto error; 1165 if (!err)
1166 nla_nest_end(skb, start);
1167 else {
1168 if (skb_orig_len)
1169 goto error;
1170
1171 nla_nest_cancel(skb, start);
1172 }
1173 } else if (skb_orig_len)
1174 goto nla_put_failure;
920 1175
921 return genlmsg_end(skb, ovs_header); 1176 return genlmsg_end(skb, ovs_header);
922 1177
@@ -961,6 +1216,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
961 struct sk_buff *reply; 1216 struct sk_buff *reply;
962 struct datapath *dp; 1217 struct datapath *dp;
963 struct flow_table *table; 1218 struct flow_table *table;
1219 struct sw_flow_actions *acts = NULL;
964 int error; 1220 int error;
965 int key_len; 1221 int key_len;
966 1222
@@ -974,9 +1230,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
974 1230
975 /* Validate actions. */ 1231 /* Validate actions. */
976 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1232 if (a[OVS_FLOW_ATTR_ACTIONS]) {
977 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); 1233 acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
978 if (error) 1234 error = PTR_ERR(acts);
1235 if (IS_ERR(acts))
979 goto error; 1236 goto error;
1237
1238 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
1239 if (error)
1240 goto err_kfree;
980 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { 1241 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
981 error = -EINVAL; 1242 error = -EINVAL;
982 goto error; 1243 goto error;
@@ -991,8 +1252,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
991 table = ovsl_dereference(dp->table); 1252 table = ovsl_dereference(dp->table);
992 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1253 flow = ovs_flow_tbl_lookup(table, &key, key_len);
993 if (!flow) { 1254 if (!flow) {
994 struct sw_flow_actions *acts;
995
996 /* Bail out if we're not allowed to create a new flow. */ 1255 /* Bail out if we're not allowed to create a new flow. */
997 error = -ENOENT; 1256 error = -ENOENT;
998 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 1257 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1016,19 +1275,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1016 error = PTR_ERR(flow); 1275 error = PTR_ERR(flow);
1017 goto err_unlock_ovs; 1276 goto err_unlock_ovs;
1018 } 1277 }
1019 flow->key = key;
1020 clear_stats(flow); 1278 clear_stats(flow);
1021 1279
1022 /* Obtain actions. */
1023 acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
1024 error = PTR_ERR(acts);
1025 if (IS_ERR(acts))
1026 goto error_free_flow;
1027 rcu_assign_pointer(flow->sf_acts, acts); 1280 rcu_assign_pointer(flow->sf_acts, acts);
1028 1281
1029 /* Put flow in bucket. */ 1282 /* Put flow in bucket. */
1030 flow->hash = ovs_flow_hash(&key, key_len); 1283 ovs_flow_tbl_insert(table, flow, &key, key_len);
1031 ovs_flow_tbl_insert(table, flow);
1032 1284
1033 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1285 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1034 info->snd_seq, 1286 info->snd_seq,
@@ -1036,7 +1288,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1036 } else { 1288 } else {
1037 /* We found a matching flow. */ 1289 /* We found a matching flow. */
1038 struct sw_flow_actions *old_acts; 1290 struct sw_flow_actions *old_acts;
1039 struct nlattr *acts_attrs;
1040 1291
1041 /* Bail out if we're not allowed to modify an existing flow. 1292 /* Bail out if we're not allowed to modify an existing flow.
1042 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL 1293 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
@@ -1051,21 +1302,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1051 1302
1052 /* Update actions. */ 1303 /* Update actions. */
1053 old_acts = ovsl_dereference(flow->sf_acts); 1304 old_acts = ovsl_dereference(flow->sf_acts);
1054 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1305 rcu_assign_pointer(flow->sf_acts, acts);
1055 if (acts_attrs && 1306 ovs_flow_deferred_free_acts(old_acts);
1056 (old_acts->actions_len != nla_len(acts_attrs) ||
1057 memcmp(old_acts->actions, nla_data(acts_attrs),
1058 old_acts->actions_len))) {
1059 struct sw_flow_actions *new_acts;
1060
1061 new_acts = ovs_flow_actions_alloc(acts_attrs);
1062 error = PTR_ERR(new_acts);
1063 if (IS_ERR(new_acts))
1064 goto err_unlock_ovs;
1065
1066 rcu_assign_pointer(flow->sf_acts, new_acts);
1067 ovs_flow_deferred_free_acts(old_acts);
1068 }
1069 1307
1070 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1308 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1071 info->snd_seq, OVS_FLOW_CMD_NEW); 1309 info->snd_seq, OVS_FLOW_CMD_NEW);
@@ -1086,10 +1324,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1086 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1324 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1087 return 0; 1325 return 0;
1088 1326
1089error_free_flow:
1090 ovs_flow_free(flow);
1091err_unlock_ovs: 1327err_unlock_ovs:
1092 ovs_unlock(); 1328 ovs_unlock();
1329err_kfree:
1330 kfree(acts);
1093error: 1331error:
1094 return error; 1332 return error;
1095} 1333}
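The rewritten update path allocates and validates the replacement action list before touching flow-table state, publishes it with rcu_assign_pointer(), and retires the old list with ovs_flow_deferred_free_acts() only after a grace period. Userspace has no cheap grace-period equivalent, so the C11 sketch below models only the release-ordered publish and frees immediately; read it as the shape of the idiom, not the kernel mechanism:

/* Sketch: release-ordered pointer publish, standing in for
 * rcu_assign_pointer(flow->sf_acts, acts). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct acts { int len; };

static _Atomic(struct acts *) cur_acts;	/* plays flow->sf_acts */

static void update(struct acts *new_acts)
{
	/* release ordering guarantees the new list is fully initialized
	 * before any reader can observe the pointer */
	struct acts *old = atomic_exchange_explicit(&cur_acts, new_acts,
						    memory_order_release);

	/* kernel: ovs_flow_deferred_free_acts(old) waits out readers first */
	free(old);
}

int main(void)
{
	struct acts *a = malloc(sizeof(*a));

	if (!a)
		return 1;
	a->len = 8;
	update(a);
	printf("published action list, len=%d\n",
	       atomic_load_explicit(&cur_acts, memory_order_acquire)->len);
	return 0;
}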
@@ -1866,8 +2104,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1866 goto exit_unlock; 2104 goto exit_unlock;
1867 } 2105 }
1868 2106
1869 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 2107 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1870 OVS_VPORT_CMD_DEL); 2108 info->snd_seq, OVS_VPORT_CMD_DEL);
1871 err = PTR_ERR(reply); 2109 err = PTR_ERR(reply);
1872 if (IS_ERR(reply)) 2110 if (IS_ERR(reply))
1873 goto exit_unlock; 2111 goto exit_unlock;
@@ -1896,8 +2134,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1896 if (IS_ERR(vport)) 2134 if (IS_ERR(vport))
1897 goto exit_unlock; 2135 goto exit_unlock;
1898 2136
1899 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 2137 reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
1900 OVS_VPORT_CMD_NEW); 2138 info->snd_seq, OVS_VPORT_CMD_NEW);
1901 err = PTR_ERR(reply); 2139 err = PTR_ERR(reply);
1902 if (IS_ERR(reply)) 2140 if (IS_ERR(reply))
1903 goto exit_unlock; 2141 goto exit_unlock;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 16b840695216..a91486484916 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,9 +88,12 @@ struct datapath {
88/** 88/**
89 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
90 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
91 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
92 * packet is not being tunneled.
91 */ 93 */
92struct ovs_skb_cb { 94struct ovs_skb_cb {
93 struct sw_flow *flow; 95 struct sw_flow *flow;
96 struct ovs_key_ipv4_tunnel *tun_key;
94}; 97};
95#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 98#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
96 99
@@ -119,6 +122,7 @@ struct dp_upcall_info {
119struct ovs_net { 122struct ovs_net {
120 struct list_head dps; 123 struct list_head dps;
121 struct work_struct dp_notify_work; 124 struct work_struct dp_notify_work;
125 struct vport_net vport_net;
122}; 126};
123 127
124extern int ovs_net_id; 128extern int ovs_net_id;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 093c191d4fc2..5c519b121e1b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -40,6 +40,7 @@
40#include <linux/icmpv6.h> 40#include <linux/icmpv6.h>
41#include <linux/rculist.h> 41#include <linux/rculist.h>
42#include <net/ip.h> 42#include <net/ip.h>
43#include <net/ip_tunnels.h>
43#include <net/ipv6.h> 44#include <net/ipv6.h>
44#include <net/ndisc.h> 45#include <net/ndisc.h>
45 46
@@ -198,20 +199,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
198 spin_unlock(&flow->lock); 199 spin_unlock(&flow->lock);
199} 200}
200 201
201struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) 202struct sw_flow_actions *ovs_flow_actions_alloc(int size)
202{ 203{
203 int actions_len = nla_len(actions);
204 struct sw_flow_actions *sfa; 204 struct sw_flow_actions *sfa;
205 205
206 if (actions_len > MAX_ACTIONS_BUFSIZE) 206 if (size > MAX_ACTIONS_BUFSIZE)
207 return ERR_PTR(-EINVAL); 207 return ERR_PTR(-EINVAL);
208 208
209 sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); 209 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
210 if (!sfa) 210 if (!sfa)
211 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
212 212
213 sfa->actions_len = actions_len; 213 sfa->actions_len = 0;
214 nla_memcpy(sfa->actions, actions, actions_len);
215 return sfa; 214 return sfa;
216} 215}
217 216
@@ -354,6 +353,14 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
354 return NULL; 353 return NULL;
355} 354}
356 355
356static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
357{
358 struct hlist_head *head;
359 head = find_bucket(table, flow->hash);
360 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
361 table->count++;
362}
363
357static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) 364static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
358{ 365{
359 int old_ver; 366 int old_ver;
@@ -370,7 +377,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
370 head = flex_array_get(old->buckets, i); 377 head = flex_array_get(old->buckets, i);
371 378
372 hlist_for_each_entry(flow, head, hash_node[old_ver]) 379 hlist_for_each_entry(flow, head, hash_node[old_ver])
373 ovs_flow_tbl_insert(new, flow); 380 __flow_tbl_insert(new, flow);
374 } 381 }
375 old->keep_flows = true; 382 old->keep_flows = true;
376} 383}
@@ -605,6 +612,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
605 memset(key, 0, sizeof(*key)); 612 memset(key, 0, sizeof(*key));
606 613
607 key->phy.priority = skb->priority; 614 key->phy.priority = skb->priority;
615 if (OVS_CB(skb)->tun_key)
616 memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
608 key->phy.in_port = in_port; 617 key->phy.in_port = in_port;
609 key->phy.skb_mark = skb->mark; 618 key->phy.skb_mark = skb->mark;
610 619
@@ -762,9 +771,18 @@ out:
762 return error; 771 return error;
763} 772}
764 773
765u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) 774static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
766{ 775{
767 return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); 776 return jhash2((u32 *)((u8 *)key + key_start),
777 DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
778}
779
780static int flow_key_start(struct sw_flow_key *key)
781{
782 if (key->tun_key.ipv4_dst)
783 return 0;
784 else
785 return offsetof(struct sw_flow_key, phy);
768} 786}
769 787
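flow_key_start() exists because non-tunneled flows leave tun_key all-zero: hashing and comparing from the phy member onward keeps such flows byte-identical to their pre-tunnel behavior, while tunneled flows cover the whole key. A userspace sketch of offset-based hashing, with FNV-1a standing in for jhash2() and a toy two-member key:

/* Sketch: hash only from the first meaningful byte of the key. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tun { uint32_t dst; };		/* zero when not tunneled */
struct key {
	struct tun tun_key;
	struct { uint32_t in_port, mark; } phy;
};

static uint32_t fnv1a(const void *p, size_t n)
{
	const unsigned char *b = p;
	uint32_t h = 2166136261u;

	while (n--)
		h = (h ^ *b++) * 16777619u;
	return h;
}

/* like flow_key_start(): skip the tunnel prefix for untunneled flows */
static size_t key_start(const struct key *k)
{
	return k->tun_key.dst ? 0 : offsetof(struct key, phy);
}

static uint32_t key_hash(const struct key *k)
{
	size_t s = key_start(k);

	return fnv1a((const unsigned char *)k + s, sizeof(*k) - s);
}

int main(void)
{
	struct key a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.phy.in_port = b.phy.in_port = 5;
	b.tun_key.dst = 0x0a000001;	/* tunneled twin of the same packet */
	printf("untunneled %08x, tunneled %08x\n",
	       (unsigned)key_hash(&a), (unsigned)key_hash(&b));
	return 0;
}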
770struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 788struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
@@ -772,28 +790,31 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
772{ 790{
773 struct sw_flow *flow; 791 struct sw_flow *flow;
774 struct hlist_head *head; 792 struct hlist_head *head;
793 u8 *_key;
794 int key_start;
775 u32 hash; 795 u32 hash;
776 796
777 hash = ovs_flow_hash(key, key_len); 797 key_start = flow_key_start(key);
798 hash = ovs_flow_hash(key, key_start, key_len);
778 799
800 _key = (u8 *) key + key_start;
779 head = find_bucket(table, hash); 801 head = find_bucket(table, hash);
780 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 802 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
781 803
782 if (flow->hash == hash && 804 if (flow->hash == hash &&
783 !memcmp(&flow->key, key, key_len)) { 805 !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
784 return flow; 806 return flow;
785 } 807 }
786 } 808 }
787 return NULL; 809 return NULL;
788} 810}
789 811
790void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) 812void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
813 struct sw_flow_key *key, int key_len)
791{ 814{
792 struct hlist_head *head; 815 flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
793 816 memcpy(&flow->key, key, sizeof(flow->key));
794 head = find_bucket(table, flow->hash); 817 __flow_tbl_insert(table, flow);
795 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
796 table->count++;
797} 818}
798 819
799void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 820void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
@@ -820,6 +841,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
820 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 841 [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
821 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 842 [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
822 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 843 [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
844 [OVS_KEY_ATTR_TUNNEL] = -1,
823}; 845};
824 846
825static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, 847static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
@@ -957,6 +979,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr,
957 return 0; 979 return 0;
958} 980}
959 981
982int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
983 struct ovs_key_ipv4_tunnel *tun_key)
984{
985 struct nlattr *a;
986 int rem;
987 bool ttl = false;
988
989 memset(tun_key, 0, sizeof(*tun_key));
990
991 nla_for_each_nested(a, attr, rem) {
992 int type = nla_type(a);
993 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
994 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
995 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
996 [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
997 [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
998 [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
999 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
1000 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
1001 };
1002
1003 if (type > OVS_TUNNEL_KEY_ATTR_MAX ||
1004 ovs_tunnel_key_lens[type] != nla_len(a))
1005 return -EINVAL;
1006
1007 switch (type) {
1008 case OVS_TUNNEL_KEY_ATTR_ID:
1009 tun_key->tun_id = nla_get_be64(a);
1010 tun_key->tun_flags |= TUNNEL_KEY;
1011 break;
1012 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1013 tun_key->ipv4_src = nla_get_be32(a);
1014 break;
1015 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1016 tun_key->ipv4_dst = nla_get_be32(a);
1017 break;
1018 case OVS_TUNNEL_KEY_ATTR_TOS:
1019 tun_key->ipv4_tos = nla_get_u8(a);
1020 break;
1021 case OVS_TUNNEL_KEY_ATTR_TTL:
1022 tun_key->ipv4_ttl = nla_get_u8(a);
1023 ttl = true;
1024 break;
1025 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1026 tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
1027 break;
1028 case OVS_TUNNEL_KEY_ATTR_CSUM:
1029 tun_key->tun_flags |= TUNNEL_CSUM;
1030 break;
1031 default:
1032 return -EINVAL;
1033
1034 }
1035 }
1036 if (rem > 0)
1037 return -EINVAL;
1038
1039 if (!tun_key->ipv4_dst)
1040 return -EINVAL;
1041
1042 if (!ttl)
1043 return -EINVAL;
1044
1045 return 0;
1046}
1047
1048int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1049 const struct ovs_key_ipv4_tunnel *tun_key)
1050{
1051 struct nlattr *nla;
1052
1053 nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
1054 if (!nla)
1055 return -EMSGSIZE;
1056
1057 if (tun_key->tun_flags & TUNNEL_KEY &&
1058 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
1059 return -EMSGSIZE;
1060 if (tun_key->ipv4_src &&
1061 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src))
1062 return -EMSGSIZE;
1063 if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst))
1064 return -EMSGSIZE;
1065 if (tun_key->ipv4_tos &&
1066 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos))
1067 return -EMSGSIZE;
1068 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
1069 return -EMSGSIZE;
1070 if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
1071 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
1072 return -EMSGSIZE;
1073 if ((tun_key->tun_flags & TUNNEL_CSUM) &&
1074 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
1075 return -EMSGSIZE;
1076
1077 nla_nest_end(skb, nla);
1078 return 0;
1079}
1080
960/** 1081/**
961 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. 1082 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
962 * @swkey: receives the extracted flow key. 1083 * @swkey: receives the extracted flow key.
@@ -999,6 +1120,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
999 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1120 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1000 } 1121 }
1001 1122
1123 if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1124 err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
1125 if (err)
1126 return err;
1127
1128 attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1129 }
1130
1002 /* Data attributes. */ 1131 /* Data attributes. */
1003 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) 1132 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
1004 return -EINVAL; 1133 return -EINVAL;
@@ -1126,6 +1255,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1126/** 1255/**
1127 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1256 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1128 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1257 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
1258 * @key_len: Length of key in @flow. Used for calculating flow hash.
1129 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1259 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1130 * sequence. 1260 * sequence.
1131 * 1261 *
@@ -1134,20 +1264,24 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1134 * get the metadata, that is, the parts of the flow key that cannot be 1264 * get the metadata, that is, the parts of the flow key that cannot be
1135 * extracted from the packet itself. 1265 * extracted from the packet itself.
1136 */ 1266 */
1137int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, 1267int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
1138 const struct nlattr *attr) 1268 const struct nlattr *attr)
1139{ 1269{
1270 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
1140 const struct nlattr *nla; 1271 const struct nlattr *nla;
1141 int rem; 1272 int rem;
1142 1273
1143 flow->key.phy.in_port = DP_MAX_PORTS; 1274 flow->key.phy.in_port = DP_MAX_PORTS;
1144 flow->key.phy.priority = 0; 1275 flow->key.phy.priority = 0;
1145 flow->key.phy.skb_mark = 0; 1276 flow->key.phy.skb_mark = 0;
1277 memset(tun_key, 0, sizeof(flow->key.tun_key));
1146 1278
1147 nla_for_each_nested(nla, attr, rem) { 1279 nla_for_each_nested(nla, attr, rem) {
1148 int type = nla_type(nla); 1280 int type = nla_type(nla);
1149 1281
1150 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { 1282 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
1283 int err;
1284
1151 if (nla_len(nla) != ovs_key_lens[type]) 1285 if (nla_len(nla) != ovs_key_lens[type])
1152 return -EINVAL; 1286 return -EINVAL;
1153 1287
@@ -1156,6 +1290,12 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1156 flow->key.phy.priority = nla_get_u32(nla); 1290 flow->key.phy.priority = nla_get_u32(nla);
1157 break; 1291 break;
1158 1292
1293 case OVS_KEY_ATTR_TUNNEL:
1294 err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
1295 if (err)
1296 return err;
1297 break;
1298
1159 case OVS_KEY_ATTR_IN_PORT: 1299 case OVS_KEY_ATTR_IN_PORT:
1160 if (nla_get_u32(nla) >= DP_MAX_PORTS) 1300 if (nla_get_u32(nla) >= DP_MAX_PORTS)
1161 return -EINVAL; 1301 return -EINVAL;
@@ -1170,6 +1310,10 @@ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1170 } 1310 }
1171 if (rem) 1311 if (rem)
1172 return -EINVAL; 1312 return -EINVAL;
1313
1314 flow->hash = ovs_flow_hash(&flow->key,
1315 flow_key_start(&flow->key), key_len);
1316
1173 return 0; 1317 return 0;
1174} 1318}
1175 1319
@@ -1182,6 +1326,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1182 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) 1326 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
1183 goto nla_put_failure; 1327 goto nla_put_failure;
1184 1328
1329 if (swkey->tun_key.ipv4_dst &&
1330 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key))
1331 goto nla_put_failure;
1332
1185 if (swkey->phy.in_port != DP_MAX_PORTS && 1333 if (swkey->phy.in_port != DP_MAX_PORTS &&
1186 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1334 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
1187 goto nla_put_failure; 1335 goto nla_put_failure;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 2a83e2141f08..66ef7220293e 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -40,7 +40,38 @@ struct sw_flow_actions {
40 struct nlattr actions[]; 40 struct nlattr actions[];
41}; 41};
42 42
43/* Used to memset ovs_key_ipv4_tunnel padding. */
44#define OVS_TUNNEL_KEY_SIZE \
45 (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \
46 FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
47
48struct ovs_key_ipv4_tunnel {
49 __be64 tun_id;
50 __be32 ipv4_src;
51 __be32 ipv4_dst;
52 __be16 tun_flags;
53 u8 ipv4_tos;
54 u8 ipv4_ttl;
55};
56
57static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
58 const struct iphdr *iph, __be64 tun_id,
59 __be16 tun_flags)
60{
61 tun_key->tun_id = tun_id;
62 tun_key->ipv4_src = iph->saddr;
63 tun_key->ipv4_dst = iph->daddr;
64 tun_key->ipv4_tos = iph->tos;
65 tun_key->ipv4_ttl = iph->ttl;
66 tun_key->tun_flags = tun_flags;
67
68 /* clear struct padding. */
69 memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
70 sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
71}
72
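The tail memset matters because ovs_flow_tbl_lookup() compares keys with memcmp(): any uninitialized compiler padding would make two logically equal keys miss each other. A sketch of the same offsetof-plus-sizeof idiom (the analogue of OVS_TUNNEL_KEY_SIZE) on an invented struct:

/* Sketch: clear struct padding so whole-struct memcmp() is reliable. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tun_key {
	uint64_t tun_id;
	uint32_t src, dst;
	uint16_t flags;
	uint8_t tos, ttl;
	/* the compiler may add tail padding here to round sizeof up */
};

#define TUN_KEY_FILLED \
	(offsetof(struct tun_key, ttl) + sizeof(((struct tun_key *)0)->ttl))

static void tun_key_init(struct tun_key *k)
{
	k->tun_id = 1; k->src = 2; k->dst = 3;
	k->flags = 4; k->tos = 5; k->ttl = 6;
	/* zero whatever the compiler padded after the last member */
	memset((unsigned char *)k + TUN_KEY_FILLED, 0,
	       sizeof(*k) - TUN_KEY_FILLED);
}

int main(void)
{
	struct tun_key a, b;

	/* deliberately different garbage in the padding before init */
	memset(&a, 0xaa, sizeof(a));
	memset(&b, 0x55, sizeof(b));
	tun_key_init(&a);
	tun_key_init(&b);
	printf("keys %s\n", memcmp(&a, &b, sizeof(a)) ? "differ" : "match");
	return 0;
}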
43struct sw_flow_key { 73struct sw_flow_key {
74 struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
44 struct { 75 struct {
45 u32 priority; /* Packet QoS priority. */ 76 u32 priority; /* Packet QoS priority. */
46 u32 skb_mark; /* SKB mark. */ 77 u32 skb_mark; /* SKB mark. */
@@ -130,7 +161,7 @@ struct sw_flow *ovs_flow_alloc(void);
130void ovs_flow_deferred_free(struct sw_flow *); 161void ovs_flow_deferred_free(struct sw_flow *);
131void ovs_flow_free(struct sw_flow *flow); 162void ovs_flow_free(struct sw_flow *flow);
132 163
133struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); 164struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
134void ovs_flow_deferred_free_acts(struct sw_flow_actions *); 165void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
135 166
136int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, 167int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
@@ -141,10 +172,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 172int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 173int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
143 const struct nlattr *); 174 const struct nlattr *);
144int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, 175int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
145 const struct nlattr *attr); 176 const struct nlattr *attr);
146 177
147#define MAX_ACTIONS_BUFSIZE (16 * 1024) 178#define MAX_ACTIONS_BUFSIZE (32 * 1024)
148#define TBL_MIN_BUCKETS 1024 179#define TBL_MIN_BUCKETS 1024
149 180
150struct flow_table { 181struct flow_table {
@@ -173,11 +204,15 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
173struct flow_table *ovs_flow_tbl_alloc(int new_size); 204struct flow_table *ovs_flow_tbl_alloc(int new_size);
174struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); 205struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
175struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); 206struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
176void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); 207void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
208 struct sw_flow_key *key, int key_len);
177void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); 209void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
178u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
179 210
180struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); 211struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
181extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; 212extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
213int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
214 struct ovs_key_ipv4_tunnel *tun_key);
215int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
216 const struct ovs_key_ipv4_tunnel *tun_key);
182 217
183#endif /* flow.h */ 218#endif /* flow.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
new file mode 100644
index 000000000000..3a8d1900aa78
--- /dev/null
+++ b/net/openvswitch/vport-gre.c
@@ -0,0 +1,274 @@
1/*
2 * Copyright (c) 2007-2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#ifdef CONFIG_NET_IPGRE_DEMUX
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/if.h>
23#include <linux/skbuff.h>
24#include <linux/ip.h>
25#include <linux/if_tunnel.h>
26#include <linux/if_vlan.h>
27#include <linux/in.h>
28#include <linux/if_vlan.h>

29#include <linux/in.h>
30#include <linux/in_route.h>
31#include <linux/inetdevice.h>
32#include <linux/jhash.h>
33#include <linux/list.h>
34#include <linux/kernel.h>
35#include <linux/workqueue.h>
36#include <linux/rculist.h>
37#include <net/route.h>
38#include <net/xfrm.h>
39
40#include <net/icmp.h>
41#include <net/ip.h>
42#include <net/ip_tunnels.h>
43#include <net/gre.h>
44#include <net/net_namespace.h>
45#include <net/netns/generic.h>
46#include <net/protocol.h>
47
48#include "datapath.h"
49#include "vport.h"
50
51/* Returns the least-significant 32 bits of a __be64. */
52static __be32 be64_get_low32(__be64 x)
53{
54#ifdef __BIG_ENDIAN
55 return (__force __be32)x;
56#else
57 return (__force __be32)((__force u64)x >> 32);
58#endif
59}
60
61static __be16 filter_tnl_flags(__be16 flags)
62{
63 return flags & (TUNNEL_CSUM | TUNNEL_KEY);
64}
65
66static struct sk_buff *__build_header(struct sk_buff *skb,
67 int tunnel_hlen)
68{
69 const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
70 struct tnl_ptk_info tpi;
71
72 skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
73 if (IS_ERR(skb))
74 return NULL;
75
76 tpi.flags = filter_tnl_flags(tun_key->tun_flags);
77 tpi.proto = htons(ETH_P_TEB);
78 tpi.key = be64_get_low32(tun_key->tun_id);
79 tpi.seq = 0;
80 gre_build_header(skb, &tpi, tunnel_hlen);
81
82 return skb;
83}
84
85static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
86{
87#ifdef __BIG_ENDIAN
88 return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
89#else
90 return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
91#endif
92}
93
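These two helpers map the 32-bit on-wire GRE key into the low 32 bits of OVS's big-endian 64-bit tunnel ID, leaving the high half for the receive-side sequence number. The kernel versions juggle __be32/__be64 storage; the sketch below shows the same packing in plain host byte order:

/* Sketch: host-order equivalents of key_to_tunnel_id()/be64_get_low32(). */
#include <stdint.h>
#include <stdio.h>

static uint64_t key_to_tunnel_id(uint32_t key, uint32_t seq)
{
	return ((uint64_t)seq << 32) | key;
}

static uint32_t tunnel_id_low32(uint64_t id)
{
	return (uint32_t)id;
}

int main(void)
{
	uint64_t id = key_to_tunnel_id(0xdeadbeef, 0);

	printf("tun_id=%016llx, key back=%08x\n",
	       (unsigned long long)id, (unsigned)tunnel_id_low32(id));
	return 0;
}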
94/* Called with rcu_read_lock and BH disabled. */
95static int gre_rcv(struct sk_buff *skb,
96 const struct tnl_ptk_info *tpi)
97{
98 struct ovs_key_ipv4_tunnel tun_key;
99 struct ovs_net *ovs_net;
100 struct vport *vport;
101 __be64 key;
102
103 ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
104 vport = rcu_dereference(ovs_net->vport_net.gre_vport);
105 if (unlikely(!vport))
106 return PACKET_REJECT;
107
108 key = key_to_tunnel_id(tpi->key, tpi->seq);
109 ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
110 filter_tnl_flags(tpi->flags));
111
112 ovs_vport_receive(vport, skb, &tun_key);
113 return PACKET_RCVD;
114}
115
116static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
117{
118 struct net *net = ovs_dp_get_net(vport->dp);
119 struct flowi4 fl;
120 struct rtable *rt;
121 int min_headroom;
122 int tunnel_hlen;
123 __be16 df;
124 int err;
125
126 if (unlikely(!OVS_CB(skb)->tun_key)) {
127 err = -EINVAL;
128 goto error;
129 }
130
131 /* Route lookup */
132 memset(&fl, 0, sizeof(fl));
133 fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
134 fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
135 fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
136 fl.flowi4_mark = skb->mark;
137 fl.flowi4_proto = IPPROTO_GRE;
138
139 rt = ip_route_output_key(net, &fl);
140 if (IS_ERR(rt))
141 return PTR_ERR(rt);
142
143 tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
144
145 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
146 + tunnel_hlen + sizeof(struct iphdr)
147 + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
148 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
149 int head_delta = SKB_DATA_ALIGN(min_headroom -
150 skb_headroom(skb) +
151 16);
152 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
153 0, GFP_ATOMIC);
154 if (unlikely(err))
155 goto err_free_rt;
156 }
157
158 if (vlan_tx_tag_present(skb)) {
159 if (unlikely(!__vlan_put_tag(skb,
160 skb->vlan_proto,
161 vlan_tx_tag_get(skb)))) {
162 err = -ENOMEM;
163 goto err_free_rt;
164 }
165 skb->vlan_tci = 0;
166 }
167
168 /* Push Tunnel header. */
169 skb = __build_header(skb, tunnel_hlen);
170 if (unlikely(!skb)) {
171 err = 0;
172 goto err_free_rt;
173 }
174
175 df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
176 htons(IP_DF) : 0;
177
178 skb->local_df = 1;
179
180 return iptunnel_xmit(net, rt, skb, fl.saddr,
181 OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
182 OVS_CB(skb)->tun_key->ipv4_tos,
183 OVS_CB(skb)->tun_key->ipv4_ttl, df);
184err_free_rt:
185 ip_rt_put(rt);
186error:
187 return err;
188}
189
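The headroom check in gre_tnl_send() adds up every header the transmit path may still push in front of the packet. Worked with illustrative constants; the kernel reads the real values from the route and device, and the 8-byte GRE header assumes TUNNEL_KEY without TUNNEL_CSUM:

/* Sketch: the min_headroom arithmetic with stand-in values. */
#include <stdio.h>

int main(void)
{
	int ll_reserved = 16;	/* LL_RESERVED_SPACE(rt->dst.dev) */
	int dst_header  = 0;	/* rt->dst.header_len */
	int gre_hlen    = 8;	/* ip_gre_calc_hlen(): 4 base + 4 key */
	int ip_hlen     = 20;	/* sizeof(struct iphdr) */
	int vlan        = 4;	/* VLAN_HLEN, only if a tag is present */

	printf("min_headroom = %d bytes\n",
	       ll_reserved + dst_header + gre_hlen + ip_hlen + vlan);
	return 0;
}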
190static struct gre_cisco_protocol gre_protocol = {
191 .handler = gre_rcv,
192 .priority = 1,
193};
194
195static int gre_ports;
196static int gre_init(void)
197{
198 int err;
199
200 gre_ports++;
201 if (gre_ports > 1)
202 return 0;
203
204 err = gre_cisco_register(&gre_protocol);
205 if (err)
206 pr_warn("cannot register gre protocol handler\n");
207
208 return err;
209}
210
211static void gre_exit(void)
212{
213 gre_ports--;
214 if (gre_ports > 0)
215 return;
216
217 gre_cisco_unregister(&gre_protocol);
218}
219
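gre_init()/gre_exit() reference-count the single shared GRE protocol handler so that the first vport registers it and the last one tears it down; a plain int is enough because all callers hold ovs_mutex. A generic userspace sketch of the idiom:

/* Sketch: first-user registers, last-user unregisters. */
#include <stdio.h>

static int users;

static int backend_get(void)
{
	if (users++ > 0)
		return 0;		/* already registered */
	printf("register shared handler\n");
	return 0;			/* would be gre_cisco_register() */
}

static void backend_put(void)
{
	if (--users > 0)
		return;
	printf("unregister shared handler\n");
}

int main(void)
{
	backend_get();	/* first vport: registers */
	backend_get();	/* second vport: just bumps the count */
	backend_put();
	backend_put();	/* last vport: unregisters */
	return 0;
}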
220static const char *gre_get_name(const struct vport *vport)
221{
222 return vport_priv(vport);
223}
224
225static struct vport *gre_create(const struct vport_parms *parms)
226{
227 struct net *net = ovs_dp_get_net(parms->dp);
228 struct ovs_net *ovs_net;
229 struct vport *vport;
230 int err;
231
232 err = gre_init();
233 if (err)
234 return ERR_PTR(err);
235
236 ovs_net = net_generic(net, ovs_net_id);
237 if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
238 vport = ERR_PTR(-EEXIST);
239 goto error;
240 }
241
242 vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
243 if (IS_ERR(vport))
244 goto error;
245
246 strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
247 rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
248 return vport;
249
250error:
251 gre_exit();
252 return vport;
253}
254
255static void gre_tnl_destroy(struct vport *vport)
256{
257 struct net *net = ovs_dp_get_net(vport->dp);
258 struct ovs_net *ovs_net;
259
260 ovs_net = net_generic(net, ovs_net_id);
261
262 rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
263 ovs_vport_deferred_free(vport);
264 gre_exit();
265}
266
267const struct vport_ops ovs_gre_vport_ops = {
268 .type = OVS_VPORT_TYPE_GRE,
269 .create = gre_create,
270 .destroy = gre_tnl_destroy,
271 .get_name = gre_get_name,
272 .send = gre_tnl_send,
273};
274#endif
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index e284c7e1fec4..98d3edbbc235 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) 67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
68{ 68{
69 rcu_read_lock(); 69 rcu_read_lock();
70 ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); 70 ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
71 rcu_read_unlock(); 71 rcu_read_unlock();
72 return 0; 72 return 0;
73} 73}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 40de815b4213..5982f3f62835 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -51,7 +51,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
51 skb_push(skb, ETH_HLEN); 51 skb_push(skb, ETH_HLEN);
52 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); 52 ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
53 53
54 ovs_vport_receive(vport, skb); 54 ovs_vport_receive(vport, skb, NULL);
55 return; 55 return;
56 56
57error: 57error:
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 176d449351eb..f52dfb9cb5a7 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -38,6 +38,10 @@
38static const struct vport_ops *vport_ops_list[] = { 38static const struct vport_ops *vport_ops_list[] = {
39 &ovs_netdev_vport_ops, 39 &ovs_netdev_vport_ops,
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41
42#ifdef CONFIG_NET_IPGRE_DEMUX
43 &ovs_gre_vport_ops,
44#endif
41}; 45};
42 46
43/* Protected by RCU read lock for reading, ovs_mutex for writing. */ 47/* Protected by RCU read lock for reading, ovs_mutex for writing. */
@@ -325,7 +329,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
325 * Must be called with rcu_read_lock. The packet cannot be shared and 329 * Must be called with rcu_read_lock. The packet cannot be shared and
326 * skb->data should point to the Ethernet header. 330 * skb->data should point to the Ethernet header.
327 */ 331 */
328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 332void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
333 struct ovs_key_ipv4_tunnel *tun_key)
329{ 334{
330 struct pcpu_tstats *stats; 335 struct pcpu_tstats *stats;
331 336
@@ -335,6 +340,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
335 stats->rx_bytes += skb->len; 340 stats->rx_bytes += skb->len;
336 u64_stats_update_end(&stats->syncp); 341 u64_stats_update_end(&stats->syncp);
337 342
343 OVS_CB(skb)->tun_key = tun_key;
338 ovs_dp_process_received_packet(vport, skb); 344 ovs_dp_process_received_packet(vport, skb);
339} 345}
340 346
@@ -402,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
402 408
403 spin_unlock(&vport->stats_lock); 409 spin_unlock(&vport->stats_lock);
404} 410}
411
412static void free_vport_rcu(struct rcu_head *rcu)
413{
414 struct vport *vport = container_of(rcu, struct vport, rcu);
415
416 ovs_vport_free(vport);
417}
418
419void ovs_vport_deferred_free(struct vport *vport)
420{
421 if (!vport)
422 return;
423
424 call_rcu(&vport->rcu, free_vport_rcu);
425}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 293278c4c2df..376045c42f8b 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -34,6 +34,11 @@ struct vport_parms;
34 34
 35/* The following definitions are for users of the vport subsystem: */ 35/* The following definitions are for users of the vport subsystem: */
36 36
38struct vport_net {
39 struct vport __rcu *gre_vport;
40};
41
37int ovs_vport_init(void); 42int ovs_vport_init(void);
38void ovs_vport_exit(void); 43void ovs_vport_exit(void);
39 44
@@ -152,6 +157,7 @@ enum vport_err_type {
152struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, 157struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
153 const struct vport_parms *); 158 const struct vport_parms *);
154void ovs_vport_free(struct vport *); 159void ovs_vport_free(struct vport *);
160void ovs_vport_deferred_free(struct vport *vport);
155 161
156#define VPORT_ALIGN 8 162#define VPORT_ALIGN 8
157 163
@@ -184,13 +190,15 @@ static inline struct vport *vport_from_priv(const void *priv)
184 return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); 190 return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
185} 191}
186 192
187void ovs_vport_receive(struct vport *, struct sk_buff *); 193void ovs_vport_receive(struct vport *, struct sk_buff *,
194 struct ovs_key_ipv4_tunnel *);
188void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); 195void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
189 196
190/* List of statically compiled vport implementations. Don't forget to also 197/* List of statically compiled vport implementations. Don't forget to also
191 * add yours to the list at the top of vport.c. */ 198 * add yours to the list at the top of vport.c. */
192extern const struct vport_ops ovs_netdev_vport_ops; 199extern const struct vport_ops ovs_netdev_vport_ops;
193extern const struct vport_ops ovs_internal_vport_ops; 200extern const struct vport_ops ovs_internal_vport_ops;
201extern const struct vport_ops ovs_gre_vport_ops;
194 202
195static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, 203static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
196 const void *start, unsigned int len) 204 const void *start, unsigned int len)