aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig23
-rw-r--r--net/openvswitch/Makefile14
-rw-r--r--net/openvswitch/actions.c366
-rw-r--r--net/openvswitch/datapath.c365
-rw-r--r--net/openvswitch/datapath.h24
-rw-r--r--net/openvswitch/flow.c43
-rw-r--r--net/openvswitch/flow.h88
-rw-r--r--net/openvswitch/flow_netlink.c625
-rw-r--r--net/openvswitch/flow_netlink.h18
-rw-r--r--net/openvswitch/flow_table.c27
-rw-r--r--net/openvswitch/flow_table.h10
-rw-r--r--net/openvswitch/vport-geneve.c49
-rw-r--r--net/openvswitch/vport-gre.c65
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c16
-rw-r--r--net/openvswitch/vport-netdev.h3
-rw-r--r--net/openvswitch/vport-vxlan.c49
-rw-r--r--net/openvswitch/vport.c178
-rw-r--r--net/openvswitch/vport.h34
19 files changed, 1351 insertions, 668 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ba3bb8203b99..b7d818c59423 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,7 +4,9 @@
4 4
5config OPENVSWITCH 5config OPENVSWITCH
6 tristate "Open vSwitch" 6 tristate "Open vSwitch"
7 depends on INET
7 select LIBCRC32C 8 select LIBCRC32C
9 select NET_MPLS_GSO
8 ---help--- 10 ---help---
9 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 11 Open vSwitch is a multilayer Ethernet switch targeted at virtualized
10 environments. In addition to supporting a variety of features 12 environments. In addition to supporting a variety of features
@@ -29,11 +31,10 @@ config OPENVSWITCH
29 If unsure, say N. 31 If unsure, say N.
30 32
31config OPENVSWITCH_GRE 33config OPENVSWITCH_GRE
32 bool "Open vSwitch GRE tunneling support" 34 tristate "Open vSwitch GRE tunneling support"
33 depends on INET
34 depends on OPENVSWITCH 35 depends on OPENVSWITCH
35 depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m) 36 depends on NET_IPGRE_DEMUX
36 default y 37 default OPENVSWITCH
37 ---help--- 38 ---help---
38 If you say Y here, then the Open vSwitch will be able create GRE 39 If you say Y here, then the Open vSwitch will be able create GRE
39 vport. 40 vport.
@@ -43,11 +44,10 @@ config OPENVSWITCH_GRE
43 If unsure, say Y. 44 If unsure, say Y.
44 45
45config OPENVSWITCH_VXLAN 46config OPENVSWITCH_VXLAN
46 bool "Open vSwitch VXLAN tunneling support" 47 tristate "Open vSwitch VXLAN tunneling support"
47 depends on INET
48 depends on OPENVSWITCH 48 depends on OPENVSWITCH
49 depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m) 49 depends on VXLAN
50 default y 50 default OPENVSWITCH
51 ---help--- 51 ---help---
52 If you say Y here, then the Open vSwitch will be able create vxlan vport. 52 If you say Y here, then the Open vSwitch will be able create vxlan vport.
53 53
@@ -56,11 +56,10 @@ config OPENVSWITCH_VXLAN
56 If unsure, say Y. 56 If unsure, say Y.
57 57
58config OPENVSWITCH_GENEVE 58config OPENVSWITCH_GENEVE
59 bool "Open vSwitch Geneve tunneling support" 59 tristate "Open vSwitch Geneve tunneling support"
60 depends on INET
61 depends on OPENVSWITCH 60 depends on OPENVSWITCH
62 depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m) 61 depends on GENEVE
63 default y 62 default OPENVSWITCH
64 ---help--- 63 ---help---
65 If you say Y here, then the Open vSwitch will be able create geneve vport. 64 If you say Y here, then the Open vSwitch will be able create geneve vport.
66 65
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 9a33a273c375..91b9478413ef 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,14 +15,6 @@ openvswitch-y := \
15 vport-internal_dev.o \ 15 vport-internal_dev.o \
16 vport-netdev.o 16 vport-netdev.o
17 17
18ifneq ($(CONFIG_OPENVSWITCH_GENEVE),) 18obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
19openvswitch-y += vport-geneve.o 19obj-$(CONFIG_OPENVSWITCH_VXLAN) += vport-vxlan.o
20endif 20obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
21
22ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
23openvswitch-y += vport-vxlan.o
24endif
25
26ifneq ($(CONFIG_OPENVSWITCH_GRE),)
27openvswitch-y += vport-gre.o
28endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 006886dbee36..770064c83711 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,10 +28,12 @@
28#include <linux/in6.h> 28#include <linux/in6.h>
29#include <linux/if_arp.h> 29#include <linux/if_arp.h>
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31
31#include <net/ip.h> 32#include <net/ip.h>
32#include <net/ipv6.h> 33#include <net/ipv6.h>
33#include <net/checksum.h> 34#include <net/checksum.h>
34#include <net/dsfield.h> 35#include <net/dsfield.h>
36#include <net/mpls.h>
35#include <net/sctp/checksum.h> 37#include <net/sctp/checksum.h>
36 38
37#include "datapath.h" 39#include "datapath.h"
@@ -67,7 +69,7 @@ static void action_fifo_init(struct action_fifo *fifo)
67 fifo->tail = 0; 69 fifo->tail = 0;
68} 70}
69 71
70static bool action_fifo_is_empty(struct action_fifo *fifo) 72static bool action_fifo_is_empty(const struct action_fifo *fifo)
71{ 73{
72 return (fifo->head == fifo->tail); 74 return (fifo->head == fifo->tail);
73} 75}
@@ -90,7 +92,7 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
90 92
91/* Return true if fifo is not full */ 93/* Return true if fifo is not full */
92static struct deferred_action *add_deferred_actions(struct sk_buff *skb, 94static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
93 struct sw_flow_key *key, 95 const struct sw_flow_key *key,
94 const struct nlattr *attr) 96 const struct nlattr *attr)
95{ 97{
96 struct action_fifo *fifo; 98 struct action_fifo *fifo;
@@ -107,100 +109,132 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
107 return da; 109 return da;
108} 110}
109 111
110static int make_writable(struct sk_buff *skb, int write_len) 112static void invalidate_flow_key(struct sw_flow_key *key)
113{
114 key->eth.type = htons(0);
115}
116
117static bool is_flow_key_valid(const struct sw_flow_key *key)
111{ 118{
112 if (!pskb_may_pull(skb, write_len)) 119 return !!key->eth.type;
120}
121
122static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
123 const struct ovs_action_push_mpls *mpls)
124{
125 __be32 *new_mpls_lse;
126 struct ethhdr *hdr;
127
128 /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
129 if (skb->encapsulation)
130 return -ENOTSUPP;
131
132 if (skb_cow_head(skb, MPLS_HLEN) < 0)
113 return -ENOMEM; 133 return -ENOMEM;
114 134
115 if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) 135 skb_push(skb, MPLS_HLEN);
116 return 0; 136 memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
137 skb->mac_len);
138 skb_reset_mac_header(skb);
139
140 new_mpls_lse = (__be32 *)skb_mpls_header(skb);
141 *new_mpls_lse = mpls->mpls_lse;
117 142
118 return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 143 if (skb->ip_summed == CHECKSUM_COMPLETE)
144 skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
145 MPLS_HLEN, 0));
146
147 hdr = eth_hdr(skb);
148 hdr->h_proto = mpls->mpls_ethertype;
149
150 if (!skb->inner_protocol)
151 skb_set_inner_protocol(skb, skb->protocol);
152 skb->protocol = mpls->mpls_ethertype;
153
154 invalidate_flow_key(key);
155 return 0;
119} 156}
120 157
121/* remove VLAN header from packet and update csum accordingly. */ 158static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
122static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) 159 const __be16 ethertype)
123{ 160{
124 struct vlan_hdr *vhdr; 161 struct ethhdr *hdr;
125 int err; 162 int err;
126 163
127 err = make_writable(skb, VLAN_ETH_HLEN); 164 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
128 if (unlikely(err)) 165 if (unlikely(err))
129 return err; 166 return err;
130 167
131 if (skb->ip_summed == CHECKSUM_COMPLETE) 168 skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
132 skb->csum = csum_sub(skb->csum, csum_partial(skb->data
133 + (2 * ETH_ALEN), VLAN_HLEN, 0));
134 169
135 vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); 170 memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
136 *current_tci = vhdr->h_vlan_TCI; 171 skb->mac_len);
137 172
138 memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); 173 __skb_pull(skb, MPLS_HLEN);
139 __skb_pull(skb, VLAN_HLEN); 174 skb_reset_mac_header(skb);
140 175
141 vlan_set_encap_proto(skb, vhdr); 176 /* skb_mpls_header() is used to locate the ethertype
142 skb->mac_header += VLAN_HLEN; 177 * field correctly in the presence of VLAN tags.
143 if (skb_network_offset(skb) < ETH_HLEN) 178 */
144 skb_set_network_header(skb, ETH_HLEN); 179 hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
145 skb_reset_mac_len(skb); 180 hdr->h_proto = ethertype;
181 if (eth_p_mpls(skb->protocol))
182 skb->protocol = ethertype;
146 183
184 invalidate_flow_key(key);
147 return 0; 185 return 0;
148} 186}
149 187
150static int pop_vlan(struct sk_buff *skb) 188static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
189 const __be32 *mpls_lse)
151{ 190{
152 __be16 tci; 191 __be32 *stack;
153 int err; 192 int err;
154 193
155 if (likely(vlan_tx_tag_present(skb))) { 194 err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
156 skb->vlan_tci = 0;
157 } else {
158 if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
159 skb->len < VLAN_ETH_HLEN))
160 return 0;
161
162 err = __pop_vlan_tci(skb, &tci);
163 if (err)
164 return err;
165 }
166 /* move next vlan tag to hw accel tag */
167 if (likely(skb->protocol != htons(ETH_P_8021Q) ||
168 skb->len < VLAN_ETH_HLEN))
169 return 0;
170
171 err = __pop_vlan_tci(skb, &tci);
172 if (unlikely(err)) 195 if (unlikely(err))
173 return err; 196 return err;
174 197
175 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci)); 198 stack = (__be32 *)skb_mpls_header(skb);
199 if (skb->ip_summed == CHECKSUM_COMPLETE) {
200 __be32 diff[] = { ~(*stack), *mpls_lse };
201 skb->csum = ~csum_partial((char *)diff, sizeof(diff),
202 ~skb->csum);
203 }
204
205 *stack = *mpls_lse;
206 key->mpls.top_lse = *mpls_lse;
176 return 0; 207 return 0;
177} 208}
178 209
179static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan) 210static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
180{ 211{
181 if (unlikely(vlan_tx_tag_present(skb))) { 212 int err;
182 u16 current_tag;
183
184 /* push down current VLAN tag */
185 current_tag = vlan_tx_tag_get(skb);
186
187 if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
188 return -ENOMEM;
189 213
190 if (skb->ip_summed == CHECKSUM_COMPLETE) 214 err = skb_vlan_pop(skb);
191 skb->csum = csum_add(skb->csum, csum_partial(skb->data 215 if (vlan_tx_tag_present(skb))
192 + (2 * ETH_ALEN), VLAN_HLEN, 0)); 216 invalidate_flow_key(key);
217 else
218 key->eth.tci = 0;
219 return err;
220}
193 221
194 } 222static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
195 __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); 223 const struct ovs_action_push_vlan *vlan)
196 return 0; 224{
225 if (vlan_tx_tag_present(skb))
226 invalidate_flow_key(key);
227 else
228 key->eth.tci = vlan->vlan_tci;
229 return skb_vlan_push(skb, vlan->vlan_tpid,
230 ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
197} 231}
198 232
199static int set_eth_addr(struct sk_buff *skb, 233static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
200 const struct ovs_key_ethernet *eth_key) 234 const struct ovs_key_ethernet *eth_key)
201{ 235{
202 int err; 236 int err;
203 err = make_writable(skb, ETH_HLEN); 237 err = skb_ensure_writable(skb, ETH_HLEN);
204 if (unlikely(err)) 238 if (unlikely(err))
205 return err; 239 return err;
206 240
@@ -211,11 +245,13 @@ static int set_eth_addr(struct sk_buff *skb,
211 245
212 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); 246 ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
213 247
248 ether_addr_copy(key->eth.src, eth_key->eth_src);
249 ether_addr_copy(key->eth.dst, eth_key->eth_dst);
214 return 0; 250 return 0;
215} 251}
216 252
217static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, 253static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
218 __be32 *addr, __be32 new_addr) 254 __be32 *addr, __be32 new_addr)
219{ 255{
220 int transport_len = skb->len - skb_transport_offset(skb); 256 int transport_len = skb->len - skb_transport_offset(skb);
221 257
@@ -246,11 +282,11 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
246{ 282{
247 int transport_len = skb->len - skb_transport_offset(skb); 283 int transport_len = skb->len - skb_transport_offset(skb);
248 284
249 if (l4_proto == IPPROTO_TCP) { 285 if (l4_proto == NEXTHDR_TCP) {
250 if (likely(transport_len >= sizeof(struct tcphdr))) 286 if (likely(transport_len >= sizeof(struct tcphdr)))
251 inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, 287 inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
252 addr, new_addr, 1); 288 addr, new_addr, 1);
253 } else if (l4_proto == IPPROTO_UDP) { 289 } else if (l4_proto == NEXTHDR_UDP) {
254 if (likely(transport_len >= sizeof(struct udphdr))) { 290 if (likely(transport_len >= sizeof(struct udphdr))) {
255 struct udphdr *uh = udp_hdr(skb); 291 struct udphdr *uh = udp_hdr(skb);
256 292
@@ -261,6 +297,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
261 uh->check = CSUM_MANGLED_0; 297 uh->check = CSUM_MANGLED_0;
262 } 298 }
263 } 299 }
300 } else if (l4_proto == NEXTHDR_ICMP) {
301 if (likely(transport_len >= sizeof(struct icmp6hdr)))
302 inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
303 skb, addr, new_addr, 1);
264 } 304 }
265} 305}
266 306
@@ -294,42 +334,52 @@ static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
294 nh->ttl = new_ttl; 334 nh->ttl = new_ttl;
295} 335}
296 336
297static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) 337static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
338 const struct ovs_key_ipv4 *ipv4_key)
298{ 339{
299 struct iphdr *nh; 340 struct iphdr *nh;
300 int err; 341 int err;
301 342
302 err = make_writable(skb, skb_network_offset(skb) + 343 err = skb_ensure_writable(skb, skb_network_offset(skb) +
303 sizeof(struct iphdr)); 344 sizeof(struct iphdr));
304 if (unlikely(err)) 345 if (unlikely(err))
305 return err; 346 return err;
306 347
307 nh = ip_hdr(skb); 348 nh = ip_hdr(skb);
308 349
309 if (ipv4_key->ipv4_src != nh->saddr) 350 if (ipv4_key->ipv4_src != nh->saddr) {
310 set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); 351 set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
352 key->ipv4.addr.src = ipv4_key->ipv4_src;
353 }
311 354
312 if (ipv4_key->ipv4_dst != nh->daddr) 355 if (ipv4_key->ipv4_dst != nh->daddr) {
313 set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); 356 set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
357 key->ipv4.addr.dst = ipv4_key->ipv4_dst;
358 }
314 359
315 if (ipv4_key->ipv4_tos != nh->tos) 360 if (ipv4_key->ipv4_tos != nh->tos) {
316 ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); 361 ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
362 key->ip.tos = nh->tos;
363 }
317 364
318 if (ipv4_key->ipv4_ttl != nh->ttl) 365 if (ipv4_key->ipv4_ttl != nh->ttl) {
319 set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); 366 set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
367 key->ip.ttl = ipv4_key->ipv4_ttl;
368 }
320 369
321 return 0; 370 return 0;
322} 371}
323 372
324static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) 373static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
374 const struct ovs_key_ipv6 *ipv6_key)
325{ 375{
326 struct ipv6hdr *nh; 376 struct ipv6hdr *nh;
327 int err; 377 int err;
328 __be32 *saddr; 378 __be32 *saddr;
329 __be32 *daddr; 379 __be32 *daddr;
330 380
331 err = make_writable(skb, skb_network_offset(skb) + 381 err = skb_ensure_writable(skb, skb_network_offset(skb) +
332 sizeof(struct ipv6hdr)); 382 sizeof(struct ipv6hdr));
333 if (unlikely(err)) 383 if (unlikely(err))
334 return err; 384 return err;
335 385
@@ -337,9 +387,12 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
337 saddr = (__be32 *)&nh->saddr; 387 saddr = (__be32 *)&nh->saddr;
338 daddr = (__be32 *)&nh->daddr; 388 daddr = (__be32 *)&nh->daddr;
339 389
340 if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) 390 if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
341 set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, 391 set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
342 ipv6_key->ipv6_src, true); 392 ipv6_key->ipv6_src, true);
393 memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
394 sizeof(ipv6_key->ipv6_src));
395 }
343 396
344 if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { 397 if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
345 unsigned int offset = 0; 398 unsigned int offset = 0;
@@ -353,16 +406,22 @@ static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
353 406
354 set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, 407 set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
355 ipv6_key->ipv6_dst, recalc_csum); 408 ipv6_key->ipv6_dst, recalc_csum);
409 memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
410 sizeof(ipv6_key->ipv6_dst));
356 } 411 }
357 412
358 set_ipv6_tc(nh, ipv6_key->ipv6_tclass); 413 set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
414 key->ip.tos = ipv6_get_dsfield(nh);
415
359 set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); 416 set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
360 nh->hop_limit = ipv6_key->ipv6_hlimit; 417 key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
361 418
419 nh->hop_limit = ipv6_key->ipv6_hlimit;
420 key->ip.ttl = ipv6_key->ipv6_hlimit;
362 return 0; 421 return 0;
363} 422}
364 423
365/* Must follow make_writable() since that can move the skb data. */ 424/* Must follow skb_ensure_writable() since that can move the skb data. */
366static void set_tp_port(struct sk_buff *skb, __be16 *port, 425static void set_tp_port(struct sk_buff *skb, __be16 *port,
367 __be16 new_port, __sum16 *check) 426 __be16 new_port, __sum16 *check)
368{ 427{
@@ -386,54 +445,64 @@ static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
386 } 445 }
387} 446}
388 447
389static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) 448static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
449 const struct ovs_key_udp *udp_port_key)
390{ 450{
391 struct udphdr *uh; 451 struct udphdr *uh;
392 int err; 452 int err;
393 453
394 err = make_writable(skb, skb_transport_offset(skb) + 454 err = skb_ensure_writable(skb, skb_transport_offset(skb) +
395 sizeof(struct udphdr)); 455 sizeof(struct udphdr));
396 if (unlikely(err)) 456 if (unlikely(err))
397 return err; 457 return err;
398 458
399 uh = udp_hdr(skb); 459 uh = udp_hdr(skb);
400 if (udp_port_key->udp_src != uh->source) 460 if (udp_port_key->udp_src != uh->source) {
401 set_udp_port(skb, &uh->source, udp_port_key->udp_src); 461 set_udp_port(skb, &uh->source, udp_port_key->udp_src);
462 key->tp.src = udp_port_key->udp_src;
463 }
402 464
403 if (udp_port_key->udp_dst != uh->dest) 465 if (udp_port_key->udp_dst != uh->dest) {
404 set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); 466 set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
467 key->tp.dst = udp_port_key->udp_dst;
468 }
405 469
406 return 0; 470 return 0;
407} 471}
408 472
409static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) 473static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
474 const struct ovs_key_tcp *tcp_port_key)
410{ 475{
411 struct tcphdr *th; 476 struct tcphdr *th;
412 int err; 477 int err;
413 478
414 err = make_writable(skb, skb_transport_offset(skb) + 479 err = skb_ensure_writable(skb, skb_transport_offset(skb) +
415 sizeof(struct tcphdr)); 480 sizeof(struct tcphdr));
416 if (unlikely(err)) 481 if (unlikely(err))
417 return err; 482 return err;
418 483
419 th = tcp_hdr(skb); 484 th = tcp_hdr(skb);
420 if (tcp_port_key->tcp_src != th->source) 485 if (tcp_port_key->tcp_src != th->source) {
421 set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); 486 set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
487 key->tp.src = tcp_port_key->tcp_src;
488 }
422 489
423 if (tcp_port_key->tcp_dst != th->dest) 490 if (tcp_port_key->tcp_dst != th->dest) {
424 set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); 491 set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
492 key->tp.dst = tcp_port_key->tcp_dst;
493 }
425 494
426 return 0; 495 return 0;
427} 496}
428 497
429static int set_sctp(struct sk_buff *skb, 498static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
430 const struct ovs_key_sctp *sctp_port_key) 499 const struct ovs_key_sctp *sctp_port_key)
431{ 500{
432 struct sctphdr *sh; 501 struct sctphdr *sh;
433 int err; 502 int err;
434 unsigned int sctphoff = skb_transport_offset(skb); 503 unsigned int sctphoff = skb_transport_offset(skb);
435 504
436 err = make_writable(skb, sctphoff + sizeof(struct sctphdr)); 505 err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
437 if (unlikely(err)) 506 if (unlikely(err))
438 return err; 507 return err;
439 508
@@ -454,39 +523,35 @@ static int set_sctp(struct sk_buff *skb,
454 sh->checksum = old_csum ^ old_correct_csum ^ new_csum; 523 sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
455 524
456 skb_clear_hash(skb); 525 skb_clear_hash(skb);
526 key->tp.src = sctp_port_key->sctp_src;
527 key->tp.dst = sctp_port_key->sctp_dst;
457 } 528 }
458 529
459 return 0; 530 return 0;
460} 531}
461 532
462static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) 533static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
463{ 534{
464 struct vport *vport; 535 struct vport *vport = ovs_vport_rcu(dp, out_port);
465 536
466 if (unlikely(!skb)) 537 if (likely(vport))
467 return -ENOMEM; 538 ovs_vport_send(vport, skb);
468 539 else
469 vport = ovs_vport_rcu(dp, out_port);
470 if (unlikely(!vport)) {
471 kfree_skb(skb); 540 kfree_skb(skb);
472 return -ENODEV;
473 }
474
475 ovs_vport_send(vport, skb);
476 return 0;
477} 541}
478 542
479static int output_userspace(struct datapath *dp, struct sk_buff *skb, 543static int output_userspace(struct datapath *dp, struct sk_buff *skb,
480 struct sw_flow_key *key, const struct nlattr *attr) 544 struct sw_flow_key *key, const struct nlattr *attr)
481{ 545{
546 struct ovs_tunnel_info info;
482 struct dp_upcall_info upcall; 547 struct dp_upcall_info upcall;
483 const struct nlattr *a; 548 const struct nlattr *a;
484 int rem; 549 int rem;
485 550
486 upcall.cmd = OVS_PACKET_CMD_ACTION; 551 upcall.cmd = OVS_PACKET_CMD_ACTION;
487 upcall.key = key;
488 upcall.userdata = NULL; 552 upcall.userdata = NULL;
489 upcall.portid = 0; 553 upcall.portid = 0;
554 upcall.egress_tun_info = NULL;
490 555
491 for (a = nla_data(attr), rem = nla_len(attr); rem > 0; 556 for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
492 a = nla_next(a, &rem)) { 557 a = nla_next(a, &rem)) {
@@ -498,15 +563,27 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
498 case OVS_USERSPACE_ATTR_PID: 563 case OVS_USERSPACE_ATTR_PID:
499 upcall.portid = nla_get_u32(a); 564 upcall.portid = nla_get_u32(a);
500 break; 565 break;
566
567 case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
568 /* Get out tunnel info. */
569 struct vport *vport;
570
571 vport = ovs_vport_rcu(dp, nla_get_u32(a));
572 if (vport) {
573 int err;
574
575 err = ovs_vport_get_egress_tun_info(vport, skb,
576 &info);
577 if (!err)
578 upcall.egress_tun_info = &info;
579 }
580 break;
501 } 581 }
502 }
503 582
504 return ovs_dp_upcall(dp, skb, &upcall); 583 } /* End of switch. */
505} 584 }
506 585
507static bool last_action(const struct nlattr *a, int rem) 586 return ovs_dp_upcall(dp, skb, key, &upcall);
508{
509 return a->nla_len == rem;
510} 587}
511 588
512static int sample(struct datapath *dp, struct sk_buff *skb, 589static int sample(struct datapath *dp, struct sk_buff *skb,
@@ -543,7 +620,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
543 * user space. This skb will be consumed by its caller. 620 * user space. This skb will be consumed by its caller.
544 */ 621 */
545 if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && 622 if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
546 last_action(a, rem))) 623 nla_is_last(a, rem)))
547 return output_userspace(dp, skb, key, a); 624 return output_userspace(dp, skb, key, a);
548 625
549 skb = skb_clone(skb, GFP_ATOMIC); 626 skb = skb_clone(skb, GFP_ATOMIC);
@@ -576,18 +653,20 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
576 key->ovs_flow_hash = hash; 653 key->ovs_flow_hash = hash;
577} 654}
578 655
579static int execute_set_action(struct sk_buff *skb, 656static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
580 const struct nlattr *nested_attr) 657 const struct nlattr *nested_attr)
581{ 658{
582 int err = 0; 659 int err = 0;
583 660
584 switch (nla_type(nested_attr)) { 661 switch (nla_type(nested_attr)) {
585 case OVS_KEY_ATTR_PRIORITY: 662 case OVS_KEY_ATTR_PRIORITY:
586 skb->priority = nla_get_u32(nested_attr); 663 skb->priority = nla_get_u32(nested_attr);
664 key->phy.priority = skb->priority;
587 break; 665 break;
588 666
589 case OVS_KEY_ATTR_SKB_MARK: 667 case OVS_KEY_ATTR_SKB_MARK:
590 skb->mark = nla_get_u32(nested_attr); 668 skb->mark = nla_get_u32(nested_attr);
669 key->phy.skb_mark = skb->mark;
591 break; 670 break;
592 671
593 case OVS_KEY_ATTR_TUNNEL_INFO: 672 case OVS_KEY_ATTR_TUNNEL_INFO:
@@ -595,27 +674,31 @@ static int execute_set_action(struct sk_buff *skb,
595 break; 674 break;
596 675
597 case OVS_KEY_ATTR_ETHERNET: 676 case OVS_KEY_ATTR_ETHERNET:
598 err = set_eth_addr(skb, nla_data(nested_attr)); 677 err = set_eth_addr(skb, key, nla_data(nested_attr));
599 break; 678 break;
600 679
601 case OVS_KEY_ATTR_IPV4: 680 case OVS_KEY_ATTR_IPV4:
602 err = set_ipv4(skb, nla_data(nested_attr)); 681 err = set_ipv4(skb, key, nla_data(nested_attr));
603 break; 682 break;
604 683
605 case OVS_KEY_ATTR_IPV6: 684 case OVS_KEY_ATTR_IPV6:
606 err = set_ipv6(skb, nla_data(nested_attr)); 685 err = set_ipv6(skb, key, nla_data(nested_attr));
607 break; 686 break;
608 687
609 case OVS_KEY_ATTR_TCP: 688 case OVS_KEY_ATTR_TCP:
610 err = set_tcp(skb, nla_data(nested_attr)); 689 err = set_tcp(skb, key, nla_data(nested_attr));
611 break; 690 break;
612 691
613 case OVS_KEY_ATTR_UDP: 692 case OVS_KEY_ATTR_UDP:
614 err = set_udp(skb, nla_data(nested_attr)); 693 err = set_udp(skb, key, nla_data(nested_attr));
615 break; 694 break;
616 695
617 case OVS_KEY_ATTR_SCTP: 696 case OVS_KEY_ATTR_SCTP:
618 err = set_sctp(skb, nla_data(nested_attr)); 697 err = set_sctp(skb, key, nla_data(nested_attr));
698 break;
699
700 case OVS_KEY_ATTR_MPLS:
701 err = set_mpls(skb, key, nla_data(nested_attr));
619 break; 702 break;
620 } 703 }
621 704
@@ -627,13 +710,17 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
627 const struct nlattr *a, int rem) 710 const struct nlattr *a, int rem)
628{ 711{
629 struct deferred_action *da; 712 struct deferred_action *da;
630 int err;
631 713
632 err = ovs_flow_key_update(skb, key); 714 if (!is_flow_key_valid(key)) {
633 if (err) 715 int err;
634 return err; 716
717 err = ovs_flow_key_update(skb, key);
718 if (err)
719 return err;
720 }
721 BUG_ON(!is_flow_key_valid(key));
635 722
636 if (!last_action(a, rem)) { 723 if (!nla_is_last(a, rem)) {
637 /* Recirc action is the not the last action 724 /* Recirc action is the not the last action
638 * of the action list, need to clone the skb. 725 * of the action list, need to clone the skb.
639 */ 726 */
@@ -668,7 +755,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
668 /* Every output action needs a separate clone of 'skb', but the common 755 /* Every output action needs a separate clone of 'skb', but the common
669 * case is just a single output action, so that doing a clone and 756 * case is just a single output action, so that doing a clone and
670 * then freeing the original skbuff is wasteful. So the following code 757 * then freeing the original skbuff is wasteful. So the following code
671 * is slightly obscure just to avoid that. */ 758 * is slightly obscure just to avoid that.
759 */
672 int prev_port = -1; 760 int prev_port = -1;
673 const struct nlattr *a; 761 const struct nlattr *a;
674 int rem; 762 int rem;
@@ -677,8 +765,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
677 a = nla_next(a, &rem)) { 765 a = nla_next(a, &rem)) {
678 int err = 0; 766 int err = 0;
679 767
680 if (prev_port != -1) { 768 if (unlikely(prev_port != -1)) {
681 do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); 769 struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
770
771 if (out_skb)
772 do_output(dp, out_skb, prev_port);
773
682 prev_port = -1; 774 prev_port = -1;
683 } 775 }
684 776
@@ -695,19 +787,25 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
695 execute_hash(skb, key, a); 787 execute_hash(skb, key, a);
696 break; 788 break;
697 789
790 case OVS_ACTION_ATTR_PUSH_MPLS:
791 err = push_mpls(skb, key, nla_data(a));
792 break;
793
794 case OVS_ACTION_ATTR_POP_MPLS:
795 err = pop_mpls(skb, key, nla_get_be16(a));
796 break;
797
698 case OVS_ACTION_ATTR_PUSH_VLAN: 798 case OVS_ACTION_ATTR_PUSH_VLAN:
699 err = push_vlan(skb, nla_data(a)); 799 err = push_vlan(skb, key, nla_data(a));
700 if (unlikely(err)) /* skb already freed. */
701 return err;
702 break; 800 break;
703 801
704 case OVS_ACTION_ATTR_POP_VLAN: 802 case OVS_ACTION_ATTR_POP_VLAN:
705 err = pop_vlan(skb); 803 err = pop_vlan(skb, key);
706 break; 804 break;
707 805
708 case OVS_ACTION_ATTR_RECIRC: 806 case OVS_ACTION_ATTR_RECIRC:
709 err = execute_recirc(dp, skb, key, a, rem); 807 err = execute_recirc(dp, skb, key, a, rem);
710 if (last_action(a, rem)) { 808 if (nla_is_last(a, rem)) {
711 /* If this is the last action, the skb has 809 /* If this is the last action, the skb has
712 * been consumed or freed. 810 * been consumed or freed.
713 * Return immediately. 811 * Return immediately.
@@ -717,13 +815,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
717 break; 815 break;
718 816
719 case OVS_ACTION_ATTR_SET: 817 case OVS_ACTION_ATTR_SET:
720 err = execute_set_action(skb, nla_data(a)); 818 err = execute_set_action(skb, key, nla_data(a));
721 break; 819 break;
722 820
723 case OVS_ACTION_ATTR_SAMPLE: 821 case OVS_ACTION_ATTR_SAMPLE:
724 err = sample(dp, skb, key, a); 822 err = sample(dp, skb, key, a);
725 if (unlikely(err)) /* skb already freed. */
726 return err;
727 break; 823 break;
728 } 824 }
729 825
@@ -769,14 +865,12 @@ static void process_deferred_actions(struct datapath *dp)
769 865
770/* Execute a list of actions against 'skb'. */ 866/* Execute a list of actions against 'skb'. */
771int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 867int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
868 const struct sw_flow_actions *acts,
772 struct sw_flow_key *key) 869 struct sw_flow_key *key)
773{ 870{
774 int level = this_cpu_read(exec_actions_level); 871 int level = this_cpu_read(exec_actions_level);
775 struct sw_flow_actions *acts;
776 int err; 872 int err;
777 873
778 acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
779
780 this_cpu_inc(exec_actions_level); 874 this_cpu_inc(exec_actions_level);
781 OVS_CB(skb)->egress_tun_info = NULL; 875 OVS_CB(skb)->egress_tun_info = NULL;
782 err = do_execute_actions(dp, skb, key, 876 err = do_execute_actions(dp, skb, key,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2e31d9e7f4dc..4e9a5f035cbc 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,6 +59,7 @@
59#include "vport-netdev.h" 59#include "vport-netdev.h"
60 60
61int ovs_net_id __read_mostly; 61int ovs_net_id __read_mostly;
62EXPORT_SYMBOL_GPL(ovs_net_id);
62 63
63static struct genl_family dp_packet_genl_family; 64static struct genl_family dp_packet_genl_family;
64static struct genl_family dp_flow_genl_family; 65static struct genl_family dp_flow_genl_family;
@@ -82,8 +83,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
82 unsigned int group) 83 unsigned int group)
83{ 84{
84 return info->nlhdr->nlmsg_flags & NLM_F_ECHO || 85 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
85 genl_has_listeners(family, genl_info_net(info)->genl_sock, 86 genl_has_listeners(family, genl_info_net(info), group);
86 group);
87} 87}
88 88
89static void ovs_notify(struct genl_family *family, 89static void ovs_notify(struct genl_family *family,
@@ -130,27 +130,41 @@ int lockdep_ovsl_is_held(void)
130 else 130 else
131 return 1; 131 return 1;
132} 132}
133EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
133#endif 134#endif
134 135
135static struct vport *new_vport(const struct vport_parms *); 136static struct vport *new_vport(const struct vport_parms *);
136static int queue_gso_packets(struct datapath *dp, struct sk_buff *, 137static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
138 const struct sw_flow_key *,
137 const struct dp_upcall_info *); 139 const struct dp_upcall_info *);
138static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, 140static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
141 const struct sw_flow_key *,
139 const struct dp_upcall_info *); 142 const struct dp_upcall_info *);
140 143
141/* Must be called with rcu_read_lock or ovs_mutex. */ 144/* Must be called with rcu_read_lock. */
142static struct datapath *get_dp(struct net *net, int dp_ifindex) 145static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
143{ 146{
144 struct datapath *dp = NULL; 147 struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
145 struct net_device *dev;
146 148
147 rcu_read_lock();
148 dev = dev_get_by_index_rcu(net, dp_ifindex);
149 if (dev) { 149 if (dev) {
150 struct vport *vport = ovs_internal_dev_get_vport(dev); 150 struct vport *vport = ovs_internal_dev_get_vport(dev);
151 if (vport) 151 if (vport)
152 dp = vport->dp; 152 return vport->dp;
153 } 153 }
154
155 return NULL;
156}
157
158/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
159 * returned dp pointer valid.
160 */
161static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
162{
163 struct datapath *dp;
164
165 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
166 rcu_read_lock();
167 dp = get_dp_rcu(net, dp_ifindex);
154 rcu_read_unlock(); 168 rcu_read_unlock();
155 169
156 return dp; 170 return dp;
@@ -163,7 +177,7 @@ const char *ovs_dp_name(const struct datapath *dp)
163 return vport->ops->get_name(vport); 177 return vport->ops->get_name(vport);
164} 178}
165 179
166static int get_dpifindex(struct datapath *dp) 180static int get_dpifindex(const struct datapath *dp)
167{ 181{
168 struct vport *local; 182 struct vport *local;
169 int ifindex; 183 int ifindex;
@@ -185,6 +199,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
185{ 199{
186 struct datapath *dp = container_of(rcu, struct datapath, rcu); 200 struct datapath *dp = container_of(rcu, struct datapath, rcu);
187 201
202 ovs_flow_tbl_destroy(&dp->table);
188 free_percpu(dp->stats_percpu); 203 free_percpu(dp->stats_percpu);
189 release_net(ovs_dp_get_net(dp)); 204 release_net(ovs_dp_get_net(dp));
190 kfree(dp->ports); 205 kfree(dp->ports);
@@ -243,6 +258,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
243 const struct vport *p = OVS_CB(skb)->input_vport; 258 const struct vport *p = OVS_CB(skb)->input_vport;
244 struct datapath *dp = p->dp; 259 struct datapath *dp = p->dp;
245 struct sw_flow *flow; 260 struct sw_flow *flow;
261 struct sw_flow_actions *sf_acts;
246 struct dp_stats_percpu *stats; 262 struct dp_stats_percpu *stats;
247 u64 *stats_counter; 263 u64 *stats_counter;
248 u32 n_mask_hit; 264 u32 n_mask_hit;
@@ -256,10 +272,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
256 int error; 272 int error;
257 273
258 upcall.cmd = OVS_PACKET_CMD_MISS; 274 upcall.cmd = OVS_PACKET_CMD_MISS;
259 upcall.key = key;
260 upcall.userdata = NULL; 275 upcall.userdata = NULL;
261 upcall.portid = ovs_vport_find_upcall_portid(p, skb); 276 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
262 error = ovs_dp_upcall(dp, skb, &upcall); 277 upcall.egress_tun_info = NULL;
278 error = ovs_dp_upcall(dp, skb, key, &upcall);
263 if (unlikely(error)) 279 if (unlikely(error))
264 kfree_skb(skb); 280 kfree_skb(skb);
265 else 281 else
@@ -268,10 +284,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
268 goto out; 284 goto out;
269 } 285 }
270 286
271 OVS_CB(skb)->flow = flow; 287 ovs_flow_stats_update(flow, key->tp.flags, skb);
288 sf_acts = rcu_dereference(flow->sf_acts);
289 ovs_execute_actions(dp, skb, sf_acts, key);
272 290
273 ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
274 ovs_execute_actions(dp, skb, key);
275 stats_counter = &stats->n_hit; 291 stats_counter = &stats->n_hit;
276 292
277out: 293out:
@@ -283,6 +299,7 @@ out:
283} 299}
284 300
285int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 301int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
302 const struct sw_flow_key *key,
286 const struct dp_upcall_info *upcall_info) 303 const struct dp_upcall_info *upcall_info)
287{ 304{
288 struct dp_stats_percpu *stats; 305 struct dp_stats_percpu *stats;
@@ -294,9 +311,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
294 } 311 }
295 312
296 if (!skb_is_gso(skb)) 313 if (!skb_is_gso(skb))
297 err = queue_userspace_packet(dp, skb, upcall_info); 314 err = queue_userspace_packet(dp, skb, key, upcall_info);
298 else 315 else
299 err = queue_gso_packets(dp, skb, upcall_info); 316 err = queue_gso_packets(dp, skb, key, upcall_info);
300 if (err) 317 if (err)
301 goto err; 318 goto err;
302 319
@@ -313,37 +330,43 @@ err:
313} 330}
314 331
315static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, 332static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
333 const struct sw_flow_key *key,
316 const struct dp_upcall_info *upcall_info) 334 const struct dp_upcall_info *upcall_info)
317{ 335{
318 unsigned short gso_type = skb_shinfo(skb)->gso_type; 336 unsigned short gso_type = skb_shinfo(skb)->gso_type;
319 struct dp_upcall_info later_info;
320 struct sw_flow_key later_key; 337 struct sw_flow_key later_key;
321 struct sk_buff *segs, *nskb; 338 struct sk_buff *segs, *nskb;
339 struct ovs_skb_cb ovs_cb;
322 int err; 340 int err;
323 341
342 ovs_cb = *OVS_CB(skb);
324 segs = __skb_gso_segment(skb, NETIF_F_SG, false); 343 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
344 *OVS_CB(skb) = ovs_cb;
325 if (IS_ERR(segs)) 345 if (IS_ERR(segs))
326 return PTR_ERR(segs); 346 return PTR_ERR(segs);
347 if (segs == NULL)
348 return -EINVAL;
349
350 if (gso_type & SKB_GSO_UDP) {
351 /* The initial flow key extracted by ovs_flow_key_extract()
352 * in this case is for a first fragment, so we need to
353 * properly mark later fragments.
354 */
355 later_key = *key;
356 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
357 }
327 358
328 /* Queue all of the segments. */ 359 /* Queue all of the segments. */
329 skb = segs; 360 skb = segs;
330 do { 361 do {
331 err = queue_userspace_packet(dp, skb, upcall_info); 362 *OVS_CB(skb) = ovs_cb;
363 if (gso_type & SKB_GSO_UDP && skb != segs)
364 key = &later_key;
365
366 err = queue_userspace_packet(dp, skb, key, upcall_info);
332 if (err) 367 if (err)
333 break; 368 break;
334 369
335 if (skb == segs && gso_type & SKB_GSO_UDP) {
336 /* The initial flow key extracted by ovs_flow_extract()
337 * in this case is for a first fragment, so we need to
338 * properly mark later fragments.
339 */
340 later_key = *upcall_info->key;
341 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
342
343 later_info = *upcall_info;
344 later_info.key = &later_key;
345 upcall_info = &later_info;
346 }
347 } while ((skb = skb->next)); 370 } while ((skb = skb->next));
348 371
349 /* Free all of the segments. */ 372 /* Free all of the segments. */
@@ -358,46 +381,26 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
358 return err; 381 return err;
359} 382}
360 383
361static size_t key_attr_size(void) 384static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
362{
363 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
364 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
365 + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
366 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
367 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
368 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
369 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
370 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
371 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
372 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
373 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
374 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
375 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
376 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
377 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
378 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
379 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
380 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
381 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
382 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
383 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
384}
385
386static size_t upcall_msg_size(const struct nlattr *userdata,
387 unsigned int hdrlen) 385 unsigned int hdrlen)
388{ 386{
389 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 387 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
390 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 388 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
391 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ 389 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
392 390
393 /* OVS_PACKET_ATTR_USERDATA */ 391 /* OVS_PACKET_ATTR_USERDATA */
394 if (userdata) 392 if (upcall_info->userdata)
395 size += NLA_ALIGN(userdata->nla_len); 393 size += NLA_ALIGN(upcall_info->userdata->nla_len);
394
395 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
396 if (upcall_info->egress_tun_info)
397 size += nla_total_size(ovs_tun_key_attr_size());
396 398
397 return size; 399 return size;
398} 400}
399 401
400static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, 402static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
403 const struct sw_flow_key *key,
401 const struct dp_upcall_info *upcall_info) 404 const struct dp_upcall_info *upcall_info)
402{ 405{
403 struct ovs_header *upcall; 406 struct ovs_header *upcall;
@@ -421,11 +424,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
421 if (!nskb) 424 if (!nskb)
422 return -ENOMEM; 425 return -ENOMEM;
423 426
424 nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); 427 nskb = __vlan_hwaccel_push_inside(nskb);
425 if (!nskb) 428 if (!nskb)
426 return -ENOMEM; 429 return -ENOMEM;
427 430
428 nskb->vlan_tci = 0;
429 skb = nskb; 431 skb = nskb;
430 } 432 }
431 433
@@ -448,7 +450,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
448 else 450 else
449 hlen = skb->len; 451 hlen = skb->len;
450 452
451 len = upcall_msg_size(upcall_info->userdata, hlen); 453 len = upcall_msg_size(upcall_info, hlen);
452 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); 454 user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
453 if (!user_skb) { 455 if (!user_skb) {
454 err = -ENOMEM; 456 err = -ENOMEM;
@@ -460,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
460 upcall->dp_ifindex = dp_ifindex; 462 upcall->dp_ifindex = dp_ifindex;
461 463
462 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 464 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
463 err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); 465 err = ovs_nla_put_flow(key, key, user_skb);
464 BUG_ON(err); 466 BUG_ON(err);
465 nla_nest_end(user_skb, nla); 467 nla_nest_end(user_skb, nla);
466 468
@@ -469,6 +471,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
469 nla_len(upcall_info->userdata), 471 nla_len(upcall_info->userdata),
470 nla_data(upcall_info->userdata)); 472 nla_data(upcall_info->userdata));
471 473
474 if (upcall_info->egress_tun_info) {
475 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
476 err = ovs_nla_put_egress_tunnel_key(user_skb,
477 upcall_info->egress_tun_info);
478 BUG_ON(err);
479 nla_nest_end(user_skb, nla);
480 }
481
472 /* Only reserve room for attribute header, packet data is added 482 /* Only reserve room for attribute header, packet data is added
473 * in skb_zerocopy() */ 483 * in skb_zerocopy() */
474 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { 484 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -508,11 +518,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
508 struct sw_flow_actions *acts; 518 struct sw_flow_actions *acts;
509 struct sk_buff *packet; 519 struct sk_buff *packet;
510 struct sw_flow *flow; 520 struct sw_flow *flow;
521 struct sw_flow_actions *sf_acts;
511 struct datapath *dp; 522 struct datapath *dp;
512 struct ethhdr *eth; 523 struct ethhdr *eth;
513 struct vport *input_vport; 524 struct vport *input_vport;
514 int len; 525 int len;
515 int err; 526 int err;
527 bool log = !a[OVS_FLOW_ATTR_PROBE];
516 528
517 err = -EINVAL; 529 err = -EINVAL;
518 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 530 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -546,29 +558,22 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
546 goto err_kfree_skb; 558 goto err_kfree_skb;
547 559
548 err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet, 560 err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
549 &flow->key); 561 &flow->key, log);
550 if (err) 562 if (err)
551 goto err_flow_free; 563 goto err_flow_free;
552 564
553 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
554 err = PTR_ERR(acts);
555 if (IS_ERR(acts))
556 goto err_flow_free;
557
558 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 565 err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
559 &flow->key, 0, &acts); 566 &flow->key, &acts, log);
560 if (err) 567 if (err)
561 goto err_flow_free; 568 goto err_flow_free;
562 569
563 rcu_assign_pointer(flow->sf_acts, acts); 570 rcu_assign_pointer(flow->sf_acts, acts);
564
565 OVS_CB(packet)->egress_tun_info = NULL; 571 OVS_CB(packet)->egress_tun_info = NULL;
566 OVS_CB(packet)->flow = flow;
567 packet->priority = flow->key.phy.priority; 572 packet->priority = flow->key.phy.priority;
568 packet->mark = flow->key.phy.skb_mark; 573 packet->mark = flow->key.phy.skb_mark;
569 574
570 rcu_read_lock(); 575 rcu_read_lock();
571 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 576 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
572 err = -ENODEV; 577 err = -ENODEV;
573 if (!dp) 578 if (!dp)
574 goto err_unlock; 579 goto err_unlock;
@@ -581,9 +586,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
581 goto err_unlock; 586 goto err_unlock;
582 587
583 OVS_CB(packet)->input_vport = input_vport; 588 OVS_CB(packet)->input_vport = input_vport;
589 sf_acts = rcu_dereference(flow->sf_acts);
584 590
585 local_bh_disable(); 591 local_bh_disable();
586 err = ovs_execute_actions(dp, packet, &flow->key); 592 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
587 local_bh_enable(); 593 local_bh_enable();
588 rcu_read_unlock(); 594 rcu_read_unlock();
589 595
@@ -626,7 +632,7 @@ static struct genl_family dp_packet_genl_family = {
626 .n_ops = ARRAY_SIZE(dp_packet_genl_ops), 632 .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
627}; 633};
628 634
629static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, 635static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
630 struct ovs_dp_megaflow_stats *mega_stats) 636 struct ovs_dp_megaflow_stats *mega_stats)
631{ 637{
632 int i; 638 int i;
@@ -660,8 +666,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
660static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) 666static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
661{ 667{
662 return NLMSG_ALIGN(sizeof(struct ovs_header)) 668 return NLMSG_ALIGN(sizeof(struct ovs_header))
663 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 669 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
664 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */ 670 + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
665 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 671 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
666 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 672 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
667 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ 673 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -669,58 +675,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
669} 675}
670 676
671/* Called with ovs_mutex or RCU read lock. */ 677/* Called with ovs_mutex or RCU read lock. */
672static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, 678static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
673 struct sk_buff *skb, u32 portid, 679 struct sk_buff *skb)
674 u32 seq, u32 flags, u8 cmd)
675{ 680{
676 const int skb_orig_len = skb->len;
677 struct nlattr *start;
678 struct ovs_flow_stats stats;
679 __be16 tcp_flags;
680 unsigned long used;
681 struct ovs_header *ovs_header;
682 struct nlattr *nla; 681 struct nlattr *nla;
683 int err; 682 int err;
684 683
685 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
686 if (!ovs_header)
687 return -EMSGSIZE;
688
689 ovs_header->dp_ifindex = dp_ifindex;
690
691 /* Fill flow key. */ 684 /* Fill flow key. */
692 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 685 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
693 if (!nla) 686 if (!nla)
694 goto nla_put_failure; 687 return -EMSGSIZE;
695 688
696 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); 689 err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
697 if (err) 690 if (err)
698 goto error; 691 return err;
692
699 nla_nest_end(skb, nla); 693 nla_nest_end(skb, nla);
700 694
695 /* Fill flow mask. */
701 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); 696 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
702 if (!nla) 697 if (!nla)
703 goto nla_put_failure; 698 return -EMSGSIZE;
704 699
705 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); 700 err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
706 if (err) 701 if (err)
707 goto error; 702 return err;
708 703
709 nla_nest_end(skb, nla); 704 nla_nest_end(skb, nla);
705 return 0;
706}
707
708/* Called with ovs_mutex or RCU read lock. */
709static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
710 struct sk_buff *skb)
711{
712 struct ovs_flow_stats stats;
713 __be16 tcp_flags;
714 unsigned long used;
710 715
711 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); 716 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
712 717
713 if (used && 718 if (used &&
714 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) 719 nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
715 goto nla_put_failure; 720 return -EMSGSIZE;
716 721
717 if (stats.n_packets && 722 if (stats.n_packets &&
718 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) 723 nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
719 goto nla_put_failure; 724 return -EMSGSIZE;
720 725
721 if ((u8)ntohs(tcp_flags) && 726 if ((u8)ntohs(tcp_flags) &&
722 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) 727 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
723 goto nla_put_failure; 728 return -EMSGSIZE;
729
730 return 0;
731}
732
733/* Called with ovs_mutex or RCU read lock. */
734static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
735 struct sk_buff *skb, int skb_orig_len)
736{
737 struct nlattr *start;
738 int err;
724 739
725 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if 740 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
726 * this is the first flow to be dumped into 'skb'. This is unusual for 741 * this is the first flow to be dumped into 'skb'. This is unusual for
@@ -744,17 +759,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
744 nla_nest_end(skb, start); 759 nla_nest_end(skb, start);
745 else { 760 else {
746 if (skb_orig_len) 761 if (skb_orig_len)
747 goto error; 762 return err;
748 763
749 nla_nest_cancel(skb, start); 764 nla_nest_cancel(skb, start);
750 } 765 }
751 } else if (skb_orig_len) 766 } else if (skb_orig_len) {
752 goto nla_put_failure; 767 return -EMSGSIZE;
768 }
769
770 return 0;
771}
772
773/* Called with ovs_mutex or RCU read lock. */
774static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
775 struct sk_buff *skb, u32 portid,
776 u32 seq, u32 flags, u8 cmd)
777{
778 const int skb_orig_len = skb->len;
779 struct ovs_header *ovs_header;
780 int err;
781
782 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
783 flags, cmd);
784 if (!ovs_header)
785 return -EMSGSIZE;
786
787 ovs_header->dp_ifindex = dp_ifindex;
788
789 err = ovs_flow_cmd_fill_match(flow, skb);
790 if (err)
791 goto error;
792
793 err = ovs_flow_cmd_fill_stats(flow, skb);
794 if (err)
795 goto error;
796
797 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
798 if (err)
799 goto error;
753 800
754 return genlmsg_end(skb, ovs_header); 801 return genlmsg_end(skb, ovs_header);
755 802
756nla_put_failure:
757 err = -EMSGSIZE;
758error: 803error:
759 genlmsg_cancel(skb, ovs_header); 804 genlmsg_cancel(skb, ovs_header);
760 return err; 805 return err;
@@ -809,13 +854,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
809 struct sw_flow_actions *acts; 854 struct sw_flow_actions *acts;
810 struct sw_flow_match match; 855 struct sw_flow_match match;
811 int error; 856 int error;
857 bool log = !a[OVS_FLOW_ATTR_PROBE];
812 858
813 /* Must have key and actions. */ 859 /* Must have key and actions. */
814 error = -EINVAL; 860 error = -EINVAL;
815 if (!a[OVS_FLOW_ATTR_KEY]) 861 if (!a[OVS_FLOW_ATTR_KEY]) {
862 OVS_NLERR(log, "Flow key attr not present in new flow.");
816 goto error; 863 goto error;
817 if (!a[OVS_FLOW_ATTR_ACTIONS]) 864 }
865 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
866 OVS_NLERR(log, "Flow actions attr not present in new flow.");
818 goto error; 867 goto error;
868 }
819 869
820 /* Most of the time we need to allocate a new flow, do it before 870 /* Most of the time we need to allocate a new flow, do it before
821 * locking. 871 * locking.
@@ -828,24 +878,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
828 878
829 /* Extract key. */ 879 /* Extract key. */
830 ovs_match_init(&match, &new_flow->unmasked_key, &mask); 880 ovs_match_init(&match, &new_flow->unmasked_key, &mask);
831 error = ovs_nla_get_match(&match, 881 error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
832 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 882 a[OVS_FLOW_ATTR_MASK], log);
833 if (error) 883 if (error)
834 goto err_kfree_flow; 884 goto err_kfree_flow;
835 885
836 ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); 886 ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
837 887
838 /* Validate actions. */ 888 /* Validate actions. */
839 acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
840 error = PTR_ERR(acts);
841 if (IS_ERR(acts))
842 goto err_kfree_flow;
843
844 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, 889 error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
845 0, &acts); 890 &acts, log);
846 if (error) { 891 if (error) {
847 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 892 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
848 goto err_kfree_acts; 893 goto err_kfree_flow;
849 } 894 }
850 895
851 reply = ovs_flow_cmd_alloc_info(acts, info, false); 896 reply = ovs_flow_cmd_alloc_info(acts, info, false);
@@ -897,6 +942,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
897 } 942 }
898 /* The unmasked key has to be the same for flow updates. */ 943 /* The unmasked key has to be the same for flow updates. */
899 if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { 944 if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
945 /* Look for any overlapping flow. */
900 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); 946 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
901 if (!flow) { 947 if (!flow) {
902 error = -ENOENT; 948 error = -ENOENT;
@@ -936,23 +982,21 @@ error:
936 return error; 982 return error;
937} 983}
938 984
985/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
939static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, 986static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
940 const struct sw_flow_key *key, 987 const struct sw_flow_key *key,
941 const struct sw_flow_mask *mask) 988 const struct sw_flow_mask *mask,
989 bool log)
942{ 990{
943 struct sw_flow_actions *acts; 991 struct sw_flow_actions *acts;
944 struct sw_flow_key masked_key; 992 struct sw_flow_key masked_key;
945 int error; 993 int error;
946 994
947 acts = ovs_nla_alloc_flow_actions(nla_len(a));
948 if (IS_ERR(acts))
949 return acts;
950
951 ovs_flow_mask_key(&masked_key, key, mask); 995 ovs_flow_mask_key(&masked_key, key, mask);
952 error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); 996 error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
953 if (error) { 997 if (error) {
954 OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); 998 OVS_NLERR(log,
955 kfree(acts); 999 "Actions may not be safe on all matching packets");
956 return ERR_PTR(error); 1000 return ERR_PTR(error);
957 } 1001 }
958 1002
@@ -971,29 +1015,31 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
971 struct sw_flow_actions *old_acts = NULL, *acts = NULL; 1015 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
972 struct sw_flow_match match; 1016 struct sw_flow_match match;
973 int error; 1017 int error;
1018 bool log = !a[OVS_FLOW_ATTR_PROBE];
974 1019
975 /* Extract key. */ 1020 /* Extract key. */
976 error = -EINVAL; 1021 error = -EINVAL;
977 if (!a[OVS_FLOW_ATTR_KEY]) 1022 if (!a[OVS_FLOW_ATTR_KEY]) {
1023 OVS_NLERR(log, "Flow key attribute not present in set flow.");
978 goto error; 1024 goto error;
1025 }
979 1026
980 ovs_match_init(&match, &key, &mask); 1027 ovs_match_init(&match, &key, &mask);
981 error = ovs_nla_get_match(&match, 1028 error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
982 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); 1029 a[OVS_FLOW_ATTR_MASK], log);
983 if (error) 1030 if (error)
984 goto error; 1031 goto error;
985 1032
986 /* Validate actions. */ 1033 /* Validate actions. */
987 if (a[OVS_FLOW_ATTR_ACTIONS]) { 1034 if (a[OVS_FLOW_ATTR_ACTIONS]) {
988 acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); 1035 acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
1036 log);
989 if (IS_ERR(acts)) { 1037 if (IS_ERR(acts)) {
990 error = PTR_ERR(acts); 1038 error = PTR_ERR(acts);
991 goto error; 1039 goto error;
992 } 1040 }
993 }
994 1041
995 /* Can allocate before locking if have acts. */ 1042 /* Can allocate before locking if have acts. */
996 if (acts) {
997 reply = ovs_flow_cmd_alloc_info(acts, info, false); 1043 reply = ovs_flow_cmd_alloc_info(acts, info, false);
998 if (IS_ERR(reply)) { 1044 if (IS_ERR(reply)) {
999 error = PTR_ERR(reply); 1045 error = PTR_ERR(reply);
@@ -1068,14 +1114,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1068 struct datapath *dp; 1114 struct datapath *dp;
1069 struct sw_flow_match match; 1115 struct sw_flow_match match;
1070 int err; 1116 int err;
1117 bool log = !a[OVS_FLOW_ATTR_PROBE];
1071 1118
1072 if (!a[OVS_FLOW_ATTR_KEY]) { 1119 if (!a[OVS_FLOW_ATTR_KEY]) {
1073 OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); 1120 OVS_NLERR(log,
1121 "Flow get message rejected, Key attribute missing.");
1074 return -EINVAL; 1122 return -EINVAL;
1075 } 1123 }
1076 1124
1077 ovs_match_init(&match, &key, NULL); 1125 ovs_match_init(&match, &key, NULL);
1078 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1126 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
1079 if (err) 1127 if (err)
1080 return err; 1128 return err;
1081 1129
@@ -1116,10 +1164,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1116 struct datapath *dp; 1164 struct datapath *dp;
1117 struct sw_flow_match match; 1165 struct sw_flow_match match;
1118 int err; 1166 int err;
1167 bool log = !a[OVS_FLOW_ATTR_PROBE];
1119 1168
1120 if (likely(a[OVS_FLOW_ATTR_KEY])) { 1169 if (likely(a[OVS_FLOW_ATTR_KEY])) {
1121 ovs_match_init(&match, &key, NULL); 1170 ovs_match_init(&match, &key, NULL);
1122 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); 1171 err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
1172 log);
1123 if (unlikely(err)) 1173 if (unlikely(err))
1124 return err; 1174 return err;
1125 } 1175 }
@@ -1177,7 +1227,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1177 struct datapath *dp; 1227 struct datapath *dp;
1178 1228
1179 rcu_read_lock(); 1229 rcu_read_lock();
1180 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1230 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1181 if (!dp) { 1231 if (!dp) {
1182 rcu_read_unlock(); 1232 rcu_read_unlock();
1183 return -ENODEV; 1233 return -ENODEV;
@@ -1209,8 +1259,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1209 1259
1210static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { 1260static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1211 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, 1261 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1262 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1212 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, 1263 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1213 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, 1264 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1265 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1214}; 1266};
1215 1267
1216static const struct genl_ops dp_flow_genl_ops[] = { 1268static const struct genl_ops dp_flow_genl_ops[] = {
@@ -1263,7 +1315,7 @@ static size_t ovs_dp_cmd_msg_size(void)
1263 return msgsize; 1315 return msgsize;
1264} 1316}
1265 1317
1266/* Called with ovs_mutex or RCU read lock. */ 1318/* Called with ovs_mutex. */
1267static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1319static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1268 u32 portid, u32 seq, u32 flags, u8 cmd) 1320 u32 portid, u32 seq, u32 flags, u8 cmd)
1269{ 1321{
@@ -1311,7 +1363,7 @@ static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
1311 1363
1312/* Called with rcu_read_lock or ovs_mutex. */ 1364/* Called with rcu_read_lock or ovs_mutex. */
1313static struct datapath *lookup_datapath(struct net *net, 1365static struct datapath *lookup_datapath(struct net *net,
1314 struct ovs_header *ovs_header, 1366 const struct ovs_header *ovs_header,
1315 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1367 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1316{ 1368{
1317 struct datapath *dp; 1369 struct datapath *dp;
@@ -1339,7 +1391,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
1339 dp->user_features = 0; 1391 dp->user_features = 0;
1340} 1392}
1341 1393
1342static void ovs_dp_change(struct datapath *dp, struct nlattr **a) 1394static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1343{ 1395{
1344 if (a[OVS_DP_ATTR_USER_FEATURES]) 1396 if (a[OVS_DP_ATTR_USER_FEATURES])
1345 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); 1397 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
@@ -1440,7 +1492,7 @@ err_destroy_ports_array:
1440err_destroy_percpu: 1492err_destroy_percpu:
1441 free_percpu(dp->stats_percpu); 1493 free_percpu(dp->stats_percpu);
1442err_destroy_table: 1494err_destroy_table:
1443 ovs_flow_tbl_destroy(&dp->table, false); 1495 ovs_flow_tbl_destroy(&dp->table);
1444err_free_dp: 1496err_free_dp:
1445 release_net(ovs_dp_get_net(dp)); 1497 release_net(ovs_dp_get_net(dp));
1446 kfree(dp); 1498 kfree(dp);
@@ -1472,8 +1524,6 @@ static void __dp_destroy(struct datapath *dp)
1472 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); 1524 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1473 1525
1474 /* RCU destroy the flow table */ 1526 /* RCU destroy the flow table */
1475 ovs_flow_tbl_destroy(&dp->table, true);
1476
1477 call_rcu(&dp->rcu, destroy_dp_rcu); 1527 call_rcu(&dp->rcu, destroy_dp_rcu);
1478} 1528}
1479 1529
@@ -1553,7 +1603,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1553 if (!reply) 1603 if (!reply)
1554 return -ENOMEM; 1604 return -ENOMEM;
1555 1605
1556 rcu_read_lock(); 1606 ovs_lock();
1557 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1607 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1558 if (IS_ERR(dp)) { 1608 if (IS_ERR(dp)) {
1559 err = PTR_ERR(dp); 1609 err = PTR_ERR(dp);
@@ -1562,12 +1612,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1562 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, 1612 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1563 info->snd_seq, 0, OVS_DP_CMD_NEW); 1613 info->snd_seq, 0, OVS_DP_CMD_NEW);
1564 BUG_ON(err < 0); 1614 BUG_ON(err < 0);
1565 rcu_read_unlock(); 1615 ovs_unlock();
1566 1616
1567 return genlmsg_reply(reply, info); 1617 return genlmsg_reply(reply, info);
1568 1618
1569err_unlock_free: 1619err_unlock_free:
1570 rcu_read_unlock(); 1620 ovs_unlock();
1571 kfree_skb(reply); 1621 kfree_skb(reply);
1572 return err; 1622 return err;
1573} 1623}
@@ -1579,8 +1629,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1579 int skip = cb->args[0]; 1629 int skip = cb->args[0];
1580 int i = 0; 1630 int i = 0;
1581 1631
1582 rcu_read_lock(); 1632 ovs_lock();
1583 list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { 1633 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1584 if (i >= skip && 1634 if (i >= skip &&
1585 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1635 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1586 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1636 cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1588,7 +1638,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1588 break; 1638 break;
1589 i++; 1639 i++;
1590 } 1640 }
1591 rcu_read_unlock(); 1641 ovs_unlock();
1592 1642
1593 cb->args[0] = i; 1643 cb->args[0] = i;
1594 1644
@@ -1705,7 +1755,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1705 1755
1706/* Called with ovs_mutex or RCU read lock. */ 1756/* Called with ovs_mutex or RCU read lock. */
1707static struct vport *lookup_vport(struct net *net, 1757static struct vport *lookup_vport(struct net *net,
1708 struct ovs_header *ovs_header, 1758 const struct ovs_header *ovs_header,
1709 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1759 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1710{ 1760{
1711 struct datapath *dp; 1761 struct datapath *dp;
@@ -1762,6 +1812,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1762 return -ENOMEM; 1812 return -ENOMEM;
1763 1813
1764 ovs_lock(); 1814 ovs_lock();
1815restart:
1765 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1816 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1766 err = -ENODEV; 1817 err = -ENODEV;
1767 if (!dp) 1818 if (!dp)
@@ -1793,8 +1844,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1793 1844
1794 vport = new_vport(&parms); 1845 vport = new_vport(&parms);
1795 err = PTR_ERR(vport); 1846 err = PTR_ERR(vport);
1796 if (IS_ERR(vport)) 1847 if (IS_ERR(vport)) {
1848 if (err == -EAGAIN)
1849 goto restart;
1797 goto exit_unlock_free; 1850 goto exit_unlock_free;
1851 }
1798 1852
1799 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, 1853 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1800 info->snd_seq, 0, OVS_VPORT_CMD_NEW); 1854 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
@@ -1937,7 +1991,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1937 int i, j = 0; 1991 int i, j = 0;
1938 1992
1939 rcu_read_lock(); 1993 rcu_read_lock();
1940 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1994 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1941 if (!dp) { 1995 if (!dp) {
1942 rcu_read_unlock(); 1996 rcu_read_unlock();
1943 return -ENODEV; 1997 return -ENODEV;
@@ -2110,12 +2164,18 @@ static int __init dp_init(void)
2110 if (err) 2164 if (err)
2111 goto error_netns_exit; 2165 goto error_netns_exit;
2112 2166
2167 err = ovs_netdev_init();
2168 if (err)
2169 goto error_unreg_notifier;
2170
2113 err = dp_register_genl(); 2171 err = dp_register_genl();
2114 if (err < 0) 2172 if (err < 0)
2115 goto error_unreg_notifier; 2173 goto error_unreg_netdev;
2116 2174
2117 return 0; 2175 return 0;
2118 2176
2177error_unreg_netdev:
2178 ovs_netdev_exit();
2119error_unreg_notifier: 2179error_unreg_notifier:
2120 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2180 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2121error_netns_exit: 2181error_netns_exit:
@@ -2135,6 +2195,7 @@ error:
2135static void dp_cleanup(void) 2195static void dp_cleanup(void)
2136{ 2196{
2137 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2197 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2198 ovs_netdev_exit();
2138 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2199 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2139 unregister_pernet_device(&ovs_net_ops); 2200 unregister_pernet_device(&ovs_net_ops);
2140 rcu_barrier(); 2201 rcu_barrier();
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 974135439c5c..3ece94563079 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -94,14 +94,12 @@ struct datapath {
94 94
95/** 95/**
96 * struct ovs_skb_cb - OVS data in skb CB 96 * struct ovs_skb_cb - OVS data in skb CB
97 * @flow: The flow associated with this packet. May be %NULL if no flow.
98 * @egress_tun_key: Tunnel information about this packet on egress path. 97 * @egress_tun_key: Tunnel information about this packet on egress path.
99 * NULL if the packet is not being tunneled. 98 * NULL if the packet is not being tunneled.
100 * @input_vport: The original vport packet came in on. This value is cached 99 * @input_vport: The original vport packet came in on. This value is cached
101 * when a packet is received by OVS. 100 * when a packet is received by OVS.
102 */ 101 */
103struct ovs_skb_cb { 102struct ovs_skb_cb {
104 struct sw_flow *flow;
105 struct ovs_tunnel_info *egress_tun_info; 103 struct ovs_tunnel_info *egress_tun_info;
106 struct vport *input_vport; 104 struct vport *input_vport;
107}; 105};
@@ -110,18 +108,18 @@ struct ovs_skb_cb {
110/** 108/**
111 * struct dp_upcall - metadata to include with a packet to send to userspace 109 * struct dp_upcall - metadata to include with a packet to send to userspace
112 * @cmd: One of %OVS_PACKET_CMD_*. 110 * @cmd: One of %OVS_PACKET_CMD_*.
113 * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
114 * @userdata: If nonnull, its variable-length value is passed to userspace as 111 * @userdata: If nonnull, its variable-length value is passed to userspace as
115 * %OVS_PACKET_ATTR_USERDATA. 112 * %OVS_PACKET_ATTR_USERDATA.
116 * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no 113 * @portid: Netlink portid to which packet should be sent. If @portid is 0
117 * packet is sent and the packet is accounted in the datapath's @n_lost 114 * then no packet is sent and the packet is accounted in the datapath's @n_lost
118 * counter. 115 * counter.
116 * @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
119 */ 117 */
120struct dp_upcall_info { 118struct dp_upcall_info {
121 u8 cmd; 119 const struct ovs_tunnel_info *egress_tun_info;
122 const struct sw_flow_key *key;
123 const struct nlattr *userdata; 120 const struct nlattr *userdata;
124 u32 portid; 121 u32 portid;
122 u8 cmd;
125}; 123};
126 124
127/** 125/**
@@ -151,7 +149,7 @@ int lockdep_ovsl_is_held(void);
151#define rcu_dereference_ovsl(p) \ 149#define rcu_dereference_ovsl(p) \
152 rcu_dereference_check(p, lockdep_ovsl_is_held()) 150 rcu_dereference_check(p, lockdep_ovsl_is_held())
153 151
154static inline struct net *ovs_dp_get_net(struct datapath *dp) 152static inline struct net *ovs_dp_get_net(const struct datapath *dp)
155{ 153{
156 return read_pnet(&dp->net); 154 return read_pnet(&dp->net);
157} 155}
@@ -187,23 +185,23 @@ extern struct genl_family dp_vport_genl_family;
187void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); 185void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
188void ovs_dp_detach_port(struct vport *); 186void ovs_dp_detach_port(struct vport *);
189int ovs_dp_upcall(struct datapath *, struct sk_buff *, 187int ovs_dp_upcall(struct datapath *, struct sk_buff *,
190 const struct dp_upcall_info *); 188 const struct sw_flow_key *, const struct dp_upcall_info *);
191 189
192const char *ovs_dp_name(const struct datapath *dp); 190const char *ovs_dp_name(const struct datapath *dp);
193struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, 191struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
194 u8 cmd); 192 u8 cmd);
195 193
196int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, 194int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
197 struct sw_flow_key *); 195 const struct sw_flow_actions *, struct sw_flow_key *);
198 196
199void ovs_dp_notify_wq(struct work_struct *work); 197void ovs_dp_notify_wq(struct work_struct *work);
200 198
201int action_fifos_init(void); 199int action_fifos_init(void);
202void action_fifos_exit(void); 200void action_fifos_exit(void);
203 201
204#define OVS_NLERR(fmt, ...) \ 202#define OVS_NLERR(logging_allowed, fmt, ...) \
205do { \ 203do { \
206 if (net_ratelimit()) \ 204 if (logging_allowed && net_ratelimit()) \
207 pr_info("netlink: " fmt, ##__VA_ARGS__); \ 205 pr_info("netlink: " fmt "\n", ##__VA_ARGS__); \
208} while (0) 206} while (0)
209#endif /* datapath.h */ 207#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2b78789ea7c5..da2fae0873a5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -32,6 +32,7 @@
32#include <linux/if_arp.h> 32#include <linux/if_arp.h>
33#include <linux/ip.h> 33#include <linux/ip.h>
34#include <linux/ipv6.h> 34#include <linux/ipv6.h>
35#include <linux/mpls.h>
35#include <linux/sctp.h> 36#include <linux/sctp.h>
36#include <linux/smp.h> 37#include <linux/smp.h>
37#include <linux/tcp.h> 38#include <linux/tcp.h>
@@ -42,6 +43,7 @@
42#include <net/ip.h> 43#include <net/ip.h>
43#include <net/ip_tunnels.h> 44#include <net/ip_tunnels.h>
44#include <net/ipv6.h> 45#include <net/ipv6.h>
46#include <net/mpls.h>
45#include <net/ndisc.h> 47#include <net/ndisc.h>
46 48
47#include "datapath.h" 49#include "datapath.h"
@@ -64,10 +66,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
64#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF)) 66#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
65 67
66void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, 68void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
67 struct sk_buff *skb) 69 const struct sk_buff *skb)
68{ 70{
69 struct flow_stats *stats; 71 struct flow_stats *stats;
70 int node = numa_node_id(); 72 int node = numa_node_id();
73 int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
71 74
72 stats = rcu_dereference(flow->stats[node]); 75 stats = rcu_dereference(flow->stats[node]);
73 76
@@ -103,7 +106,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
103 if (likely(new_stats)) { 106 if (likely(new_stats)) {
104 new_stats->used = jiffies; 107 new_stats->used = jiffies;
105 new_stats->packet_count = 1; 108 new_stats->packet_count = 1;
106 new_stats->byte_count = skb->len; 109 new_stats->byte_count = len;
107 new_stats->tcp_flags = tcp_flags; 110 new_stats->tcp_flags = tcp_flags;
108 spin_lock_init(&new_stats->lock); 111 spin_lock_init(&new_stats->lock);
109 112
@@ -118,7 +121,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
118 121
119 stats->used = jiffies; 122 stats->used = jiffies;
120 stats->packet_count++; 123 stats->packet_count++;
121 stats->byte_count += skb->len; 124 stats->byte_count += len;
122 stats->tcp_flags |= tcp_flags; 125 stats->tcp_flags |= tcp_flags;
123unlock: 126unlock:
124 spin_unlock(&stats->lock); 127 spin_unlock(&stats->lock);
@@ -480,6 +483,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
480 return -ENOMEM; 483 return -ENOMEM;
481 484
482 skb_reset_network_header(skb); 485 skb_reset_network_header(skb);
486 skb_reset_mac_len(skb);
483 __skb_push(skb, skb->data - skb_mac_header(skb)); 487 __skb_push(skb, skb->data - skb_mac_header(skb));
484 488
485 /* Network layer. */ 489 /* Network layer. */
@@ -584,6 +588,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
584 memset(&key->ip, 0, sizeof(key->ip)); 588 memset(&key->ip, 0, sizeof(key->ip));
585 memset(&key->ipv4, 0, sizeof(key->ipv4)); 589 memset(&key->ipv4, 0, sizeof(key->ipv4));
586 } 590 }
591 } else if (eth_p_mpls(key->eth.type)) {
592 size_t stack_len = MPLS_HLEN;
593
594 /* In the presence of an MPLS label stack the end of the L2
595 * header and the beginning of the L3 header differ.
596 *
597 * Advance network_header to the beginning of the L3
598 * header. mac_len corresponds to the end of the L2 header.
599 */
600 while (1) {
601 __be32 lse;
602
603 error = check_header(skb, skb->mac_len + stack_len);
604 if (unlikely(error))
605 return 0;
606
607 memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
608
609 if (stack_len == MPLS_HLEN)
610 memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
611
612 skb_set_network_header(skb, skb->mac_len + stack_len);
613 if (lse & htonl(MPLS_LS_S_MASK))
614 break;
615
616 stack_len += MPLS_HLEN;
617 }
587 } else if (key->eth.type == htons(ETH_P_IPV6)) { 618 } else if (key->eth.type == htons(ETH_P_IPV6)) {
588 int nh_len; /* IPv6 Header + Extensions */ 619 int nh_len; /* IPv6 Header + Extensions */
589 620
@@ -649,7 +680,7 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
649 return key_extract(skb, key); 680 return key_extract(skb, key);
650} 681}
651 682
652int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, 683int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
653 struct sk_buff *skb, struct sw_flow_key *key) 684 struct sk_buff *skb, struct sw_flow_key *key)
654{ 685{
655 /* Extract metadata from packet. */ 686 /* Extract metadata from packet. */
@@ -682,12 +713,12 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info,
682 713
683int ovs_flow_key_extract_userspace(const struct nlattr *attr, 714int ovs_flow_key_extract_userspace(const struct nlattr *attr,
684 struct sk_buff *skb, 715 struct sk_buff *skb,
685 struct sw_flow_key *key) 716 struct sw_flow_key *key, bool log)
686{ 717{
687 int err; 718 int err;
688 719
689 /* Extract metadata from netlink attributes. */ 720 /* Extract metadata from netlink attributes. */
690 err = ovs_nla_get_flow_metadata(attr, key); 721 err = ovs_nla_get_flow_metadata(attr, key, log);
691 if (err) 722 if (err)
692 return err; 723 return err;
693 724
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 71813318c8c7..a8b30f334388 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,8 +37,8 @@ struct sk_buff;
37 37
38/* Used to memset ovs_key_ipv4_tunnel padding. */ 38/* Used to memset ovs_key_ipv4_tunnel padding. */
39#define OVS_TUNNEL_KEY_SIZE \ 39#define OVS_TUNNEL_KEY_SIZE \
40 (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ 40 (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \
41 FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) 41 FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst))
42 42
43struct ovs_key_ipv4_tunnel { 43struct ovs_key_ipv4_tunnel {
44 __be64 tun_id; 44 __be64 tun_id;
@@ -47,11 +47,13 @@ struct ovs_key_ipv4_tunnel {
47 __be16 tun_flags; 47 __be16 tun_flags;
48 u8 ipv4_tos; 48 u8 ipv4_tos;
49 u8 ipv4_ttl; 49 u8 ipv4_ttl;
50 __be16 tp_src;
51 __be16 tp_dst;
50} __packed __aligned(4); /* Minimize padding. */ 52} __packed __aligned(4); /* Minimize padding. */
51 53
52struct ovs_tunnel_info { 54struct ovs_tunnel_info {
53 struct ovs_key_ipv4_tunnel tunnel; 55 struct ovs_key_ipv4_tunnel tunnel;
54 struct geneve_opt *options; 56 const struct geneve_opt *options;
55 u8 options_len; 57 u8 options_len;
56}; 58};
57 59
@@ -64,27 +66,59 @@ struct ovs_tunnel_info {
64 FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ 66 FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
65 opt_len)) 67 opt_len))
66 68
67static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, 69static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
68 const struct iphdr *iph, 70 __be32 saddr, __be32 daddr,
69 __be64 tun_id, __be16 tun_flags, 71 u8 tos, u8 ttl,
70 struct geneve_opt *opts, 72 __be16 tp_src,
71 u8 opts_len) 73 __be16 tp_dst,
74 __be64 tun_id,
75 __be16 tun_flags,
76 const struct geneve_opt *opts,
77 u8 opts_len)
72{ 78{
73 tun_info->tunnel.tun_id = tun_id; 79 tun_info->tunnel.tun_id = tun_id;
74 tun_info->tunnel.ipv4_src = iph->saddr; 80 tun_info->tunnel.ipv4_src = saddr;
75 tun_info->tunnel.ipv4_dst = iph->daddr; 81 tun_info->tunnel.ipv4_dst = daddr;
76 tun_info->tunnel.ipv4_tos = iph->tos; 82 tun_info->tunnel.ipv4_tos = tos;
77 tun_info->tunnel.ipv4_ttl = iph->ttl; 83 tun_info->tunnel.ipv4_ttl = ttl;
78 tun_info->tunnel.tun_flags = tun_flags; 84 tun_info->tunnel.tun_flags = tun_flags;
79 85
80 /* clear struct padding. */ 86 /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
81 memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE, 0, 87 * the upper tunnel are used.
82 sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE); 88 * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
89 */
90 tun_info->tunnel.tp_src = tp_src;
91 tun_info->tunnel.tp_dst = tp_dst;
92
93 /* Clear struct padding. */
94 if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE)
95 memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE,
96 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
83 97
84 tun_info->options = opts; 98 tun_info->options = opts;
85 tun_info->options_len = opts_len; 99 tun_info->options_len = opts_len;
86} 100}
87 101
102static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
103 const struct iphdr *iph,
104 __be16 tp_src,
105 __be16 tp_dst,
106 __be64 tun_id,
107 __be16 tun_flags,
108 const struct geneve_opt *opts,
109 u8 opts_len)
110{
111 __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
112 iph->tos, iph->ttl,
113 tp_src, tp_dst,
114 tun_id, tun_flags,
115 opts, opts_len);
116}
117
118#define OVS_SW_FLOW_KEY_METADATA_SIZE \
119 (offsetof(struct sw_flow_key, recirc_id) + \
120 FIELD_SIZEOF(struct sw_flow_key, recirc_id))
121
88struct sw_flow_key { 122struct sw_flow_key {
89 u8 tun_opts[255]; 123 u8 tun_opts[255];
90 u8 tun_opts_len; 124 u8 tun_opts_len;
@@ -102,12 +136,17 @@ struct sw_flow_key {
102 __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ 136 __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
103 __be16 type; /* Ethernet frame type. */ 137 __be16 type; /* Ethernet frame type. */
104 } eth; 138 } eth;
105 struct { 139 union {
106 u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ 140 struct {
107 u8 tos; /* IP ToS. */ 141 __be32 top_lse; /* top label stack entry */
108 u8 ttl; /* IP TTL/hop limit. */ 142 } mpls;
109 u8 frag; /* One of OVS_FRAG_TYPE_*. */ 143 struct {
110 } ip; 144 u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
145 u8 tos; /* IP ToS. */
146 u8 ttl; /* IP TTL/hop limit. */
147 u8 frag; /* One of OVS_FRAG_TYPE_*. */
148 } ip;
149 };
111 struct { 150 struct {
112 __be16 src; /* TCP/UDP/SCTP source port. */ 151 __be16 src; /* TCP/UDP/SCTP source port. */
113 __be16 dst; /* TCP/UDP/SCTP destination port. */ 152 __be16 dst; /* TCP/UDP/SCTP destination port. */
@@ -205,18 +244,19 @@ struct arp_eth_header {
205} __packed; 244} __packed;
206 245
207void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, 246void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
208 struct sk_buff *); 247 const struct sk_buff *);
209void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, 248void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
210 unsigned long *used, __be16 *tcp_flags); 249 unsigned long *used, __be16 *tcp_flags);
211void ovs_flow_stats_clear(struct sw_flow *); 250void ovs_flow_stats_clear(struct sw_flow *);
212u64 ovs_flow_used_time(unsigned long flow_jiffies); 251u64 ovs_flow_used_time(unsigned long flow_jiffies);
213 252
214int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key); 253int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
215int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, struct sk_buff *skb, 254int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
255 struct sk_buff *skb,
216 struct sw_flow_key *key); 256 struct sw_flow_key *key);
217/* Extract key from packet coming from userspace. */ 257/* Extract key from packet coming from userspace. */
218int ovs_flow_key_extract_userspace(const struct nlattr *attr, 258int ovs_flow_key_extract_userspace(const struct nlattr *attr,
219 struct sk_buff *skb, 259 struct sk_buff *skb,
220 struct sw_flow_key *key); 260 struct sw_flow_key *key, bool log);
221 261
222#endif /* flow.h */ 262#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 939bcb32100f..d1eecf707613 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -46,24 +46,22 @@
46#include <net/ip.h> 46#include <net/ip.h>
47#include <net/ipv6.h> 47#include <net/ipv6.h>
48#include <net/ndisc.h> 48#include <net/ndisc.h>
49#include <net/mpls.h>
49 50
50#include "flow_netlink.h" 51#include "flow_netlink.h"
51 52
52static void update_range__(struct sw_flow_match *match, 53static void update_range(struct sw_flow_match *match,
53 size_t offset, size_t size, bool is_mask) 54 size_t offset, size_t size, bool is_mask)
54{ 55{
55 struct sw_flow_key_range *range = NULL; 56 struct sw_flow_key_range *range;
56 size_t start = rounddown(offset, sizeof(long)); 57 size_t start = rounddown(offset, sizeof(long));
57 size_t end = roundup(offset + size, sizeof(long)); 58 size_t end = roundup(offset + size, sizeof(long));
58 59
59 if (!is_mask) 60 if (!is_mask)
60 range = &match->range; 61 range = &match->range;
61 else if (match->mask) 62 else
62 range = &match->mask->range; 63 range = &match->mask->range;
63 64
64 if (!range)
65 return;
66
67 if (range->start == range->end) { 65 if (range->start == range->end) {
68 range->start = start; 66 range->start = start;
69 range->end = end; 67 range->end = end;
@@ -79,22 +77,20 @@ static void update_range__(struct sw_flow_match *match,
79 77
80#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ 78#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
81 do { \ 79 do { \
82 update_range__(match, offsetof(struct sw_flow_key, field), \ 80 update_range(match, offsetof(struct sw_flow_key, field), \
83 sizeof((match)->key->field), is_mask); \ 81 sizeof((match)->key->field), is_mask); \
84 if (is_mask) { \ 82 if (is_mask) \
85 if ((match)->mask) \ 83 (match)->mask->key.field = value; \
86 (match)->mask->key.field = value; \ 84 else \
87 } else { \
88 (match)->key->field = value; \ 85 (match)->key->field = value; \
89 } \
90 } while (0) 86 } while (0)
91 87
92#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ 88#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
93 do { \ 89 do { \
94 update_range__(match, offset, len, is_mask); \ 90 update_range(match, offset, len, is_mask); \
95 if (is_mask) \ 91 if (is_mask) \
96 memcpy((u8 *)&(match)->mask->key + offset, value_p, \ 92 memcpy((u8 *)&(match)->mask->key + offset, value_p, \
97 len); \ 93 len); \
98 else \ 94 else \
99 memcpy((u8 *)(match)->key + offset, value_p, len); \ 95 memcpy((u8 *)(match)->key + offset, value_p, len); \
100 } while (0) 96 } while (0)
@@ -103,22 +99,20 @@ static void update_range__(struct sw_flow_match *match,
103 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ 99 SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
104 value_p, len, is_mask) 100 value_p, len, is_mask)
105 101
106#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ 102#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
107 do { \ 103 do { \
108 update_range__(match, offsetof(struct sw_flow_key, field), \ 104 update_range(match, offsetof(struct sw_flow_key, field), \
109 sizeof((match)->key->field), is_mask); \ 105 sizeof((match)->key->field), is_mask); \
110 if (is_mask) { \ 106 if (is_mask) \
111 if ((match)->mask) \ 107 memset((u8 *)&(match)->mask->key.field, value, \
112 memset((u8 *)&(match)->mask->key.field, value,\ 108 sizeof((match)->mask->key.field)); \
113 sizeof((match)->mask->key.field)); \ 109 else \
114 } else { \
115 memset((u8 *)&(match)->key->field, value, \ 110 memset((u8 *)&(match)->key->field, value, \
116 sizeof((match)->key->field)); \ 111 sizeof((match)->key->field)); \
117 } \
118 } while (0) 112 } while (0)
119 113
120static bool match_validate(const struct sw_flow_match *match, 114static bool match_validate(const struct sw_flow_match *match,
121 u64 key_attrs, u64 mask_attrs) 115 u64 key_attrs, u64 mask_attrs, bool log)
122{ 116{
123 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET; 117 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
124 u64 mask_allowed = key_attrs; /* At most allow all key attributes */ 118 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
@@ -134,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match,
134 | (1 << OVS_KEY_ATTR_ICMP) 128 | (1 << OVS_KEY_ATTR_ICMP)
135 | (1 << OVS_KEY_ATTR_ICMPV6) 129 | (1 << OVS_KEY_ATTR_ICMPV6)
136 | (1 << OVS_KEY_ATTR_ARP) 130 | (1 << OVS_KEY_ATTR_ARP)
137 | (1 << OVS_KEY_ATTR_ND)); 131 | (1 << OVS_KEY_ATTR_ND)
132 | (1 << OVS_KEY_ATTR_MPLS));
138 133
139 /* Always allowed mask fields. */ 134 /* Always allowed mask fields. */
140 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) 135 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -149,6 +144,12 @@ static bool match_validate(const struct sw_flow_match *match,
149 mask_allowed |= 1 << OVS_KEY_ATTR_ARP; 144 mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
150 } 145 }
151 146
147 if (eth_p_mpls(match->key->eth.type)) {
148 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
149 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
150 mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
151 }
152
152 if (match->key->eth.type == htons(ETH_P_IP)) { 153 if (match->key->eth.type == htons(ETH_P_IP)) {
153 key_expected |= 1 << OVS_KEY_ATTR_IPV4; 154 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
154 if (match->mask && (match->mask->key.eth.type == htons(0xffff))) 155 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -220,7 +221,7 @@ static bool match_validate(const struct sw_flow_match *match,
220 htons(NDISC_NEIGHBOUR_SOLICITATION) || 221 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
221 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 222 match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
222 key_expected |= 1 << OVS_KEY_ATTR_ND; 223 key_expected |= 1 << OVS_KEY_ATTR_ND;
223 if (match->mask && (match->mask->key.tp.src == htons(0xffff))) 224 if (match->mask && (match->mask->key.tp.src == htons(0xff)))
224 mask_allowed |= 1 << OVS_KEY_ATTR_ND; 225 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
225 } 226 }
226 } 227 }
@@ -229,21 +230,65 @@ static bool match_validate(const struct sw_flow_match *match,
229 230
230 if ((key_attrs & key_expected) != key_expected) { 231 if ((key_attrs & key_expected) != key_expected) {
231 /* Key attributes check failed. */ 232 /* Key attributes check failed. */
232 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", 233 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
233 (unsigned long long)key_attrs, (unsigned long long)key_expected); 234 (unsigned long long)key_attrs,
235 (unsigned long long)key_expected);
234 return false; 236 return false;
235 } 237 }
236 238
237 if ((mask_attrs & mask_allowed) != mask_attrs) { 239 if ((mask_attrs & mask_allowed) != mask_attrs) {
238 /* Mask attributes check failed. */ 240 /* Mask attributes check failed. */
239 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", 241 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
240 (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); 242 (unsigned long long)mask_attrs,
243 (unsigned long long)mask_allowed);
241 return false; 244 return false;
242 } 245 }
243 246
244 return true; 247 return true;
245} 248}
246 249
250size_t ovs_tun_key_attr_size(void)
251{
252 /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
253 * updating this function.
254 */
255 return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
256 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
257 + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
258 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
259 + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
260 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
261 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
262 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
263 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
264 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
265 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
266}
267
268size_t ovs_key_attr_size(void)
269{
270 /* Whenever adding new OVS_KEY_ FIELDS, we should consider
271 * updating this function.
272 */
273 BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
274
275 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
276 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
277 + ovs_tun_key_attr_size()
278 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
279 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
280 + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
281 + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
282 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
283 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
284 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
285 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
286 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
287 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
288 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
289 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
290}
291
247/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 292/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
248static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 293static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
249 [OVS_KEY_ATTR_ENCAP] = -1, 294 [OVS_KEY_ATTR_ENCAP] = -1,
@@ -266,6 +311,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
266 [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), 311 [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
267 [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), 312 [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
268 [OVS_KEY_ATTR_TUNNEL] = -1, 313 [OVS_KEY_ATTR_TUNNEL] = -1,
314 [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
269}; 315};
270 316
271static bool is_all_zero(const u8 *fp, size_t size) 317static bool is_all_zero(const u8 *fp, size_t size)
@@ -284,7 +330,7 @@ static bool is_all_zero(const u8 *fp, size_t size)
284 330
285static int __parse_flow_nlattrs(const struct nlattr *attr, 331static int __parse_flow_nlattrs(const struct nlattr *attr,
286 const struct nlattr *a[], 332 const struct nlattr *a[],
287 u64 *attrsp, bool nz) 333 u64 *attrsp, bool log, bool nz)
288{ 334{
289 const struct nlattr *nla; 335 const struct nlattr *nla;
290 u64 attrs; 336 u64 attrs;
@@ -296,21 +342,20 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
296 int expected_len; 342 int expected_len;
297 343
298 if (type > OVS_KEY_ATTR_MAX) { 344 if (type > OVS_KEY_ATTR_MAX) {
299 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n", 345 OVS_NLERR(log, "Key type %d is out of range max %d",
300 type, OVS_KEY_ATTR_MAX); 346 type, OVS_KEY_ATTR_MAX);
301 return -EINVAL; 347 return -EINVAL;
302 } 348 }
303 349
304 if (attrs & (1 << type)) { 350 if (attrs & (1 << type)) {
305 OVS_NLERR("Duplicate key attribute (type %d).\n", type); 351 OVS_NLERR(log, "Duplicate key (type %d).", type);
306 return -EINVAL; 352 return -EINVAL;
307 } 353 }
308 354
309 expected_len = ovs_key_lens[type]; 355 expected_len = ovs_key_lens[type];
310 if (nla_len(nla) != expected_len && expected_len != -1) { 356 if (nla_len(nla) != expected_len && expected_len != -1) {
311 OVS_NLERR("Key attribute has unexpected length (type=%d" 357 OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
312 ", length=%d, expected=%d).\n", type, 358 type, nla_len(nla), expected_len);
313 nla_len(nla), expected_len);
314 return -EINVAL; 359 return -EINVAL;
315 } 360 }
316 361
@@ -320,7 +365,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
320 } 365 }
321 } 366 }
322 if (rem) { 367 if (rem) {
323 OVS_NLERR("Message has %d unknown bytes.\n", rem); 368 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
324 return -EINVAL; 369 return -EINVAL;
325 } 370 }
326 371
@@ -329,28 +374,84 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
329} 374}
330 375
331static int parse_flow_mask_nlattrs(const struct nlattr *attr, 376static int parse_flow_mask_nlattrs(const struct nlattr *attr,
332 const struct nlattr *a[], u64 *attrsp) 377 const struct nlattr *a[], u64 *attrsp,
378 bool log)
333{ 379{
334 return __parse_flow_nlattrs(attr, a, attrsp, true); 380 return __parse_flow_nlattrs(attr, a, attrsp, log, true);
335} 381}
336 382
337static int parse_flow_nlattrs(const struct nlattr *attr, 383static int parse_flow_nlattrs(const struct nlattr *attr,
338 const struct nlattr *a[], u64 *attrsp) 384 const struct nlattr *a[], u64 *attrsp,
385 bool log)
386{
387 return __parse_flow_nlattrs(attr, a, attrsp, log, false);
388}
389
390static int genev_tun_opt_from_nlattr(const struct nlattr *a,
391 struct sw_flow_match *match, bool is_mask,
392 bool log)
339{ 393{
340 return __parse_flow_nlattrs(attr, a, attrsp, false); 394 unsigned long opt_key_offset;
395
396 if (nla_len(a) > sizeof(match->key->tun_opts)) {
397 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
398 nla_len(a), sizeof(match->key->tun_opts));
399 return -EINVAL;
400 }
401
402 if (nla_len(a) % 4 != 0) {
403 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
404 nla_len(a));
405 return -EINVAL;
406 }
407
408 /* We need to record the length of the options passed
409 * down, otherwise packets with the same format but
410 * additional options will be silently matched.
411 */
412 if (!is_mask) {
413 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
414 false);
415 } else {
416 /* This is somewhat unusual because it looks at
417 * both the key and mask while parsing the
418 * attributes (and by extension assumes the key
419 * is parsed first). Normally, we would verify
420 * that each is the correct length and that the
421 * attributes line up in the validate function.
422 * However, that is difficult because this is
423 * variable length and we won't have the
424 * information later.
425 */
426 if (match->key->tun_opts_len != nla_len(a)) {
427 OVS_NLERR(log, "Geneve option len %d != mask len %d",
428 match->key->tun_opts_len, nla_len(a));
429 return -EINVAL;
430 }
431
432 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
433 }
434
435 opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0,
436 nla_len(a));
437 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
438 nla_len(a), is_mask);
439 return 0;
341} 440}
342 441
343static int ipv4_tun_from_nlattr(const struct nlattr *attr, 442static int ipv4_tun_from_nlattr(const struct nlattr *attr,
344 struct sw_flow_match *match, bool is_mask) 443 struct sw_flow_match *match, bool is_mask,
444 bool log)
345{ 445{
346 struct nlattr *a; 446 struct nlattr *a;
347 int rem; 447 int rem;
348 bool ttl = false; 448 bool ttl = false;
349 __be16 tun_flags = 0; 449 __be16 tun_flags = 0;
350 unsigned long opt_key_offset;
351 450
352 nla_for_each_nested(a, attr, rem) { 451 nla_for_each_nested(a, attr, rem) {
353 int type = nla_type(a); 452 int type = nla_type(a);
453 int err;
454
354 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 455 static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
355 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 456 [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
356 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 457 [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
@@ -359,20 +460,21 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
359 [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 460 [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
360 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 461 [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
361 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 462 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
463 [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
464 [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
362 [OVS_TUNNEL_KEY_ATTR_OAM] = 0, 465 [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
363 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, 466 [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
364 }; 467 };
365 468
366 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 469 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
367 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n", 470 OVS_NLERR(log, "Tunnel attr %d out of range max %d",
368 type, OVS_TUNNEL_KEY_ATTR_MAX); 471 type, OVS_TUNNEL_KEY_ATTR_MAX);
369 return -EINVAL; 472 return -EINVAL;
370 } 473 }
371 474
372 if (ovs_tunnel_key_lens[type] != nla_len(a) && 475 if (ovs_tunnel_key_lens[type] != nla_len(a) &&
373 ovs_tunnel_key_lens[type] != -1) { 476 ovs_tunnel_key_lens[type] != -1) {
374 OVS_NLERR("IPv4 tunnel attribute type has unexpected " 477 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
375 " length (type=%d, length=%d, expected=%d).\n",
376 type, nla_len(a), ovs_tunnel_key_lens[type]); 478 type, nla_len(a), ovs_tunnel_key_lens[type]);
377 return -EINVAL; 479 return -EINVAL;
378 } 480 }
@@ -406,62 +508,26 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
406 case OVS_TUNNEL_KEY_ATTR_CSUM: 508 case OVS_TUNNEL_KEY_ATTR_CSUM:
407 tun_flags |= TUNNEL_CSUM; 509 tun_flags |= TUNNEL_CSUM;
408 break; 510 break;
511 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
512 SW_FLOW_KEY_PUT(match, tun_key.tp_src,
513 nla_get_be16(a), is_mask);
514 break;
515 case OVS_TUNNEL_KEY_ATTR_TP_DST:
516 SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
517 nla_get_be16(a), is_mask);
518 break;
409 case OVS_TUNNEL_KEY_ATTR_OAM: 519 case OVS_TUNNEL_KEY_ATTR_OAM:
410 tun_flags |= TUNNEL_OAM; 520 tun_flags |= TUNNEL_OAM;
411 break; 521 break;
412 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 522 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
413 tun_flags |= TUNNEL_OPTIONS_PRESENT; 523 err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
414 if (nla_len(a) > sizeof(match->key->tun_opts)) { 524 if (err)
415 OVS_NLERR("Geneve option length exceeds maximum size (len %d, max %zu).\n", 525 return err;
416 nla_len(a),
417 sizeof(match->key->tun_opts));
418 return -EINVAL;
419 }
420
421 if (nla_len(a) % 4 != 0) {
422 OVS_NLERR("Geneve option length is not a multiple of 4 (len %d).\n",
423 nla_len(a));
424 return -EINVAL;
425 }
426
427 /* We need to record the length of the options passed
428 * down, otherwise packets with the same format but
429 * additional options will be silently matched.
430 */
431 if (!is_mask) {
432 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
433 false);
434 } else {
435 /* This is somewhat unusual because it looks at
436 * both the key and mask while parsing the
437 * attributes (and by extension assumes the key
438 * is parsed first). Normally, we would verify
439 * that each is the correct length and that the
440 * attributes line up in the validate function.
441 * However, that is difficult because this is
442 * variable length and we won't have the
443 * information later.
444 */
445 if (match->key->tun_opts_len != nla_len(a)) {
446 OVS_NLERR("Geneve option key length (%d) is different from mask length (%d).",
447 match->key->tun_opts_len,
448 nla_len(a));
449 return -EINVAL;
450 }
451
452 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff,
453 true);
454 }
455 526
456 opt_key_offset = (unsigned long)GENEVE_OPTS( 527 tun_flags |= TUNNEL_OPTIONS_PRESENT;
457 (struct sw_flow_key *)0,
458 nla_len(a));
459 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset,
460 nla_data(a), nla_len(a),
461 is_mask);
462 break; 528 break;
463 default: 529 default:
464 OVS_NLERR("Unknown IPv4 tunnel attribute (%d).\n", 530 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
465 type); 531 type);
466 return -EINVAL; 532 return -EINVAL;
467 } 533 }
@@ -470,18 +536,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
470 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); 536 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
471 537
472 if (rem > 0) { 538 if (rem > 0) {
473 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem); 539 OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
540 rem);
474 return -EINVAL; 541 return -EINVAL;
475 } 542 }
476 543
477 if (!is_mask) { 544 if (!is_mask) {
478 if (!match->key->tun_key.ipv4_dst) { 545 if (!match->key->tun_key.ipv4_dst) {
479 OVS_NLERR("IPv4 tunnel destination address is zero.\n"); 546 OVS_NLERR(log, "IPv4 tunnel dst address is zero");
480 return -EINVAL; 547 return -EINVAL;
481 } 548 }
482 549
483 if (!ttl) { 550 if (!ttl) {
484 OVS_NLERR("IPv4 tunnel TTL not specified.\n"); 551 OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
485 return -EINVAL; 552 return -EINVAL;
486 } 553 }
487 } 554 }
@@ -514,6 +581,12 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
514 if ((output->tun_flags & TUNNEL_CSUM) && 581 if ((output->tun_flags & TUNNEL_CSUM) &&
515 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 582 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
516 return -EMSGSIZE; 583 return -EMSGSIZE;
584 if (output->tp_src &&
585 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
586 return -EMSGSIZE;
587 if (output->tp_dst &&
588 nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
589 return -EMSGSIZE;
517 if ((output->tun_flags & TUNNEL_OAM) && 590 if ((output->tun_flags & TUNNEL_OAM) &&
518 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 591 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
519 return -EMSGSIZE; 592 return -EMSGSIZE;
@@ -525,7 +598,6 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
525 return 0; 598 return 0;
526} 599}
527 600
528
529static int ipv4_tun_to_nlattr(struct sk_buff *skb, 601static int ipv4_tun_to_nlattr(struct sk_buff *skb,
530 const struct ovs_key_ipv4_tunnel *output, 602 const struct ovs_key_ipv4_tunnel *output,
531 const struct geneve_opt *tun_opts, 603 const struct geneve_opt *tun_opts,
@@ -546,8 +618,17 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
546 return 0; 618 return 0;
547} 619}
548 620
621int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
622 const struct ovs_tunnel_info *egress_tun_info)
623{
624 return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
625 egress_tun_info->options,
626 egress_tun_info->options_len);
627}
628
549static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, 629static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
550 const struct nlattr **a, bool is_mask) 630 const struct nlattr **a, bool is_mask,
631 bool log)
551{ 632{
552 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { 633 if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
553 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); 634 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -572,10 +653,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
572 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 653 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
573 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 654 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
574 655
575 if (is_mask) 656 if (is_mask) {
576 in_port = 0xffffffff; /* Always exact match in_port. */ 657 in_port = 0xffffffff; /* Always exact match in_port. */
577 else if (in_port >= DP_MAX_PORTS) 658 } else if (in_port >= DP_MAX_PORTS) {
659 OVS_NLERR(log, "Port %d exceeds max allowable %d",
660 in_port, DP_MAX_PORTS);
578 return -EINVAL; 661 return -EINVAL;
662 }
579 663
580 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); 664 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
581 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 665 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
@@ -591,7 +675,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
591 } 675 }
592 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 676 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
593 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 677 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
594 is_mask)) 678 is_mask, log))
595 return -EINVAL; 679 return -EINVAL;
596 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 680 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
597 } 681 }
@@ -599,12 +683,12 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
599} 683}
600 684
601static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, 685static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
602 const struct nlattr **a, bool is_mask) 686 const struct nlattr **a, bool is_mask,
687 bool log)
603{ 688{
604 int err; 689 int err;
605 u64 orig_attrs = attrs;
606 690
607 err = metadata_from_nlattrs(match, &attrs, a, is_mask); 691 err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
608 if (err) 692 if (err)
609 return err; 693 return err;
610 694
@@ -625,17 +709,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
625 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 709 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
626 if (!(tci & htons(VLAN_TAG_PRESENT))) { 710 if (!(tci & htons(VLAN_TAG_PRESENT))) {
627 if (is_mask) 711 if (is_mask)
628 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n"); 712 OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
629 else 713 else
630 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n"); 714 OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
631 715
632 return -EINVAL; 716 return -EINVAL;
633 } 717 }
634 718
635 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); 719 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
636 attrs &= ~(1 << OVS_KEY_ATTR_VLAN); 720 attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
637 } else if (!is_mask) 721 }
638 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
639 722
640 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 723 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
641 __be16 eth_type; 724 __be16 eth_type;
@@ -645,8 +728,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
645 /* Always exact match EtherType. */ 728 /* Always exact match EtherType. */
646 eth_type = htons(0xffff); 729 eth_type = htons(0xffff);
647 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) { 730 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
648 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n", 731 OVS_NLERR(log, "EtherType %x is less than min %x",
649 ntohs(eth_type), ETH_P_802_3_MIN); 732 ntohs(eth_type), ETH_P_802_3_MIN);
650 return -EINVAL; 733 return -EINVAL;
651 } 734 }
652 735
@@ -661,8 +744,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
661 744
662 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 745 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
663 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { 746 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
664 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n", 747 OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
665 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); 748 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
666 return -EINVAL; 749 return -EINVAL;
667 } 750 }
668 SW_FLOW_KEY_PUT(match, ip.proto, 751 SW_FLOW_KEY_PUT(match, ip.proto,
@@ -685,10 +768,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
685 768
686 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 769 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
687 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { 770 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
688 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n", 771 OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
689 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); 772 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
690 return -EINVAL; 773 return -EINVAL;
691 } 774 }
775
776 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
777 OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
778 ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
779 return -EINVAL;
780 }
781
692 SW_FLOW_KEY_PUT(match, ipv6.label, 782 SW_FLOW_KEY_PUT(match, ipv6.label,
693 ipv6_key->ipv6_label, is_mask); 783 ipv6_key->ipv6_label, is_mask);
694 SW_FLOW_KEY_PUT(match, ip.proto, 784 SW_FLOW_KEY_PUT(match, ip.proto,
@@ -716,7 +806,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
716 806
717 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 807 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
718 if (!is_mask && (arp_key->arp_op & htons(0xff00))) { 808 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
719 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n", 809 OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
720 arp_key->arp_op); 810 arp_key->arp_op);
721 return -EINVAL; 811 return -EINVAL;
722 } 812 }
@@ -735,6 +825,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
735 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 825 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
736 } 826 }
737 827
828 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
829 const struct ovs_key_mpls *mpls_key;
830
831 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
832 SW_FLOW_KEY_PUT(match, mpls.top_lse,
833 mpls_key->mpls_lse, is_mask);
834
835 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
836 }
837
738 if (attrs & (1 << OVS_KEY_ATTR_TCP)) { 838 if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
739 const struct ovs_key_tcp *tcp_key; 839 const struct ovs_key_tcp *tcp_key;
740 840
@@ -745,15 +845,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
745 } 845 }
746 846
747 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { 847 if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
748 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { 848 SW_FLOW_KEY_PUT(match, tp.flags,
749 SW_FLOW_KEY_PUT(match, tp.flags, 849 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
750 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), 850 is_mask);
751 is_mask);
752 } else {
753 SW_FLOW_KEY_PUT(match, tp.flags,
754 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
755 is_mask);
756 }
757 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); 851 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
758 } 852 }
759 853
@@ -812,8 +906,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
812 attrs &= ~(1 << OVS_KEY_ATTR_ND); 906 attrs &= ~(1 << OVS_KEY_ATTR_ND);
813 } 907 }
814 908
815 if (attrs != 0) 909 if (attrs != 0) {
910 OVS_NLERR(log, "Unknown key attributes %llx",
911 (unsigned long long)attrs);
816 return -EINVAL; 912 return -EINVAL;
913 }
817 914
818 return 0; 915 return 0;
819} 916}
@@ -851,10 +948,14 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
851 * of this flow. 948 * of this flow.
852 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink 949 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
853 * attribute specifies the mask field of the wildcarded flow. 950 * attribute specifies the mask field of the wildcarded flow.
951 * @log: Boolean to allow kernel error logging. Normally true, but when
952 * probing for feature compatibility this should be passed in as false to
953 * suppress unnecessary error logging.
854 */ 954 */
855int ovs_nla_get_match(struct sw_flow_match *match, 955int ovs_nla_get_match(struct sw_flow_match *match,
856 const struct nlattr *key, 956 const struct nlattr *nla_key,
857 const struct nlattr *mask) 957 const struct nlattr *nla_mask,
958 bool log)
858{ 959{
859 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 960 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
860 const struct nlattr *encap; 961 const struct nlattr *encap;
@@ -864,7 +965,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
864 bool encap_valid = false; 965 bool encap_valid = false;
865 int err; 966 int err;
866 967
867 err = parse_flow_nlattrs(key, a, &key_attrs); 968 err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
868 if (err) 969 if (err)
869 return err; 970 return err;
870 971
@@ -875,7 +976,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
875 976
876 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && 977 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
877 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { 978 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
878 OVS_NLERR("Invalid Vlan frame.\n"); 979 OVS_NLERR(log, "Invalid Vlan frame.");
879 return -EINVAL; 980 return -EINVAL;
880 } 981 }
881 982
@@ -886,61 +987,68 @@ int ovs_nla_get_match(struct sw_flow_match *match,
886 encap_valid = true; 987 encap_valid = true;
887 988
888 if (tci & htons(VLAN_TAG_PRESENT)) { 989 if (tci & htons(VLAN_TAG_PRESENT)) {
889 err = parse_flow_nlattrs(encap, a, &key_attrs); 990 err = parse_flow_nlattrs(encap, a, &key_attrs, log);
890 if (err) 991 if (err)
891 return err; 992 return err;
892 } else if (!tci) { 993 } else if (!tci) {
893 /* Corner case for truncated 802.1Q header. */ 994 /* Corner case for truncated 802.1Q header. */
894 if (nla_len(encap)) { 995 if (nla_len(encap)) {
895 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n"); 996 OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
896 return -EINVAL; 997 return -EINVAL;
897 } 998 }
898 } else { 999 } else {
899 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n"); 1000 OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
900 return -EINVAL; 1001 return -EINVAL;
901 } 1002 }
902 } 1003 }
903 1004
904 err = ovs_key_from_nlattrs(match, key_attrs, a, false); 1005 err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
905 if (err) 1006 if (err)
906 return err; 1007 return err;
907 1008
908 if (match->mask && !mask) { 1009 if (match->mask) {
909 /* Create an exact match mask. We need to set to 0xff all the 1010 if (!nla_mask) {
910 * 'match->mask' fields that have been touched in 'match->key'. 1011 /* Create an exact match mask. We need to set to 0xff
911 * We cannot simply memset 'match->mask', because padding bytes 1012 * all the 'match->mask' fields that have been touched
912 * and fields not specified in 'match->key' should be left to 0. 1013 * in 'match->key'. We cannot simply memset
913 * Instead, we use a stream of netlink attributes, copied from 1014 * 'match->mask', because padding bytes and fields not
914 * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care 1015 * specified in 'match->key' should be left to 0.
915 * of filling 'match->mask' appropriately. 1016 * Instead, we use a stream of netlink attributes,
916 */ 1017 * copied from 'key' and set to 0xff.
917 newmask = kmemdup(key, nla_total_size(nla_len(key)), 1018 * ovs_key_from_nlattrs() will take care of filling
918 GFP_KERNEL); 1019 * 'match->mask' appropriately.
919 if (!newmask) 1020 */
920 return -ENOMEM; 1021 newmask = kmemdup(nla_key,
1022 nla_total_size(nla_len(nla_key)),
1023 GFP_KERNEL);
1024 if (!newmask)
1025 return -ENOMEM;
921 1026
922 mask_set_nlattr(newmask, 0xff); 1027 mask_set_nlattr(newmask, 0xff);
923 1028
924 /* The userspace does not send tunnel attributes that are 0, 1029 /* The userspace does not send tunnel attributes that
925 * but we should not wildcard them nonetheless. 1030 * are 0, but we should not wildcard them nonetheless.
926 */ 1031 */
927 if (match->key->tun_key.ipv4_dst) 1032 if (match->key->tun_key.ipv4_dst)
928 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); 1033 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1034 0xff, true);
929 1035
930 mask = newmask; 1036 nla_mask = newmask;
931 } 1037 }
932 1038
933 if (mask) { 1039 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
934 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
935 if (err) 1040 if (err)
936 goto free_newmask; 1041 goto free_newmask;
937 1042
1043 /* Always match on tci. */
1044 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1045
938 if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { 1046 if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
939 __be16 eth_type = 0; 1047 __be16 eth_type = 0;
940 __be16 tci = 0; 1048 __be16 tci = 0;
941 1049
942 if (!encap_valid) { 1050 if (!encap_valid) {
943 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); 1051 OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
944 err = -EINVAL; 1052 err = -EINVAL;
945 goto free_newmask; 1053 goto free_newmask;
946 } 1054 }
@@ -952,12 +1060,13 @@ int ovs_nla_get_match(struct sw_flow_match *match,
952 if (eth_type == htons(0xffff)) { 1060 if (eth_type == htons(0xffff)) {
953 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1061 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
954 encap = a[OVS_KEY_ATTR_ENCAP]; 1062 encap = a[OVS_KEY_ATTR_ENCAP];
955 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); 1063 err = parse_flow_mask_nlattrs(encap, a,
1064 &mask_attrs, log);
956 if (err) 1065 if (err)
957 goto free_newmask; 1066 goto free_newmask;
958 } else { 1067 } else {
959 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", 1068 OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
960 ntohs(eth_type)); 1069 ntohs(eth_type));
961 err = -EINVAL; 1070 err = -EINVAL;
962 goto free_newmask; 1071 goto free_newmask;
963 } 1072 }
@@ -966,18 +1075,19 @@ int ovs_nla_get_match(struct sw_flow_match *match,
966 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1075 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
967 1076
968 if (!(tci & htons(VLAN_TAG_PRESENT))) { 1077 if (!(tci & htons(VLAN_TAG_PRESENT))) {
969 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); 1078 OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
1079 ntohs(tci));
970 err = -EINVAL; 1080 err = -EINVAL;
971 goto free_newmask; 1081 goto free_newmask;
972 } 1082 }
973 } 1083 }
974 1084
975 err = ovs_key_from_nlattrs(match, mask_attrs, a, true); 1085 err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
976 if (err) 1086 if (err)
977 goto free_newmask; 1087 goto free_newmask;
978 } 1088 }
979 1089
980 if (!match_validate(match, key_attrs, mask_attrs)) 1090 if (!match_validate(match, key_attrs, mask_attrs, log))
981 err = -EINVAL; 1091 err = -EINVAL;
982 1092
983free_newmask: 1093free_newmask:
@@ -990,6 +1100,9 @@ free_newmask:
990 * @key: Receives extracted in_port, priority, tun_key and skb_mark. 1100 * @key: Receives extracted in_port, priority, tun_key and skb_mark.
991 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1101 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
992 * sequence. 1102 * sequence.
1103 * @log: Boolean to allow kernel error logging. Normally true, but when
1104 * probing for feature compatibility this should be passed in as false to
1105 * suppress unnecessary error logging.
993 * 1106 *
994 * This parses a series of Netlink attributes that form a flow key, which must 1107 * This parses a series of Netlink attributes that form a flow key, which must
995 * take the same form accepted by flow_from_nlattrs(), but only enough of it to 1108 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
@@ -998,14 +1111,15 @@ free_newmask:
998 */ 1111 */
999 1112
1000int ovs_nla_get_flow_metadata(const struct nlattr *attr, 1113int ovs_nla_get_flow_metadata(const struct nlattr *attr,
1001 struct sw_flow_key *key) 1114 struct sw_flow_key *key,
1115 bool log)
1002{ 1116{
1003 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1117 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1004 struct sw_flow_match match; 1118 struct sw_flow_match match;
1005 u64 attrs = 0; 1119 u64 attrs = 0;
1006 int err; 1120 int err;
1007 1121
1008 err = parse_flow_nlattrs(attr, a, &attrs); 1122 err = parse_flow_nlattrs(attr, a, &attrs, log);
1009 if (err) 1123 if (err)
1010 return -EINVAL; 1124 return -EINVAL;
1011 1125
@@ -1014,7 +1128,7 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
1014 1128
1015 key->phy.in_port = DP_MAX_PORTS; 1129 key->phy.in_port = DP_MAX_PORTS;
1016 1130
1017 return metadata_from_nlattrs(&match, &attrs, a, false); 1131 return metadata_from_nlattrs(&match, &attrs, a, false, log);
1018} 1132}
1019 1133
1020int ovs_nla_put_flow(const struct sw_flow_key *swkey, 1134int ovs_nla_put_flow(const struct sw_flow_key *swkey,
@@ -1140,6 +1254,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
1140 arp_key->arp_op = htons(output->ip.proto); 1254 arp_key->arp_op = htons(output->ip.proto);
1141 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 1255 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
1142 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 1256 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
1257 } else if (eth_p_mpls(swkey->eth.type)) {
1258 struct ovs_key_mpls *mpls_key;
1259
1260 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
1261 if (!nla)
1262 goto nla_put_failure;
1263 mpls_key = nla_data(nla);
1264 mpls_key->mpls_lse = output->mpls.top_lse;
1143 } 1265 }
1144 1266
1145 if ((swkey->eth.type == htons(ETH_P_IP) || 1267 if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1226,12 +1348,14 @@ nla_put_failure:
1226 1348
1227#define MAX_ACTIONS_BUFSIZE (32 * 1024) 1349#define MAX_ACTIONS_BUFSIZE (32 * 1024)
1228 1350
1229struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) 1351static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
1230{ 1352{
1231 struct sw_flow_actions *sfa; 1353 struct sw_flow_actions *sfa;
1232 1354
1233 if (size > MAX_ACTIONS_BUFSIZE) 1355 if (size > MAX_ACTIONS_BUFSIZE) {
1356 OVS_NLERR(log, "Flow action size %u bytes exceeds max", size);
1234 return ERR_PTR(-EINVAL); 1357 return ERR_PTR(-EINVAL);
1358 }
1235 1359
1236 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); 1360 sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
1237 if (!sfa) 1361 if (!sfa)
@@ -1249,7 +1373,7 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
1249} 1373}
1250 1374
1251static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, 1375static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1252 int attr_len) 1376 int attr_len, bool log)
1253{ 1377{
1254 1378
1255 struct sw_flow_actions *acts; 1379 struct sw_flow_actions *acts;
@@ -1269,7 +1393,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
1269 new_acts_size = MAX_ACTIONS_BUFSIZE; 1393 new_acts_size = MAX_ACTIONS_BUFSIZE;
1270 } 1394 }
1271 1395
1272 acts = ovs_nla_alloc_flow_actions(new_acts_size); 1396 acts = nla_alloc_flow_actions(new_acts_size, log);
1273 if (IS_ERR(acts)) 1397 if (IS_ERR(acts))
1274 return (void *)acts; 1398 return (void *)acts;
1275 1399
@@ -1284,11 +1408,11 @@ out:
1284} 1408}
1285 1409
1286static struct nlattr *__add_action(struct sw_flow_actions **sfa, 1410static struct nlattr *__add_action(struct sw_flow_actions **sfa,
1287 int attrtype, void *data, int len) 1411 int attrtype, void *data, int len, bool log)
1288{ 1412{
1289 struct nlattr *a; 1413 struct nlattr *a;
1290 1414
1291 a = reserve_sfa_size(sfa, nla_attr_size(len)); 1415 a = reserve_sfa_size(sfa, nla_attr_size(len), log);
1292 if (IS_ERR(a)) 1416 if (IS_ERR(a))
1293 return a; 1417 return a;
1294 1418
@@ -1303,24 +1427,22 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
1303} 1427}
1304 1428
1305static int add_action(struct sw_flow_actions **sfa, int attrtype, 1429static int add_action(struct sw_flow_actions **sfa, int attrtype,
1306 void *data, int len) 1430 void *data, int len, bool log)
1307{ 1431{
1308 struct nlattr *a; 1432 struct nlattr *a;
1309 1433
1310 a = __add_action(sfa, attrtype, data, len); 1434 a = __add_action(sfa, attrtype, data, len, log);
1311 if (IS_ERR(a))
1312 return PTR_ERR(a);
1313 1435
1314 return 0; 1436 return PTR_ERR_OR_ZERO(a);
1315} 1437}
1316 1438
1317static inline int add_nested_action_start(struct sw_flow_actions **sfa, 1439static inline int add_nested_action_start(struct sw_flow_actions **sfa,
1318 int attrtype) 1440 int attrtype, bool log)
1319{ 1441{
1320 int used = (*sfa)->actions_len; 1442 int used = (*sfa)->actions_len;
1321 int err; 1443 int err;
1322 1444
1323 err = add_action(sfa, attrtype, NULL, 0); 1445 err = add_action(sfa, attrtype, NULL, 0, log);
1324 if (err) 1446 if (err)
1325 return err; 1447 return err;
1326 1448
@@ -1336,9 +1458,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
1336 a->nla_len = sfa->actions_len - st_offset; 1458 a->nla_len = sfa->actions_len - st_offset;
1337} 1459}
1338 1460
1461static int __ovs_nla_copy_actions(const struct nlattr *attr,
1462 const struct sw_flow_key *key,
1463 int depth, struct sw_flow_actions **sfa,
1464 __be16 eth_type, __be16 vlan_tci, bool log);
1465
1339static int validate_and_copy_sample(const struct nlattr *attr, 1466static int validate_and_copy_sample(const struct nlattr *attr,
1340 const struct sw_flow_key *key, int depth, 1467 const struct sw_flow_key *key, int depth,
1341 struct sw_flow_actions **sfa) 1468 struct sw_flow_actions **sfa,
1469 __be16 eth_type, __be16 vlan_tci, bool log)
1342{ 1470{
1343 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 1471 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
1344 const struct nlattr *probability, *actions; 1472 const struct nlattr *probability, *actions;
@@ -1364,18 +1492,19 @@ static int validate_and_copy_sample(const struct nlattr *attr,
1364 return -EINVAL; 1492 return -EINVAL;
1365 1493
1366 /* validation done, copy sample action. */ 1494 /* validation done, copy sample action. */
1367 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); 1495 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
1368 if (start < 0) 1496 if (start < 0)
1369 return start; 1497 return start;
1370 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, 1498 err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
1371 nla_data(probability), sizeof(u32)); 1499 nla_data(probability), sizeof(u32), log);
1372 if (err) 1500 if (err)
1373 return err; 1501 return err;
1374 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); 1502 st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
1375 if (st_acts < 0) 1503 if (st_acts < 0)
1376 return st_acts; 1504 return st_acts;
1377 1505
1378 err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); 1506 err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
1507 eth_type, vlan_tci, log);
1379 if (err) 1508 if (err)
1380 return err; 1509 return err;
1381 1510
@@ -1385,10 +1514,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
1385 return 0; 1514 return 0;
1386} 1515}
1387 1516
1388static int validate_tp_port(const struct sw_flow_key *flow_key) 1517static int validate_tp_port(const struct sw_flow_key *flow_key,
1518 __be16 eth_type)
1389{ 1519{
1390 if ((flow_key->eth.type == htons(ETH_P_IP) || 1520 if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
1391 flow_key->eth.type == htons(ETH_P_IPV6)) &&
1392 (flow_key->tp.src || flow_key->tp.dst)) 1521 (flow_key->tp.src || flow_key->tp.dst))
1393 return 0; 1522 return 0;
1394 1523
@@ -1412,7 +1541,7 @@ void ovs_match_init(struct sw_flow_match *match,
1412} 1541}
1413 1542
1414static int validate_and_copy_set_tun(const struct nlattr *attr, 1543static int validate_and_copy_set_tun(const struct nlattr *attr,
1415 struct sw_flow_actions **sfa) 1544 struct sw_flow_actions **sfa, bool log)
1416{ 1545{
1417 struct sw_flow_match match; 1546 struct sw_flow_match match;
1418 struct sw_flow_key key; 1547 struct sw_flow_key key;
@@ -1421,7 +1550,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1421 int err, start; 1550 int err, start;
1422 1551
1423 ovs_match_init(&match, &key, NULL); 1552 ovs_match_init(&match, &key, NULL);
1424 err = ipv4_tun_from_nlattr(nla_data(attr), &match, false); 1553 err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
1425 if (err) 1554 if (err)
1426 return err; 1555 return err;
1427 1556
@@ -1450,12 +1579,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1450 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 1579 key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
1451 }; 1580 };
1452 1581
1453 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); 1582 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
1454 if (start < 0) 1583 if (start < 0)
1455 return start; 1584 return start;
1456 1585
1457 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, 1586 a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
1458 sizeof(*tun_info) + key.tun_opts_len); 1587 sizeof(*tun_info) + key.tun_opts_len, log);
1459 if (IS_ERR(a)) 1588 if (IS_ERR(a))
1460 return PTR_ERR(a); 1589 return PTR_ERR(a);
1461 1590
@@ -1483,7 +1612,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
1483static int validate_set(const struct nlattr *a, 1612static int validate_set(const struct nlattr *a,
1484 const struct sw_flow_key *flow_key, 1613 const struct sw_flow_key *flow_key,
1485 struct sw_flow_actions **sfa, 1614 struct sw_flow_actions **sfa,
1486 bool *set_tun) 1615 bool *set_tun, __be16 eth_type, bool log)
1487{ 1616{
1488 const struct nlattr *ovs_key = nla_data(a); 1617 const struct nlattr *ovs_key = nla_data(a);
1489 int key_type = nla_type(ovs_key); 1618 int key_type = nla_type(ovs_key);
@@ -1508,14 +1637,17 @@ static int validate_set(const struct nlattr *a,
1508 break; 1637 break;
1509 1638
1510 case OVS_KEY_ATTR_TUNNEL: 1639 case OVS_KEY_ATTR_TUNNEL:
1640 if (eth_p_mpls(eth_type))
1641 return -EINVAL;
1642
1511 *set_tun = true; 1643 *set_tun = true;
1512 err = validate_and_copy_set_tun(a, sfa); 1644 err = validate_and_copy_set_tun(a, sfa, log);
1513 if (err) 1645 if (err)
1514 return err; 1646 return err;
1515 break; 1647 break;
1516 1648
1517 case OVS_KEY_ATTR_IPV4: 1649 case OVS_KEY_ATTR_IPV4:
1518 if (flow_key->eth.type != htons(ETH_P_IP)) 1650 if (eth_type != htons(ETH_P_IP))
1519 return -EINVAL; 1651 return -EINVAL;
1520 1652
1521 if (!flow_key->ip.proto) 1653 if (!flow_key->ip.proto)
@@ -1531,7 +1663,7 @@ static int validate_set(const struct nlattr *a,
1531 break; 1663 break;
1532 1664
1533 case OVS_KEY_ATTR_IPV6: 1665 case OVS_KEY_ATTR_IPV6:
1534 if (flow_key->eth.type != htons(ETH_P_IPV6)) 1666 if (eth_type != htons(ETH_P_IPV6))
1535 return -EINVAL; 1667 return -EINVAL;
1536 1668
1537 if (!flow_key->ip.proto) 1669 if (!flow_key->ip.proto)
@@ -1553,19 +1685,24 @@ static int validate_set(const struct nlattr *a,
1553 if (flow_key->ip.proto != IPPROTO_TCP) 1685 if (flow_key->ip.proto != IPPROTO_TCP)
1554 return -EINVAL; 1686 return -EINVAL;
1555 1687
1556 return validate_tp_port(flow_key); 1688 return validate_tp_port(flow_key, eth_type);
1557 1689
1558 case OVS_KEY_ATTR_UDP: 1690 case OVS_KEY_ATTR_UDP:
1559 if (flow_key->ip.proto != IPPROTO_UDP) 1691 if (flow_key->ip.proto != IPPROTO_UDP)
1560 return -EINVAL; 1692 return -EINVAL;
1561 1693
1562 return validate_tp_port(flow_key); 1694 return validate_tp_port(flow_key, eth_type);
1695
1696 case OVS_KEY_ATTR_MPLS:
1697 if (!eth_p_mpls(eth_type))
1698 return -EINVAL;
1699 break;
1563 1700
1564 case OVS_KEY_ATTR_SCTP: 1701 case OVS_KEY_ATTR_SCTP:
1565 if (flow_key->ip.proto != IPPROTO_SCTP) 1702 if (flow_key->ip.proto != IPPROTO_SCTP)
1566 return -EINVAL; 1703 return -EINVAL;
1567 1704
1568 return validate_tp_port(flow_key); 1705 return validate_tp_port(flow_key, eth_type);
1569 1706
1570 default: 1707 default:
1571 return -EINVAL; 1708 return -EINVAL;
@@ -1579,6 +1716,7 @@ static int validate_userspace(const struct nlattr *attr)
1579 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { 1716 static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
1580 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, 1717 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
1581 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, 1718 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
1719 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
1582 }; 1720 };
1583 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; 1721 struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
1584 int error; 1722 int error;
@@ -1596,12 +1734,12 @@ static int validate_userspace(const struct nlattr *attr)
1596} 1734}
1597 1735
1598static int copy_action(const struct nlattr *from, 1736static int copy_action(const struct nlattr *from,
1599 struct sw_flow_actions **sfa) 1737 struct sw_flow_actions **sfa, bool log)
1600{ 1738{
1601 int totlen = NLA_ALIGN(from->nla_len); 1739 int totlen = NLA_ALIGN(from->nla_len);
1602 struct nlattr *to; 1740 struct nlattr *to;
1603 1741
1604 to = reserve_sfa_size(sfa, from->nla_len); 1742 to = reserve_sfa_size(sfa, from->nla_len, log);
1605 if (IS_ERR(to)) 1743 if (IS_ERR(to))
1606 return PTR_ERR(to); 1744 return PTR_ERR(to);
1607 1745
@@ -1609,10 +1747,10 @@ static int copy_action(const struct nlattr *from,
1609 return 0; 1747 return 0;
1610} 1748}
1611 1749
1612int ovs_nla_copy_actions(const struct nlattr *attr, 1750static int __ovs_nla_copy_actions(const struct nlattr *attr,
1613 const struct sw_flow_key *key, 1751 const struct sw_flow_key *key,
1614 int depth, 1752 int depth, struct sw_flow_actions **sfa,
1615 struct sw_flow_actions **sfa) 1753 __be16 eth_type, __be16 vlan_tci, bool log)
1616{ 1754{
1617 const struct nlattr *a; 1755 const struct nlattr *a;
1618 int rem, err; 1756 int rem, err;
@@ -1626,6 +1764,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1626 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), 1764 [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
1627 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), 1765 [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
1628 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, 1766 [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
1767 [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
1768 [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
1629 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), 1769 [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
1630 [OVS_ACTION_ATTR_POP_VLAN] = 0, 1770 [OVS_ACTION_ATTR_POP_VLAN] = 0,
1631 [OVS_ACTION_ATTR_SET] = (u32)-1, 1771 [OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1671,6 +1811,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1671 } 1811 }
1672 1812
1673 case OVS_ACTION_ATTR_POP_VLAN: 1813 case OVS_ACTION_ATTR_POP_VLAN:
1814 vlan_tci = htons(0);
1674 break; 1815 break;
1675 1816
1676 case OVS_ACTION_ATTR_PUSH_VLAN: 1817 case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1679,29 +1820,69 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1679 return -EINVAL; 1820 return -EINVAL;
1680 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) 1821 if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
1681 return -EINVAL; 1822 return -EINVAL;
1823 vlan_tci = vlan->vlan_tci;
1682 break; 1824 break;
1683 1825
1684 case OVS_ACTION_ATTR_RECIRC: 1826 case OVS_ACTION_ATTR_RECIRC:
1685 break; 1827 break;
1686 1828
1829 case OVS_ACTION_ATTR_PUSH_MPLS: {
1830 const struct ovs_action_push_mpls *mpls = nla_data(a);
1831
1832 if (!eth_p_mpls(mpls->mpls_ethertype))
1833 return -EINVAL;
1834 /* Prohibit push MPLS other than to a white list
1835 * for packets that have a known tag order.
1836 */
1837 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1838 (eth_type != htons(ETH_P_IP) &&
1839 eth_type != htons(ETH_P_IPV6) &&
1840 eth_type != htons(ETH_P_ARP) &&
1841 eth_type != htons(ETH_P_RARP) &&
1842 !eth_p_mpls(eth_type)))
1843 return -EINVAL;
1844 eth_type = mpls->mpls_ethertype;
1845 break;
1846 }
1847
1848 case OVS_ACTION_ATTR_POP_MPLS:
1849 if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
1850 !eth_p_mpls(eth_type))
1851 return -EINVAL;
1852
1853 /* Disallow subsequent L2.5+ set and mpls_pop actions
1854 * as there is no check here to ensure that the new
1855 * eth_type is valid and thus set actions could
1856 * write off the end of the packet or otherwise
1857 * corrupt it.
1858 *
1859 * Support for these actions is planned using packet
1860 * recirculation.
1861 */
1862 eth_type = htons(0);
1863 break;
1864
1687 case OVS_ACTION_ATTR_SET: 1865 case OVS_ACTION_ATTR_SET:
1688 err = validate_set(a, key, sfa, &skip_copy); 1866 err = validate_set(a, key, sfa,
1867 &skip_copy, eth_type, log);
1689 if (err) 1868 if (err)
1690 return err; 1869 return err;
1691 break; 1870 break;
1692 1871
1693 case OVS_ACTION_ATTR_SAMPLE: 1872 case OVS_ACTION_ATTR_SAMPLE:
1694 err = validate_and_copy_sample(a, key, depth, sfa); 1873 err = validate_and_copy_sample(a, key, depth, sfa,
1874 eth_type, vlan_tci, log);
1695 if (err) 1875 if (err)
1696 return err; 1876 return err;
1697 skip_copy = true; 1877 skip_copy = true;
1698 break; 1878 break;
1699 1879
1700 default: 1880 default:
1881 OVS_NLERR(log, "Unknown Action type %d", type);
1701 return -EINVAL; 1882 return -EINVAL;
1702 } 1883 }
1703 if (!skip_copy) { 1884 if (!skip_copy) {
1704 err = copy_action(a, sfa); 1885 err = copy_action(a, sfa, log);
1705 if (err) 1886 if (err)
1706 return err; 1887 return err;
1707 } 1888 }
@@ -1713,6 +1894,24 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
1713 return 0; 1894 return 0;
1714} 1895}
1715 1896
1897int ovs_nla_copy_actions(const struct nlattr *attr,
1898 const struct sw_flow_key *key,
1899 struct sw_flow_actions **sfa, bool log)
1900{
1901 int err;
1902
1903 *sfa = nla_alloc_flow_actions(nla_len(attr), log);
1904 if (IS_ERR(*sfa))
1905 return PTR_ERR(*sfa);
1906
1907 err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
1908 key->eth.tci, log);
1909 if (err)
1910 kfree(*sfa);
1911
1912 return err;
1913}
1914
1716static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) 1915static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
1717{ 1916{
1718 const struct nlattr *a; 1917 const struct nlattr *a;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 206e45add888..577f12be3459 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,24 +37,28 @@
37 37
38#include "flow.h" 38#include "flow.h"
39 39
40size_t ovs_tun_key_attr_size(void);
41size_t ovs_key_attr_size(void);
42
40void ovs_match_init(struct sw_flow_match *match, 43void ovs_match_init(struct sw_flow_match *match,
41 struct sw_flow_key *key, struct sw_flow_mask *mask); 44 struct sw_flow_key *key, struct sw_flow_mask *mask);
42 45
43int ovs_nla_put_flow(const struct sw_flow_key *, 46int ovs_nla_put_flow(const struct sw_flow_key *,
44 const struct sw_flow_key *, struct sk_buff *); 47 const struct sw_flow_key *, struct sk_buff *);
45int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *); 48int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
49 bool log);
46 50
47int ovs_nla_get_match(struct sw_flow_match *match, 51int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
48 const struct nlattr *, 52 const struct nlattr *mask, bool log);
49 const struct nlattr *); 53int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
54 const struct ovs_tunnel_info *);
50 55
51int ovs_nla_copy_actions(const struct nlattr *attr, 56int ovs_nla_copy_actions(const struct nlattr *attr,
52 const struct sw_flow_key *key, int depth, 57 const struct sw_flow_key *key,
53 struct sw_flow_actions **sfa); 58 struct sw_flow_actions **sfa, bool log);
54int ovs_nla_put_actions(const struct nlattr *attr, 59int ovs_nla_put_actions(const struct nlattr *attr,
55 int len, struct sk_buff *skb); 60 int len, struct sk_buff *skb);
56 61
57struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
58void ovs_nla_free_flow_actions(struct sw_flow_actions *); 62void ovs_nla_free_flow_actions(struct sw_flow_actions *);
59 63
60#endif /* flow_netlink.h */ 64#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf2d853646f0..5899bf161c61 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2013 Nicira, Inc. 2 * Copyright (c) 2007-2014 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -25,7 +25,7 @@
25#include <linux/if_vlan.h> 25#include <linux/if_vlan.h>
26#include <net/llc_pdu.h> 26#include <net/llc_pdu.h>
27#include <linux/kernel.h> 27#include <linux/kernel.h>
28#include <linux/hash.h> 28#include <linux/jhash.h>
29#include <linux/jiffies.h> 29#include <linux/jiffies.h>
30#include <linux/llc.h> 30#include <linux/llc.h>
31#include <linux/module.h> 31#include <linux/module.h>
@@ -107,7 +107,7 @@ err:
107 return ERR_PTR(-ENOMEM); 107 return ERR_PTR(-ENOMEM);
108} 108}
109 109
110int ovs_flow_tbl_count(struct flow_table *table) 110int ovs_flow_tbl_count(const struct flow_table *table)
111{ 111{
112 return table->count; 112 return table->count;
113} 113}
@@ -250,11 +250,14 @@ skip_flows:
250 __table_instance_destroy(ti); 250 __table_instance_destroy(ti);
251} 251}
252 252
253void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) 253/* No need for locking: this function is called from an RCU callback or
254 * error path.
255 */
256void ovs_flow_tbl_destroy(struct flow_table *table)
254{ 257{
255 struct table_instance *ti = ovsl_dereference(table->ti); 258 struct table_instance *ti = rcu_dereference_raw(table->ti);
256 259
257 table_instance_destroy(ti, deferred); 260 table_instance_destroy(ti, false);
258} 261}
259 262
260struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, 263struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -363,7 +366,7 @@ static u32 flow_hash(const struct sw_flow_key *key, int key_start,
363 /* Make sure number of hash bytes are multiple of u32. */ 366 /* Make sure number of hash bytes are multiple of u32. */
364 BUILD_BUG_ON(sizeof(long) % sizeof(u32)); 367 BUILD_BUG_ON(sizeof(long) % sizeof(u32));
365 368
366 return arch_fast_hash2(hash_key, hash_u32s, 0); 369 return jhash2(hash_key, hash_u32s, 0);
367} 370}
368 371
369static int flow_key_start(const struct sw_flow_key *key) 372static int flow_key_start(const struct sw_flow_key *key)
@@ -398,7 +401,7 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow,
398} 401}
399 402
400bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 403bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
401 struct sw_flow_match *match) 404 const struct sw_flow_match *match)
402{ 405{
403 struct sw_flow_key *key = match->key; 406 struct sw_flow_key *key = match->key;
404 int key_start = flow_key_start(key); 407 int key_start = flow_key_start(key);
@@ -409,7 +412,7 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
409 412
410static struct sw_flow *masked_flow_lookup(struct table_instance *ti, 413static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
411 const struct sw_flow_key *unmasked, 414 const struct sw_flow_key *unmasked,
412 struct sw_flow_mask *mask) 415 const struct sw_flow_mask *mask)
413{ 416{
414 struct sw_flow *flow; 417 struct sw_flow *flow;
415 struct hlist_head *head; 418 struct hlist_head *head;
@@ -457,7 +460,7 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
457} 460}
458 461
459struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, 462struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
460 struct sw_flow_match *match) 463 const struct sw_flow_match *match)
461{ 464{
462 struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); 465 struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
463 struct sw_flow_mask *mask; 466 struct sw_flow_mask *mask;
@@ -560,7 +563,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
560 563
561/* Add 'mask' into the mask list, if it is not already there. */ 564/* Add 'mask' into the mask list, if it is not already there. */
562static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, 565static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
563 struct sw_flow_mask *new) 566 const struct sw_flow_mask *new)
564{ 567{
565 struct sw_flow_mask *mask; 568 struct sw_flow_mask *mask;
566 mask = flow_mask_find(tbl, new); 569 mask = flow_mask_find(tbl, new);
@@ -583,7 +586,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
583 586
584/* Must be called with OVS mutex held. */ 587/* Must be called with OVS mutex held. */
585int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 588int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
586 struct sw_flow_mask *mask) 589 const struct sw_flow_mask *mask)
587{ 590{
588 struct table_instance *new_ti = NULL; 591 struct table_instance *new_ti = NULL;
589 struct table_instance *ti; 592 struct table_instance *ti;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 5918bff7f3f6..309fa6415689 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -61,12 +61,12 @@ struct sw_flow *ovs_flow_alloc(void);
61void ovs_flow_free(struct sw_flow *, bool deferred); 61void ovs_flow_free(struct sw_flow *, bool deferred);
62 62
63int ovs_flow_tbl_init(struct flow_table *); 63int ovs_flow_tbl_init(struct flow_table *);
64int ovs_flow_tbl_count(struct flow_table *table); 64int ovs_flow_tbl_count(const struct flow_table *table);
65void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); 65void ovs_flow_tbl_destroy(struct flow_table *table);
66int ovs_flow_tbl_flush(struct flow_table *flow_table); 66int ovs_flow_tbl_flush(struct flow_table *flow_table);
67 67
68int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 68int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
69 struct sw_flow_mask *mask); 69 const struct sw_flow_mask *mask);
70void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); 70void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
71int ovs_flow_tbl_num_masks(const struct flow_table *table); 71int ovs_flow_tbl_num_masks(const struct flow_table *table);
72struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, 72struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
@@ -77,9 +77,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
77struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, 77struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
78 const struct sw_flow_key *); 78 const struct sw_flow_key *);
79struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, 79struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
80 struct sw_flow_match *match); 80 const struct sw_flow_match *match);
81bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, 81bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
82 struct sw_flow_match *match); 82 const struct sw_flow_match *match);
83 83
84void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, 84void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
85 const struct sw_flow_mask *mask); 85 const struct sw_flow_mask *mask);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 106a9d80b663..484864dd0e68 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -17,6 +17,7 @@
17#include <linux/rculist.h> 17#include <linux/rculist.h>
18#include <linux/udp.h> 18#include <linux/udp.h>
19#include <linux/if_vlan.h> 19#include <linux/if_vlan.h>
20#include <linux/module.h>
20 21
21#include <net/geneve.h> 22#include <net/geneve.h>
22#include <net/icmp.h> 23#include <net/icmp.h>
@@ -28,6 +29,8 @@
28#include "datapath.h" 29#include "datapath.h"
29#include "vport.h" 30#include "vport.h"
30 31
32static struct vport_ops ovs_geneve_vport_ops;
33
31/** 34/**
32 * struct geneve_port - Keeps track of open UDP ports 35 * struct geneve_port - Keeps track of open UDP ports
33 * @gs: The socket created for this port number. 36 * @gs: The socket created for this port number.
@@ -65,7 +68,7 @@ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
65} 68}
66 69
67/* Convert 24 bit VNI to 64 bit tunnel ID. */ 70/* Convert 24 bit VNI to 64 bit tunnel ID. */
68static __be64 vni_to_tunnel_id(__u8 *vni) 71static __be64 vni_to_tunnel_id(const __u8 *vni)
69{ 72{
70#ifdef __BIG_ENDIAN 73#ifdef __BIG_ENDIAN
71 return (vni[0] << 16) | (vni[1] << 8) | vni[2]; 74 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
@@ -94,7 +97,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
94 97
95 key = vni_to_tunnel_id(geneveh->vni); 98 key = vni_to_tunnel_id(geneveh->vni);
96 99
97 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, flags, 100 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
101 udp_hdr(skb)->source, udp_hdr(skb)->dest,
102 key, flags,
98 geneveh->options, opts_len); 103 geneveh->options, opts_len);
99 104
100 ovs_vport_receive(vport, skb, &tun_info); 105 ovs_vport_receive(vport, skb, &tun_info);
@@ -214,7 +219,10 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
214 false); 219 false);
215 if (err < 0) 220 if (err < 0)
216 ip_rt_put(rt); 221 ip_rt_put(rt);
222 return err;
223
217error: 224error:
225 kfree_skb(skb);
218 return err; 226 return err;
219} 227}
220 228
@@ -225,11 +233,46 @@ static const char *geneve_get_name(const struct vport *vport)
225 return geneve_port->name; 233 return geneve_port->name;
226} 234}
227 235
228const struct vport_ops ovs_geneve_vport_ops = { 236static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
237 struct ovs_tunnel_info *egress_tun_info)
238{
239 struct geneve_port *geneve_port = geneve_vport(vport);
240 struct net *net = ovs_dp_get_net(vport->dp);
241 __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
242 __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
243
244 /* Get tp_src and tp_dst, refer to geneve_build_header().
245 */
246 return ovs_tunnel_get_egress_info(egress_tun_info,
247 ovs_dp_get_net(vport->dp),
248 OVS_CB(skb)->egress_tun_info,
249 IPPROTO_UDP, skb->mark, sport, dport);
250}
251
252static struct vport_ops ovs_geneve_vport_ops = {
229 .type = OVS_VPORT_TYPE_GENEVE, 253 .type = OVS_VPORT_TYPE_GENEVE,
230 .create = geneve_tnl_create, 254 .create = geneve_tnl_create,
231 .destroy = geneve_tnl_destroy, 255 .destroy = geneve_tnl_destroy,
232 .get_name = geneve_get_name, 256 .get_name = geneve_get_name,
233 .get_options = geneve_get_options, 257 .get_options = geneve_get_options,
234 .send = geneve_tnl_send, 258 .send = geneve_tnl_send,
259 .owner = THIS_MODULE,
260 .get_egress_tun_info = geneve_get_egress_tun_info,
235}; 261};
262
263static int __init ovs_geneve_tnl_init(void)
264{
265 return ovs_vport_ops_register(&ovs_geneve_vport_ops);
266}
267
268static void __exit ovs_geneve_tnl_exit(void)
269{
270 ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
271}
272
273module_init(ovs_geneve_tnl_init);
274module_exit(ovs_geneve_tnl_exit);
275
276MODULE_DESCRIPTION("OVS: Geneve swiching port");
277MODULE_LICENSE("GPL");
278MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 108b82da2fd9..d4168c442db5 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -29,6 +29,7 @@
29#include <linux/jhash.h> 29#include <linux/jhash.h>
30#include <linux/list.h> 30#include <linux/list.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <linux/module.h>
32#include <linux/workqueue.h> 33#include <linux/workqueue.h>
33#include <linux/rculist.h> 34#include <linux/rculist.h>
34#include <net/route.h> 35#include <net/route.h>
@@ -45,6 +46,8 @@
45#include "datapath.h" 46#include "datapath.h"
46#include "vport.h" 47#include "vport.h"
47 48
49static struct vport_ops ovs_gre_vport_ops;
50
48/* Returns the least-significant 32 bits of a __be64. */ 51/* Returns the least-significant 32 bits of a __be64. */
49static __be32 be64_get_low32(__be64 x) 52static __be32 be64_get_low32(__be64 x)
50{ 53{
@@ -70,7 +73,7 @@ static struct sk_buff *__build_header(struct sk_buff *skb,
70 73
71 skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); 74 skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
72 if (IS_ERR(skb)) 75 if (IS_ERR(skb))
73 return NULL; 76 return skb;
74 77
75 tpi.flags = filter_tnl_flags(tun_key->tun_flags); 78 tpi.flags = filter_tnl_flags(tun_key->tun_flags);
76 tpi.proto = htons(ETH_P_TEB); 79 tpi.proto = htons(ETH_P_TEB);
@@ -105,7 +108,7 @@ static int gre_rcv(struct sk_buff *skb,
105 return PACKET_REJECT; 108 return PACKET_REJECT;
106 109
107 key = key_to_tunnel_id(tpi->key, tpi->seq); 110 key = key_to_tunnel_id(tpi->key, tpi->seq);
108 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), key, 111 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
109 filter_tnl_flags(tpi->flags), NULL, 0); 112 filter_tnl_flags(tpi->flags), NULL, 0);
110 113
111 ovs_vport_receive(vport, skb, &tun_info); 114 ovs_vport_receive(vport, skb, &tun_info);
@@ -141,7 +144,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
141 144
142 if (unlikely(!OVS_CB(skb)->egress_tun_info)) { 145 if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
143 err = -EINVAL; 146 err = -EINVAL;
144 goto error; 147 goto err_free_skb;
145 } 148 }
146 149
147 tun_key = &OVS_CB(skb)->egress_tun_info->tunnel; 150 tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
@@ -154,8 +157,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
154 fl.flowi4_proto = IPPROTO_GRE; 157 fl.flowi4_proto = IPPROTO_GRE;
155 158
156 rt = ip_route_output_key(net, &fl); 159 rt = ip_route_output_key(net, &fl);
157 if (IS_ERR(rt)) 160 if (IS_ERR(rt)) {
158 return PTR_ERR(rt); 161 err = PTR_ERR(rt);
162 goto err_free_skb;
163 }
159 164
160 tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags); 165 tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
161 166
@@ -172,20 +177,17 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
172 goto err_free_rt; 177 goto err_free_rt;
173 } 178 }
174 179
175 if (vlan_tx_tag_present(skb)) { 180 skb = vlan_hwaccel_push_inside(skb);
176 if (unlikely(!__vlan_put_tag(skb, 181 if (unlikely(!skb)) {
177 skb->vlan_proto, 182 err = -ENOMEM;
178 vlan_tx_tag_get(skb)))) { 183 goto err_free_rt;
179 err = -ENOMEM;
180 goto err_free_rt;
181 }
182 skb->vlan_tci = 0;
183 } 184 }
184 185
185 /* Push Tunnel header. */ 186 /* Push Tunnel header. */
186 skb = __build_header(skb, tunnel_hlen); 187 skb = __build_header(skb, tunnel_hlen);
187 if (unlikely(!skb)) { 188 if (IS_ERR(skb)) {
188 err = 0; 189 err = PTR_ERR(skb);
190 skb = NULL;
189 goto err_free_rt; 191 goto err_free_rt;
190 } 192 }
191 193
@@ -199,7 +201,8 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
199 tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false); 201 tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
200err_free_rt: 202err_free_rt:
201 ip_rt_put(rt); 203 ip_rt_put(rt);
202error: 204err_free_skb:
205 kfree_skb(skb);
203 return err; 206 return err;
204} 207}
205 208
@@ -281,10 +284,38 @@ static void gre_tnl_destroy(struct vport *vport)
281 gre_exit(); 284 gre_exit();
282} 285}
283 286
284const struct vport_ops ovs_gre_vport_ops = { 287static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
288 struct ovs_tunnel_info *egress_tun_info)
289{
290 return ovs_tunnel_get_egress_info(egress_tun_info,
291 ovs_dp_get_net(vport->dp),
292 OVS_CB(skb)->egress_tun_info,
293 IPPROTO_GRE, skb->mark, 0, 0);
294}
295
296static struct vport_ops ovs_gre_vport_ops = {
285 .type = OVS_VPORT_TYPE_GRE, 297 .type = OVS_VPORT_TYPE_GRE,
286 .create = gre_create, 298 .create = gre_create,
287 .destroy = gre_tnl_destroy, 299 .destroy = gre_tnl_destroy,
288 .get_name = gre_get_name, 300 .get_name = gre_get_name,
289 .send = gre_tnl_send, 301 .send = gre_tnl_send,
302 .get_egress_tun_info = gre_get_egress_tun_info,
303 .owner = THIS_MODULE,
290}; 304};
305
306static int __init ovs_gre_tnl_init(void)
307{
308 return ovs_vport_ops_register(&ovs_gre_vport_ops);
309}
310
311static void __exit ovs_gre_tnl_exit(void)
312{
313 ovs_vport_ops_unregister(&ovs_gre_vport_ops);
314}
315
316module_init(ovs_gre_tnl_init);
317module_exit(ovs_gre_tnl_exit);
318
319MODULE_DESCRIPTION("OVS: GRE switching port");
320MODULE_LICENSE("GPL");
321MODULE_ALIAS("vport-type-3");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 84516126e5f3..6a55f7105505 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -36,6 +36,8 @@ struct internal_dev {
36 struct vport *vport; 36 struct vport *vport;
37}; 37};
38 38
39static struct vport_ops ovs_internal_vport_ops;
40
39static struct internal_dev *internal_dev_priv(struct net_device *netdev) 41static struct internal_dev *internal_dev_priv(struct net_device *netdev)
40{ 42{
41 return netdev_priv(netdev); 43 return netdev_priv(netdev);
@@ -222,6 +224,11 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
222 struct net_device *netdev = netdev_vport_priv(vport)->dev; 224 struct net_device *netdev = netdev_vport_priv(vport)->dev;
223 int len; 225 int len;
224 226
227 if (unlikely(!(netdev->flags & IFF_UP))) {
228 kfree_skb(skb);
229 return 0;
230 }
231
225 len = skb->len; 232 len = skb->len;
226 233
227 skb_dst_drop(skb); 234 skb_dst_drop(skb);
@@ -238,7 +245,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
238 return len; 245 return len;
239} 246}
240 247
241const struct vport_ops ovs_internal_vport_ops = { 248static struct vport_ops ovs_internal_vport_ops = {
242 .type = OVS_VPORT_TYPE_INTERNAL, 249 .type = OVS_VPORT_TYPE_INTERNAL,
243 .create = internal_dev_create, 250 .create = internal_dev_create,
244 .destroy = internal_dev_destroy, 251 .destroy = internal_dev_destroy,
@@ -261,10 +268,21 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
261 268
262int ovs_internal_dev_rtnl_link_register(void) 269int ovs_internal_dev_rtnl_link_register(void)
263{ 270{
264 return rtnl_link_register(&internal_dev_link_ops); 271 int err;
272
273 err = rtnl_link_register(&internal_dev_link_ops);
274 if (err < 0)
275 return err;
276
277 err = ovs_vport_ops_register(&ovs_internal_vport_ops);
278 if (err < 0)
279 rtnl_link_unregister(&internal_dev_link_ops);
280
281 return err;
265} 282}
266 283
267void ovs_internal_dev_rtnl_link_unregister(void) 284void ovs_internal_dev_rtnl_link_unregister(void)
268{ 285{
286 ovs_vport_ops_unregister(&ovs_internal_vport_ops);
269 rtnl_link_unregister(&internal_dev_link_ops); 287 rtnl_link_unregister(&internal_dev_link_ops);
270} 288}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index d21f77d875ba..4776282c6417 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -33,6 +33,8 @@
33#include "vport-internal_dev.h" 33#include "vport-internal_dev.h"
34#include "vport-netdev.h" 34#include "vport-netdev.h"
35 35
36static struct vport_ops ovs_netdev_vport_ops;
37
36/* Must be called with rcu_read_lock. */ 38/* Must be called with rcu_read_lock. */
37static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) 39static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
38{ 40{
@@ -75,7 +77,7 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
75 return RX_HANDLER_CONSUMED; 77 return RX_HANDLER_CONSUMED;
76} 78}
77 79
78static struct net_device *get_dpdev(struct datapath *dp) 80static struct net_device *get_dpdev(const struct datapath *dp)
79{ 81{
80 struct vport *local; 82 struct vport *local;
81 83
@@ -224,10 +226,20 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev)
224 return NULL; 226 return NULL;
225} 227}
226 228
227const struct vport_ops ovs_netdev_vport_ops = { 229static struct vport_ops ovs_netdev_vport_ops = {
228 .type = OVS_VPORT_TYPE_NETDEV, 230 .type = OVS_VPORT_TYPE_NETDEV,
229 .create = netdev_create, 231 .create = netdev_create,
230 .destroy = netdev_destroy, 232 .destroy = netdev_destroy,
231 .get_name = ovs_netdev_get_name, 233 .get_name = ovs_netdev_get_name,
232 .send = netdev_send, 234 .send = netdev_send,
233}; 235};
236
237int __init ovs_netdev_init(void)
238{
239 return ovs_vport_ops_register(&ovs_netdev_vport_ops);
240}
241
242void ovs_netdev_exit(void)
243{
244 ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
245}
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 8df01c1127e5..6f7038e79c52 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -41,4 +41,7 @@ netdev_vport_priv(const struct vport *vport)
41const char *ovs_netdev_get_name(const struct vport *); 41const char *ovs_netdev_get_name(const struct vport *);
42void ovs_netdev_detach_dev(struct vport *); 42void ovs_netdev_detach_dev(struct vport *);
43 43
44int __init ovs_netdev_init(void);
45void ovs_netdev_exit(void);
46
44#endif /* vport_netdev.h */ 47#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 2735e01dca73..d7c46b301024 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -24,6 +24,7 @@
24#include <linux/net.h> 24#include <linux/net.h>
25#include <linux/rculist.h> 25#include <linux/rculist.h>
26#include <linux/udp.h> 26#include <linux/udp.h>
27#include <linux/module.h>
27 28
28#include <net/icmp.h> 29#include <net/icmp.h>
29#include <net/ip.h> 30#include <net/ip.h>
@@ -50,6 +51,8 @@ struct vxlan_port {
50 char name[IFNAMSIZ]; 51 char name[IFNAMSIZ];
51}; 52};
52 53
54static struct vport_ops ovs_vxlan_vport_ops;
55
53static inline struct vxlan_port *vxlan_vport(const struct vport *vport) 56static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
54{ 57{
55 return vport_priv(vport); 58 return vport_priv(vport);
@@ -66,7 +69,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
66 /* Save outer tunnel values */ 69 /* Save outer tunnel values */
67 iph = ip_hdr(skb); 70 iph = ip_hdr(skb);
68 key = cpu_to_be64(ntohl(vx_vni) >> 8); 71 key = cpu_to_be64(ntohl(vx_vni) >> 8);
69 ovs_flow_tun_info_init(&tun_info, iph, key, TUNNEL_KEY, NULL, 0); 72 ovs_flow_tun_info_init(&tun_info, iph,
73 udp_hdr(skb)->source, udp_hdr(skb)->dest,
74 key, TUNNEL_KEY, NULL, 0);
70 75
71 ovs_vport_receive(vport, skb, &tun_info); 76 ovs_vport_receive(vport, skb, &tun_info);
72} 77}
@@ -182,21 +187,61 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
182 false); 187 false);
183 if (err < 0) 188 if (err < 0)
184 ip_rt_put(rt); 189 ip_rt_put(rt);
190 return err;
185error: 191error:
192 kfree_skb(skb);
186 return err; 193 return err;
187} 194}
188 195
196static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
197 struct ovs_tunnel_info *egress_tun_info)
198{
199 struct net *net = ovs_dp_get_net(vport->dp);
200 struct vxlan_port *vxlan_port = vxlan_vport(vport);
201 __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
202 __be16 src_port;
203 int port_min;
204 int port_max;
205
206 inet_get_local_port_range(net, &port_min, &port_max);
207 src_port = udp_flow_src_port(net, skb, 0, 0, true);
208
209 return ovs_tunnel_get_egress_info(egress_tun_info, net,
210 OVS_CB(skb)->egress_tun_info,
211 IPPROTO_UDP, skb->mark,
212 src_port, dst_port);
213}
214
189static const char *vxlan_get_name(const struct vport *vport) 215static const char *vxlan_get_name(const struct vport *vport)
190{ 216{
191 struct vxlan_port *vxlan_port = vxlan_vport(vport); 217 struct vxlan_port *vxlan_port = vxlan_vport(vport);
192 return vxlan_port->name; 218 return vxlan_port->name;
193} 219}
194 220
195const struct vport_ops ovs_vxlan_vport_ops = { 221static struct vport_ops ovs_vxlan_vport_ops = {
196 .type = OVS_VPORT_TYPE_VXLAN, 222 .type = OVS_VPORT_TYPE_VXLAN,
197 .create = vxlan_tnl_create, 223 .create = vxlan_tnl_create,
198 .destroy = vxlan_tnl_destroy, 224 .destroy = vxlan_tnl_destroy,
199 .get_name = vxlan_get_name, 225 .get_name = vxlan_get_name,
200 .get_options = vxlan_get_options, 226 .get_options = vxlan_get_options,
201 .send = vxlan_tnl_send, 227 .send = vxlan_tnl_send,
228 .get_egress_tun_info = vxlan_get_egress_tun_info,
229 .owner = THIS_MODULE,
202}; 230};
231
232static int __init ovs_vxlan_tnl_init(void)
233{
234 return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
235}
236
237static void __exit ovs_vxlan_tnl_exit(void)
238{
239 ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
240}
241
242module_init(ovs_vxlan_tnl_init);
243module_exit(ovs_vxlan_tnl_exit);
244
245MODULE_DESCRIPTION("OVS: VXLAN switching port");
246MODULE_LICENSE("GPL");
247MODULE_ALIAS("vport-type-4");
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6015802ebe6f..2034c6d9cb5a 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -28,6 +28,7 @@
28#include <linux/rtnetlink.h> 28#include <linux/rtnetlink.h>
29#include <linux/compat.h> 29#include <linux/compat.h>
30#include <net/net_namespace.h> 30#include <net/net_namespace.h>
31#include <linux/module.h>
31 32
32#include "datapath.h" 33#include "datapath.h"
33#include "vport.h" 34#include "vport.h"
@@ -36,22 +37,7 @@
36static void ovs_vport_record_error(struct vport *, 37static void ovs_vport_record_error(struct vport *,
37 enum vport_err_type err_type); 38 enum vport_err_type err_type);
38 39
39/* List of statically compiled vport implementations. Don't forget to also 40static LIST_HEAD(vport_ops_list);
40 * add yours to the list at the bottom of vport.h. */
41static const struct vport_ops *vport_ops_list[] = {
42 &ovs_netdev_vport_ops,
43 &ovs_internal_vport_ops,
44
45#ifdef CONFIG_OPENVSWITCH_GRE
46 &ovs_gre_vport_ops,
47#endif
48#ifdef CONFIG_OPENVSWITCH_VXLAN
49 &ovs_vxlan_vport_ops,
50#endif
51#ifdef CONFIG_OPENVSWITCH_GENEVE
52 &ovs_geneve_vport_ops,
53#endif
54};
55 41
56/* Protected by RCU read lock for reading, ovs_mutex for writing. */ 42/* Protected by RCU read lock for reading, ovs_mutex for writing. */
57static struct hlist_head *dev_table; 43static struct hlist_head *dev_table;
@@ -82,12 +68,38 @@ void ovs_vport_exit(void)
82 kfree(dev_table); 68 kfree(dev_table);
83} 69}
84 70
85static struct hlist_head *hash_bucket(struct net *net, const char *name) 71static struct hlist_head *hash_bucket(const struct net *net, const char *name)
86{ 72{
87 unsigned int hash = jhash(name, strlen(name), (unsigned long) net); 73 unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
88 return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; 74 return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
89} 75}
90 76
77int ovs_vport_ops_register(struct vport_ops *ops)
78{
79 int err = -EEXIST;
80 struct vport_ops *o;
81
82 ovs_lock();
83 list_for_each_entry(o, &vport_ops_list, list)
84 if (ops->type == o->type)
85 goto errout;
86
87 list_add_tail(&ops->list, &vport_ops_list);
88 err = 0;
89errout:
90 ovs_unlock();
91 return err;
92}
93EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
94
95void ovs_vport_ops_unregister(struct vport_ops *ops)
96{
97 ovs_lock();
98 list_del(&ops->list);
99 ovs_unlock();
100}
101EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
102
91/** 103/**
92 * ovs_vport_locate - find a port that has already been created 104 * ovs_vport_locate - find a port that has already been created
93 * 105 *
@@ -95,7 +107,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
95 * 107 *
96 * Must be called with ovs or RCU read lock. 108 * Must be called with ovs or RCU read lock.
97 */ 109 */
98struct vport *ovs_vport_locate(struct net *net, const char *name) 110struct vport *ovs_vport_locate(const struct net *net, const char *name)
99{ 111{
100 struct hlist_head *bucket = hash_bucket(net, name); 112 struct hlist_head *bucket = hash_bucket(net, name);
101 struct vport *vport; 113 struct vport *vport;
@@ -153,6 +165,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
153 165
154 return vport; 166 return vport;
155} 167}
168EXPORT_SYMBOL_GPL(ovs_vport_alloc);
156 169
157/** 170/**
158 * ovs_vport_free - uninitialize and free vport 171 * ovs_vport_free - uninitialize and free vport
@@ -173,6 +186,18 @@ void ovs_vport_free(struct vport *vport)
173 free_percpu(vport->percpu_stats); 186 free_percpu(vport->percpu_stats);
174 kfree(vport); 187 kfree(vport);
175} 188}
189EXPORT_SYMBOL_GPL(ovs_vport_free);
190
191static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
192{
193 struct vport_ops *ops;
194
195 list_for_each_entry(ops, &vport_ops_list, list)
196 if (ops->type == parms->type)
197 return ops;
198
199 return NULL;
200}
176 201
177/** 202/**
178 * ovs_vport_add - add vport device (for kernel callers) 203 * ovs_vport_add - add vport device (for kernel callers)
@@ -184,31 +209,40 @@ void ovs_vport_free(struct vport *vport)
184 */ 209 */
185struct vport *ovs_vport_add(const struct vport_parms *parms) 210struct vport *ovs_vport_add(const struct vport_parms *parms)
186{ 211{
212 struct vport_ops *ops;
187 struct vport *vport; 213 struct vport *vport;
188 int err = 0;
189 int i;
190 214
191 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 215 ops = ovs_vport_lookup(parms);
192 if (vport_ops_list[i]->type == parms->type) { 216 if (ops) {
193 struct hlist_head *bucket; 217 struct hlist_head *bucket;
194 218
195 vport = vport_ops_list[i]->create(parms); 219 if (!try_module_get(ops->owner))
196 if (IS_ERR(vport)) { 220 return ERR_PTR(-EAFNOSUPPORT);
197 err = PTR_ERR(vport);
198 goto out;
199 }
200 221
201 bucket = hash_bucket(ovs_dp_get_net(vport->dp), 222 vport = ops->create(parms);
202 vport->ops->get_name(vport)); 223 if (IS_ERR(vport)) {
203 hlist_add_head_rcu(&vport->hash_node, bucket); 224 module_put(ops->owner);
204 return vport; 225 return vport;
205 } 226 }
227
228 bucket = hash_bucket(ovs_dp_get_net(vport->dp),
229 vport->ops->get_name(vport));
230 hlist_add_head_rcu(&vport->hash_node, bucket);
231 return vport;
206 } 232 }
207 233
208 err = -EAFNOSUPPORT; 234 /* Unlock to attempt module load and return -EAGAIN if load
235 * was successful as we need to restart the port addition
236 * workflow.
237 */
238 ovs_unlock();
239 request_module("vport-type-%d", parms->type);
240 ovs_lock();
209 241
210out: 242 if (!ovs_vport_lookup(parms))
211 return ERR_PTR(err); 243 return ERR_PTR(-EAFNOSUPPORT);
244 else
245 return ERR_PTR(-EAGAIN);
212} 246}
213 247
214/** 248/**
@@ -242,6 +276,8 @@ void ovs_vport_del(struct vport *vport)
242 hlist_del_rcu(&vport->hash_node); 276 hlist_del_rcu(&vport->hash_node);
243 277
244 vport->ops->destroy(vport); 278 vport->ops->destroy(vport);
279
280 module_put(vport->ops->owner);
245} 281}
246 282
247/** 283/**
@@ -344,7 +380,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
344 * 380 *
345 * Must be called with ovs_mutex. 381 * Must be called with ovs_mutex.
346 */ 382 */
347int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) 383int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
348{ 384{
349 struct vport_portids *old, *vport_portids; 385 struct vport_portids *old, *vport_portids;
350 386
@@ -435,7 +471,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
435 * skb->data should point to the Ethernet header. 471 * skb->data should point to the Ethernet header.
436 */ 472 */
437void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, 473void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
438 struct ovs_tunnel_info *tun_info) 474 const struct ovs_tunnel_info *tun_info)
439{ 475{
440 struct pcpu_sw_netstats *stats; 476 struct pcpu_sw_netstats *stats;
441 struct sw_flow_key key; 477 struct sw_flow_key key;
@@ -444,7 +480,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
444 stats = this_cpu_ptr(vport->percpu_stats); 480 stats = this_cpu_ptr(vport->percpu_stats);
445 u64_stats_update_begin(&stats->syncp); 481 u64_stats_update_begin(&stats->syncp);
446 stats->rx_packets++; 482 stats->rx_packets++;
447 stats->rx_bytes += skb->len; 483 stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
448 u64_stats_update_end(&stats->syncp); 484 u64_stats_update_end(&stats->syncp);
449 485
450 OVS_CB(skb)->input_vport = vport; 486 OVS_CB(skb)->input_vport = vport;
@@ -457,6 +493,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
457 } 493 }
458 ovs_dp_process_packet(skb, &key); 494 ovs_dp_process_packet(skb, &key);
459} 495}
496EXPORT_SYMBOL_GPL(ovs_vport_receive);
460 497
461/** 498/**
462 * ovs_vport_send - send a packet on a device 499 * ovs_vport_send - send a packet on a device
@@ -482,10 +519,9 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
482 u64_stats_update_end(&stats->syncp); 519 u64_stats_update_end(&stats->syncp);
483 } else if (sent < 0) { 520 } else if (sent < 0) {
484 ovs_vport_record_error(vport, VPORT_E_TX_ERROR); 521 ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
485 kfree_skb(skb); 522 } else {
486 } else
487 ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); 523 ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
488 524 }
489 return sent; 525 return sent;
490} 526}
491 527
@@ -535,3 +571,65 @@ void ovs_vport_deferred_free(struct vport *vport)
535 571
536 call_rcu(&vport->rcu, free_vport_rcu); 572 call_rcu(&vport->rcu, free_vport_rcu);
537} 573}
574EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
575
576int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
577 struct net *net,
578 const struct ovs_tunnel_info *tun_info,
579 u8 ipproto,
580 u32 skb_mark,
581 __be16 tp_src,
582 __be16 tp_dst)
583{
584 const struct ovs_key_ipv4_tunnel *tun_key;
585 struct rtable *rt;
586 struct flowi4 fl;
587
588 if (unlikely(!tun_info))
589 return -EINVAL;
590
591 tun_key = &tun_info->tunnel;
592
593 /* Route lookup to get source IP address.
594 * The process may need to be changed if the corresponding process
595 * in vports ops changed.
596 */
597 memset(&fl, 0, sizeof(fl));
598 fl.daddr = tun_key->ipv4_dst;
599 fl.saddr = tun_key->ipv4_src;
600 fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
601 fl.flowi4_mark = skb_mark;
602 fl.flowi4_proto = ipproto;
603
604 rt = ip_route_output_key(net, &fl);
605 if (IS_ERR(rt))
606 return PTR_ERR(rt);
607
608 ip_rt_put(rt);
609
610 /* Generate egress_tun_info based on tun_info,
611 * saddr, tp_src and tp_dst
612 */
613 __ovs_flow_tun_info_init(egress_tun_info,
614 fl.saddr, tun_key->ipv4_dst,
615 tun_key->ipv4_tos,
616 tun_key->ipv4_ttl,
617 tp_src, tp_dst,
618 tun_key->tun_id,
619 tun_key->tun_flags,
620 tun_info->options,
621 tun_info->options_len);
622
623 return 0;
624}
625EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
626
627int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
628 struct ovs_tunnel_info *info)
629{
630 /* get_egress_tun_info() is only implemented on tunnel ports. */
631 if (unlikely(!vport->ops->get_egress_tun_info))
632 return -EINVAL;
633
634 return vport->ops->get_egress_tun_info(vport, skb, info);
635}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8942125de3a6..99c8e71d9e6c 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -45,19 +45,29 @@ void ovs_vport_exit(void);
45struct vport *ovs_vport_add(const struct vport_parms *); 45struct vport *ovs_vport_add(const struct vport_parms *);
46void ovs_vport_del(struct vport *); 46void ovs_vport_del(struct vport *);
47 47
48struct vport *ovs_vport_locate(struct net *net, const char *name); 48struct vport *ovs_vport_locate(const struct net *net, const char *name);
49 49
50void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); 50void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
51 51
52int ovs_vport_set_options(struct vport *, struct nlattr *options); 52int ovs_vport_set_options(struct vport *, struct nlattr *options);
53int ovs_vport_get_options(const struct vport *, struct sk_buff *); 53int ovs_vport_get_options(const struct vport *, struct sk_buff *);
54 54
55int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); 55int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
56int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); 56int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
57u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *); 57u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
58 58
59int ovs_vport_send(struct vport *, struct sk_buff *); 59int ovs_vport_send(struct vport *, struct sk_buff *);
60 60
61int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
62 struct net *net,
63 const struct ovs_tunnel_info *tun_info,
64 u8 ipproto,
65 u32 skb_mark,
66 __be16 tp_src,
67 __be16 tp_dst);
68int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
69 struct ovs_tunnel_info *info);
70
61/* The following definitions are for implementers of vport devices: */ 71/* The following definitions are for implementers of vport devices: */
62 72
63struct vport_err_stats { 73struct vport_err_stats {
@@ -146,6 +156,8 @@ struct vport_parms {
146 * @get_name: Get the device's name. 156 * @get_name: Get the device's name.
147 * @send: Send a packet on the device. Returns the length of the packet sent, 157 * @send: Send a packet on the device. Returns the length of the packet sent,
148 * zero for dropped packets or negative for error. 158 * zero for dropped packets or negative for error.
159 * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
160 * a packet.
149 */ 161 */
150struct vport_ops { 162struct vport_ops {
151 enum ovs_vport_type type; 163 enum ovs_vport_type type;
@@ -161,6 +173,11 @@ struct vport_ops {
161 const char *(*get_name)(const struct vport *); 173 const char *(*get_name)(const struct vport *);
162 174
163 int (*send)(struct vport *, struct sk_buff *); 175 int (*send)(struct vport *, struct sk_buff *);
176 int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
177 struct ovs_tunnel_info *);
178
179 struct module *owner;
180 struct list_head list;
164}; 181};
165 182
166enum vport_err_type { 183enum vport_err_type {
@@ -207,15 +224,7 @@ static inline struct vport *vport_from_priv(void *priv)
207} 224}
208 225
209void ovs_vport_receive(struct vport *, struct sk_buff *, 226void ovs_vport_receive(struct vport *, struct sk_buff *,
210 struct ovs_tunnel_info *); 227 const struct ovs_tunnel_info *);
211
212/* List of statically compiled vport implementations. Don't forget to also
213 * add yours to the list at the top of vport.c. */
214extern const struct vport_ops ovs_netdev_vport_ops;
215extern const struct vport_ops ovs_internal_vport_ops;
216extern const struct vport_ops ovs_gre_vport_ops;
217extern const struct vport_ops ovs_vxlan_vport_ops;
218extern const struct vport_ops ovs_geneve_vport_ops;
219 228
220static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, 229static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
221 const void *start, unsigned int len) 230 const void *start, unsigned int len)
@@ -224,4 +233,7 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
224 skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); 233 skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
225} 234}
226 235
236int ovs_vport_ops_register(struct vport_ops *ops);
237void ovs_vport_ops_unregister(struct vport_ops *ops);
238
227#endif /* vport.h */ 239#endif /* vport.h */