author    Jarno Rajahalme <jrajahalme@nicira.com>    2015-02-05 16:40:49 -0500
committer David S. Miller <davem@davemloft.net>      2015-02-08 01:40:17 -0500
commit    83d2b9ba1abca241df44a502b6da950a25856b5b
tree      16b6618df6b9cdbbf955457352cb71acd8cca324 /net/openvswitch
parent    2150f984258e7909327cb90115c8ff41b4e9acb5

net: openvswitch: Support masked set actions.
OVS userspace already probes the openvswitch kernel module for
OVS_ACTION_ATTR_SET_MASKED support. This patch adds the kernel module
implementation of masked set actions.

The existing set action sets many fields at once. When only a subset of the
IP header fields, for example, should be modified, all the IP fields need to
be exact matched so that the other field values can be copied to the set
action. A masked set action allows modification of an arbitrary subset of
the supported header bits without requiring the rest to be matched.

Masked set actions are now supported for all writeable key types, except
for the tunnel key. The set tunnel action is an exception: any input tunnel
info is cleared before action processing starts, so there is no tunnel info
to mask.

The kernel module converts all (non-tunnel) set actions to masked set
actions. This makes action processing more uniform and results in less
branching and less duplication in the action processing code. When returning
actions to userspace, fully masked set actions are converted back to normal
set actions. We use a kernel-internal action code to tell the
userspace-provided and converted masked set actions apart.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
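To make the masking rule concrete before reading the diff: the patch
introduces a MASKED() helper whose rule is that bits covered by the mask come
from the key, while all other bits keep their old value. Below is a minimal
userspace sketch of that rule (plain C, not kernel code; the TTL values are
invented for illustration):

#include <stdio.h>
#include <stdint.h>

/* Same combine rule as the MASKED() macro added in actions.c:
 * masked bits come from KEY, unmasked bits are retained from OLD.
 * KEY must not have any bits set outside of MASK.
 */
#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))

int main(void)
{
        uint8_t old_ttl = 64;   /* current header field (invented value) */

        /* Fully masked: behaves like a plain set, the field is replaced. */
        printf("full mask:    %u\n", MASKED(old_ttl, 1, 0xff));

        /* Partially masked: only the low nibble is rewritten; the high
         * nibble of the old value survives (0x40 | 0x05 = 0x45).
         */
        printf("partial mask: 0x%02x\n", MASKED(old_ttl, 0x05, 0x0f));
        return 0;
}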
Diffstat (limited to 'net/openvswitch')

 net/openvswitch/actions.c      | 373
 net/openvswitch/flow_netlink.c | 161
 2 files changed, 362 insertions(+), 172 deletions(-)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b4cffe686126..b491c1c296fe 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
-static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
-		    const __be32 *mpls_lse)
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
+
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const __be32 *mpls_lse, const __be32 *mask)
 {
 	__be32 *stack;
+	__be32 lse;
 	int err;
 
 	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	stack = (__be32 *)skb_mpls_header(skb);
+	lse = MASKED(*stack, *mpls_lse, *mask);
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be32 diff[] = { ~(*stack), *mpls_lse };
+		__be32 diff[] = { ~(*stack), lse };
+
 		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 					  ~skb->csum);
 	}
 
-	*stack = *mpls_lse;
-	key->mpls.top_lse = *mpls_lse;
+	*stack = lse;
+	flow_key->mpls.top_lse = lse;
 	return 0;
 }
 
@@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 }
 
-static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
-			const struct ovs_key_ethernet *eth_key)
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+	u16 *dst = (u16 *)dst_;
+	const u16 *src = (const u16 *)src_;
+	const u16 *mask = (const u16 *)mask_;
+
+	SET_MASKED(dst[0], src[0], mask[0]);
+	SET_MASKED(dst[1], src[1], mask[1]);
+	SET_MASKED(dst[2], src[2], mask[2]);
+}
+
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
+			const struct ovs_key_ethernet *key,
+			const struct ovs_key_ethernet *mask)
 {
 	int err;
+
 	err = skb_ensure_writable(skb, ETH_HLEN);
 	if (unlikely(err))
 		return err;
 
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
-	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+			       mask->eth_src);
+	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+			       mask->eth_dst);
 
 	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	ether_addr_copy(key->eth.src, eth_key->eth_src);
-	ether_addr_copy(key->eth.dst, eth_key->eth_dst);
+	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
 	return 0;
 }
 
@@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 	}
 }
 
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+			   const __be32 mask[4], __be32 masked[4])
+{
+	masked[0] = MASKED(old[0], addr[0], mask[0]);
+	masked[1] = MASKED(old[1], addr[1], mask[1]);
+	masked[2] = MASKED(old[2], addr[2], mask[2]);
+	masked[3] = MASKED(old[3], addr[3], mask[3]);
+}
+
 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 			  __be32 addr[4], const __be32 new_addr[4],
 			  bool recalculate_csum)
@@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 	memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 {
-	nh->priority = tc >> 4;
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+	/* Bits 21-24 are always unmasked, so this retains their values. */
+	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+		       u8 mask)
 {
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
-	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
-	nh->flow_lbl[2] = fl & 0x000000FF;
-}
+	new_ttl = MASKED(nh->ttl, new_ttl, mask);
 
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
-{
 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 	nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv4 *key,
+		    const struct ovs_key_ipv4 *mask)
 {
 	struct iphdr *nh;
+	__be32 new_addr;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
@@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
 
 	nh = ip_hdr(skb);
 
-	if (ipv4_key->ipv4_src != nh->saddr) {
-		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
-		key->ipv4.addr.src = ipv4_key->ipv4_src;
-	}
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (mask->ipv4_src) {
+		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 
-	if (ipv4_key->ipv4_dst != nh->daddr) {
-		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
-		key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+		if (unlikely(new_addr != nh->saddr)) {
+			set_ip_addr(skb, nh, &nh->saddr, new_addr);
+			flow_key->ipv4.addr.src = new_addr;
+		}
 	}
+	if (mask->ipv4_dst) {
+		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 
-	if (ipv4_key->ipv4_tos != nh->tos) {
-		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
-		key->ip.tos = nh->tos;
+		if (unlikely(new_addr != nh->daddr)) {
+			set_ip_addr(skb, nh, &nh->daddr, new_addr);
+			flow_key->ipv4.addr.dst = new_addr;
+		}
 	}
-
-	if (ipv4_key->ipv4_ttl != nh->ttl) {
-		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
-		key->ip.ttl = ipv4_key->ipv4_ttl;
+	if (mask->ipv4_tos) {
+		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
+		flow_key->ip.tos = nh->tos;
+	}
+	if (mask->ipv4_ttl) {
+		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+		flow_key->ip.ttl = nh->ttl;
 	}
 
 	return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv6 *key,
+		    const struct ovs_key_ipv6 *mask)
 {
 	struct ipv6hdr *nh;
 	int err;
-	__be32 *saddr;
-	__be32 *daddr;
 
 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
 				  sizeof(struct ipv6hdr));
@@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	nh = ipv6_hdr(skb);
-	saddr = (__be32 *)&nh->saddr;
-	daddr = (__be32 *)&nh->daddr;
-
-	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
-			      ipv6_key->ipv6_src, true);
-		memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
-		       sizeof(ipv6_key->ipv6_src));
-	}
 
-	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+		__be32 *saddr = (__be32 *)&nh->saddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
+			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+				      true);
+			memcpy(&flow_key->ipv6.addr.src, masked,
+			       sizeof(flow_key->ipv6.addr.src));
+		}
+	}
+	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 		unsigned int offset = 0;
 		int flags = IP6_FH_F_SKIP_RH;
 		bool recalc_csum = true;
-
-		if (ipv6_ext_hdr(nh->nexthdr))
-			recalc_csum = ipv6_find_hdr(skb, &offset,
-						    NEXTHDR_ROUTING, NULL,
-						    &flags) != NEXTHDR_ROUTING;
-
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
-			      ipv6_key->ipv6_dst, recalc_csum);
-		memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
-		       sizeof(ipv6_key->ipv6_dst));
+		__be32 *daddr = (__be32 *)&nh->daddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
+
+		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
+			if (ipv6_ext_hdr(nh->nexthdr))
+				recalc_csum = (ipv6_find_hdr(skb, &offset,
+							     NEXTHDR_ROUTING,
+							     NULL, &flags)
+					       != NEXTHDR_ROUTING);
+
+			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+				      recalc_csum);
+			memcpy(&flow_key->ipv6.addr.dst, masked,
+			       sizeof(flow_key->ipv6.addr.dst));
+		}
+	}
+	if (mask->ipv6_tclass) {
+		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+		flow_key->ip.tos = ipv6_get_dsfield(nh);
+	}
+	if (mask->ipv6_label) {
+		set_ipv6_fl(nh, ntohl(key->ipv6_label),
+			    ntohl(mask->ipv6_label));
+		flow_key->ipv6.label =
+		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	}
+	if (mask->ipv6_hlimit) {
+		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+		flow_key->ip.ttl = nh->hop_limit;
 	}
-
-	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
-	key->ip.tos = ipv6_get_dsfield(nh);
-
-	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
-
-	nh->hop_limit = ipv6_key->ipv6_hlimit;
-	key->ip.ttl = ipv6_key->ipv6_hlimit;
 	return 0;
 }
 
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
 			__be16 new_port, __sum16 *check)
 {
 	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
 	*port = new_port;
-	skb_clear_hash(skb);
-}
-
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
-{
-	struct udphdr *uh = udp_hdr(skb);
-
-	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
-		set_tp_port(skb, port, new_port, &uh->check);
-
-		if (!uh->check)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		*port = new_port;
-		skb_clear_hash(skb);
-	}
 }
 
-static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
-		   const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_udp *key,
+		   const struct ovs_key_udp *mask)
 {
 	struct udphdr *uh;
+	__be16 src, dst;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	uh = udp_hdr(skb);
-	if (udp_port_key->udp_src != uh->source) {
-		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
-		key->tp.src = udp_port_key->udp_src;
-	}
+	/* Either of the masks is non-zero, so do not bother checking them. */
+	src = MASKED(uh->source, key->udp_src, mask->udp_src);
+	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 
-	if (udp_port_key->udp_dst != uh->dest) {
-		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
-		key->tp.dst = udp_port_key->udp_dst;
+	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (likely(src != uh->source)) {
+			set_tp_port(skb, &uh->source, src, &uh->check);
+			flow_key->tp.src = src;
+		}
+		if (likely(dst != uh->dest)) {
+			set_tp_port(skb, &uh->dest, dst, &uh->check);
+			flow_key->tp.dst = dst;
+		}
+
+		if (unlikely(!uh->check))
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		uh->source = src;
+		uh->dest = dst;
+		flow_key->tp.src = src;
+		flow_key->tp.dst = dst;
 	}
 
+	skb_clear_hash(skb);
+
 	return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
-		   const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_tcp *key,
+		   const struct ovs_key_tcp *mask)
 {
 	struct tcphdr *th;
+	__be16 src, dst;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	th = tcp_hdr(skb);
-	if (tcp_port_key->tcp_src != th->source) {
-		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
-		key->tp.src = tcp_port_key->tcp_src;
+	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+	if (likely(src != th->source)) {
+		set_tp_port(skb, &th->source, src, &th->check);
+		flow_key->tp.src = src;
 	}
-
-	if (tcp_port_key->tcp_dst != th->dest) {
-		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
-		key->tp.dst = tcp_port_key->tcp_dst;
+	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+	if (likely(dst != th->dest)) {
+		set_tp_port(skb, &th->dest, dst, &th->check);
+		flow_key->tp.dst = dst;
 	}
+	skb_clear_hash(skb);
 
 	return 0;
 }
 
-static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_sctp *key,
+		    const struct ovs_key_sctp *mask)
 {
+	unsigned int sctphoff = skb_transport_offset(skb);
 	struct sctphdr *sh;
+	__le32 old_correct_csum, new_csum, old_csum;
 	int err;
-	unsigned int sctphoff = skb_transport_offset(skb);
 
 	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
 	if (unlikely(err))
 		return err;
 
 	sh = sctp_hdr(skb);
-	if (sctp_port_key->sctp_src != sh->source ||
-	    sctp_port_key->sctp_dst != sh->dest) {
-		__le32 old_correct_csum, new_csum, old_csum;
+	old_csum = sh->checksum;
+	old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 
-		old_csum = sh->checksum;
-		old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
+	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
 
-		sh->source = sctp_port_key->sctp_src;
-		sh->dest = sctp_port_key->sctp_dst;
+	new_csum = sctp_compute_cksum(skb, sctphoff);
 
-		new_csum = sctp_compute_cksum(skb, sctphoff);
+	/* Carry any checksum errors through. */
+	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 
-		/* Carry any checksum errors through. */
-		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
-
-		skb_clear_hash(skb);
-		key->tp.src = sctp_port_key->sctp_src;
-		key->tp.dst = sctp_port_key->sctp_dst;
-	}
+	skb_clear_hash(skb);
+	flow_key->tp.src = sh->source;
+	flow_key->tp.dst = sh->dest;
 
 	return 0;
 }
@@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 	key->ovs_flow_hash = hash;
 }
 
-static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
-			      const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb,
+			      struct sw_flow_key *flow_key,
+			      const struct nlattr *a)
+{
+	/* Only tunnel set execution is supported without a mask. */
+	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+		OVS_CB(skb)->egress_tun_info = nla_data(a);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+				     struct sw_flow_key *flow_key,
+				     const struct nlattr *a)
 {
 	int err = 0;
 
-	switch (nla_type(nested_attr)) {
+	switch (nla_type(a)) {
 	case OVS_KEY_ATTR_PRIORITY:
-		skb->priority = nla_get_u32(nested_attr);
-		key->phy.priority = skb->priority;
+		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
+		flow_key->phy.priority = skb->priority;
 		break;
 
 	case OVS_KEY_ATTR_SKB_MARK:
-		skb->mark = nla_get_u32(nested_attr);
-		key->phy.skb_mark = skb->mark;
+		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+		flow_key->phy.skb_mark = skb->mark;
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL_INFO:
-		OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
+		/* Masked data not supported for tunnel. */
+		err = -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
-		err = set_eth_addr(skb, key, nla_data(nested_attr));
+		err = set_eth_addr(skb, flow_key, nla_data(a),
+				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
 	case OVS_KEY_ATTR_IPV4:
-		err = set_ipv4(skb, key, nla_data(nested_attr));
+		err = set_ipv4(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv4 *));
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
-		err = set_ipv6(skb, key, nla_data(nested_attr));
+		err = set_ipv6(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv6 *));
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp(skb, key, nla_data(nested_attr));
+		err = set_tcp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_tcp *));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp(skb, key, nla_data(nested_attr));
+		err = set_udp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_udp *));
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		err = set_sctp(skb, key, nla_data(nested_attr));
+		err = set_sctp(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_sctp *));
 		break;
 
 	case OVS_KEY_ATTR_MPLS:
-		err = set_mpls(skb, key, nla_data(nested_attr));
+		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
+								    __be32 *));
 		break;
 	}
 
@@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = execute_set_action(skb, key, nla_data(a));
 			break;
 
+		case OVS_ACTION_ATTR_SET_MASKED:
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = execute_masked_set_action(skb, key, nla_data(a));
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample(dp, skb, key, a);
 			break;
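A note on the attribute layout that get_mask() above depends on: a masked set
action carries the key immediately followed by an equally sized mask inside a
single nested attribute, so the mask is found by stepping one whole struct
past the key. The sketch below restates that layout in userspace C;
demo_key_ipv4 is an invented stand-in for struct ovs_key_ipv4, and the array
stands in for the netlink attribute payload:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Invented stand-in for struct ovs_key_ipv4 (illustration only). */
struct demo_key_ipv4 {
        uint32_t ipv4_src;
        uint32_t ipv4_dst;
        uint8_t  ipv4_tos;
        uint8_t  ipv4_ttl;
};

int main(void)
{
        /* Key followed by an equally sized mask: the mask sits at the
         * midpoint of the data, as the get_mask() comment says.
         */
        struct demo_key_ipv4 payload[2];        /* [0] = key, [1] = mask */
        struct demo_key_ipv4 key  = { .ipv4_ttl = 1 };
        struct demo_key_ipv4 mask = { .ipv4_ttl = 0xff };

        memcpy(&payload[0], &key, sizeof(key));
        memcpy(&payload[1], &mask, sizeof(mask));

        /* Equivalent of get_mask(a, struct demo_key_ipv4 *):
         * step one whole struct past the key.
         */
        const struct demo_key_ipv4 *k = &payload[0];
        const struct demo_key_ipv4 *m = k + 1;

        printf("key ttl=%u mask ttl=0x%02x\n", k->ipv4_ttl, m->ipv4_ttl);
        return 0;
}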
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 8b9a612b39d1..993281e6278d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1695,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr,
 	return 0;
 }
 
-static int validate_tp_port(const struct sw_flow_key *flow_key,
-			    __be16 eth_type)
-{
-	if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
-	    (flow_key->tp.src || flow_key->tp.dst))
-		return 0;
-
-	return -EINVAL;
-}
-
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    struct sw_flow_mask *mask)
@@ -1805,23 +1795,45 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding.
+ */
+static bool validate_masked(u8 *data, int len)
+{
+	u8 *mask = data + len;
+
+	while (len--)
+		if (*data++ & ~*mask++)
+			return false;
+
+	return true;
+}
+
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun, __be16 eth_type, bool log)
+			bool *skip_copy, __be16 eth_type, bool masked, bool log)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
+	size_t key_len;
 
 	/* There can be only one key in a action */
 	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
 		return -EINVAL;
 
+	key_len = nla_len(ovs_key);
+	if (masked)
+		key_len /= 2;
+
 	if (key_type > OVS_KEY_ATTR_MAX ||
-	    (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
+	    (ovs_key_lens[key_type].len != key_len &&
 	     ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
 		return -EINVAL;
 
+	if (masked && !validate_masked(nla_data(ovs_key), key_len))
+		return -EINVAL;
+
 	switch (key_type) {
 	const struct ovs_key_ipv4 *ipv4_key;
 	const struct ovs_key_ipv6 *ipv6_key;
@@ -1836,7 +1848,10 @@ static int validate_set(const struct nlattr *a,
 		if (eth_p_mpls(eth_type))
 			return -EINVAL;
 
-		*set_tun = true;
+		if (masked)
+			return -EINVAL; /* Masked tunnel set not supported. */
+
+		*skip_copy = true;
 		err = validate_and_copy_set_tun(a, sfa, log);
 		if (err)
 			return err;
@@ -1846,48 +1861,66 @@ static int validate_set(const struct nlattr *a,
 		if (eth_type != htons(ETH_P_IP))
 			return -EINVAL;
 
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
 		ipv4_key = nla_data(ovs_key);
-		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-			return -EINVAL;
 
-		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv4 *mask = ipv4_key + 1;
 
+			/* Non-writeable fields. */
+			if (mask->ipv4_proto || mask->ipv4_frag)
+				return -EINVAL;
+		} else {
+			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+				return -EINVAL;
+
+			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
 		if (eth_type != htons(ETH_P_IPV6))
 			return -EINVAL;
 
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
 		ipv6_key = nla_data(ovs_key);
-		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-			return -EINVAL;
 
-		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+
+			/* Non-writeable fields. */
+			if (mask->ipv6_proto || mask->ipv6_frag)
+				return -EINVAL;
+
+			/* Invalid bits in the flow label mask? */
+			if (ntohl(mask->ipv6_label) & 0xFFF00000)
+				return -EINVAL;
+		} else {
+			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+				return -EINVAL;
 
+			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
 			return -EINVAL;
 
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		if (flow_key->ip.proto != IPPROTO_TCP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_TCP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	case OVS_KEY_ATTR_UDP:
-		if (flow_key->ip.proto != IPPROTO_UDP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_UDP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	case OVS_KEY_ATTR_MPLS:
 		if (!eth_p_mpls(eth_type))
@@ -1895,15 +1928,45 @@ static int validate_set(const struct nlattr *a,
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		if (flow_key->ip.proto != IPPROTO_SCTP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_SCTP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	/* Convert non-masked non-tunnel set actions to masked set actions. */
+	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+		int start, len = key_len * 2;
+		struct nlattr *at;
+
+		*skip_copy = true;
+
+		start = add_nested_action_start(sfa,
+						OVS_ACTION_ATTR_SET_TO_MASKED,
+						log);
+		if (start < 0)
+			return start;
+
+		at = __add_action(sfa, key_type, NULL, len, log);
+		if (IS_ERR(at))
+			return PTR_ERR(at);
+
+		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
+		/* Clear non-writeable bits from otherwise writeable fields. */
+		if (key_type == OVS_KEY_ATTR_IPV6) {
+			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+			mask->ipv6_label &= htonl(0x000FFFFF);
+		}
+		add_nested_action_end(*sfa, start);
+	}
+
 	return 0;
 }
 
@@ -1965,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
 		};
@@ -2060,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 
 		case OVS_ACTION_ATTR_SET:
 			err = validate_set(a, key, sfa,
-					   &skip_copy, eth_type, log);
+					   &skip_copy, eth_type, false, log);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = validate_set(a, key, sfa,
+					   &skip_copy, eth_type, true, log);
 			if (err)
 				return err;
 			break;
@@ -2090,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 	return 0;
 }
 
+/* 'key' must be the masked key. */
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 struct sw_flow_actions **sfa, bool log)
@@ -2177,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 	return 0;
 }
 
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+						struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	size_t key_len = nla_len(ovs_key) / 2;
+
+	/* Revert the conversion we did from a non-masked set action to
+	 * masked set action.
+	 */
+	if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 {
 	const struct nlattr *a;
@@ -2192,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = masked_set_action_to_set_action_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample_action_to_attr(a, skb);
 			if (err)
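Finally, the invariant that validate_masked() enforces — a key may not have
any bit set outside its mask — is worth restating on its own. A small
userspace sketch of the same byte-wise check (buffers invented for the
example):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Byte-wise restatement of the patch's validate_masked():
 * returns false if any key bit is set where the mask is clear.
 */
static bool validate_masked(const uint8_t *data, const uint8_t *mask, int len)
{
        while (len--)
                if (*data++ & ~*mask++)
                        return false;
        return true;
}

int main(void)
{
        uint8_t key[]  = { 0x12, 0x00 };
        uint8_t mask[] = { 0xff, 0x00 };

        printf("in-mask key:     %d\n", validate_masked(key, mask, 2)); /* 1 */

        key[1] = 0x01;  /* a bit outside the mask -> rejected */
        printf("out-of-mask key: %d\n", validate_masked(key, mask, 2)); /* 0 */
        return 0;
}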