diff options
| author | Jarno Rajahalme <jarno@ovn.org> | 2016-03-10 13:54:23 -0500 |
|---|---|---|
| committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2016-03-14 18:47:29 -0400 |
| commit | 05752523e56502cd9975aec0a2ded465d51a71f3 (patch) | |
| tree | 790e1c4d9855ca15e1104ef9697c342dfe08a780 /net/openvswitch | |
| parent | 28b6e0c1ace45779c60e7cefe6d469b7ecb520b8 (diff) | |
openvswitch: Interface with NAT.
Extend OVS conntrack interface to cover NAT. New nested
OVS_CT_ATTR_NAT attribute may be used to include NAT with a CT action.
A bare OVS_CT_ATTR_NAT only mangles existing and expected connections.
If OVS_NAT_ATTR_SRC or OVS_NAT_ATTR_DST is included within the nested
attributes, new (non-committed/non-confirmed) connections are mangled
according to the rest of the nested attributes.
The corresponding OVS userspace patch series includes test cases (in
tests/system-traffic.at) that also serve as example uses.
This work extends on a branch by Thomas Graf at
https://github.com/tgraf/ovs/tree/nat.
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/openvswitch')
| -rw-r--r-- | net/openvswitch/Kconfig | 3 | ||||
| -rw-r--r-- | net/openvswitch/conntrack.c | 524 | ||||
| -rw-r--r-- | net/openvswitch/conntrack.h | 3 |
3 files changed, 502 insertions, 28 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index cd5fd9d728a7..234a73344c6e 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig | |||
| @@ -6,7 +6,8 @@ config OPENVSWITCH | |||
| 6 | tristate "Open vSwitch" | 6 | tristate "Open vSwitch" |
| 7 | depends on INET | 7 | depends on INET |
| 8 | depends on !NF_CONNTRACK || \ | 8 | depends on !NF_CONNTRACK || \ |
| 9 | (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) | 9 | (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ |
| 10 | (!NF_NAT || NF_NAT))) | ||
| 10 | select LIBCRC32C | 11 | select LIBCRC32C |
| 11 | select MPLS | 12 | select MPLS |
| 12 | select NET_MPLS_GSO | 13 | select NET_MPLS_GSO |
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index f718b724e650..dc5eb29fe7d6 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c | |||
| @@ -13,21 +13,31 @@ | |||
| 13 | 13 | ||
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/openvswitch.h> | 15 | #include <linux/openvswitch.h> |
| 16 | #include <linux/tcp.h> | ||
| 17 | #include <linux/udp.h> | ||
| 18 | #include <linux/sctp.h> | ||
| 16 | #include <net/ip.h> | 19 | #include <net/ip.h> |
| 17 | #include <net/netfilter/nf_conntrack_core.h> | 20 | #include <net/netfilter/nf_conntrack_core.h> |
| 18 | #include <net/netfilter/nf_conntrack_helper.h> | 21 | #include <net/netfilter/nf_conntrack_helper.h> |
| 19 | #include <net/netfilter/nf_conntrack_labels.h> | 22 | #include <net/netfilter/nf_conntrack_labels.h> |
| 23 | #include <net/netfilter/nf_conntrack_seqadj.h> | ||
| 20 | #include <net/netfilter/nf_conntrack_zones.h> | 24 | #include <net/netfilter/nf_conntrack_zones.h> |
| 21 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | 25 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> |
| 22 | 26 | ||
| 27 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 28 | #include <linux/netfilter/nf_nat.h> | ||
| 29 | #include <net/netfilter/nf_nat_core.h> | ||
| 30 | #include <net/netfilter/nf_nat_l3proto.h> | ||
| 31 | #endif | ||
| 32 | |||
| 23 | #include "datapath.h" | 33 | #include "datapath.h" |
| 24 | #include "conntrack.h" | 34 | #include "conntrack.h" |
| 25 | #include "flow.h" | 35 | #include "flow.h" |
| 26 | #include "flow_netlink.h" | 36 | #include "flow_netlink.h" |
| 27 | 37 | ||
| 28 | struct ovs_ct_len_tbl { | 38 | struct ovs_ct_len_tbl { |
| 29 | size_t maxlen; | 39 | int maxlen; |
| 30 | size_t minlen; | 40 | int minlen; |
| 31 | }; | 41 | }; |
| 32 | 42 | ||
| 33 | /* Metadata mark for masked write to conntrack mark */ | 43 | /* Metadata mark for masked write to conntrack mark */ |
| @@ -42,15 +52,25 @@ struct md_labels { | |||
| 42 | struct ovs_key_ct_labels mask; | 52 | struct ovs_key_ct_labels mask; |
| 43 | }; | 53 | }; |
| 44 | 54 | ||
| 55 | enum ovs_ct_nat { | ||
| 56 | OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ | ||
| 57 | OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ | ||
| 58 | OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ | ||
| 59 | }; | ||
| 60 | |||
| 45 | /* Conntrack action context for execution. */ | 61 | /* Conntrack action context for execution. */ |
| 46 | struct ovs_conntrack_info { | 62 | struct ovs_conntrack_info { |
| 47 | struct nf_conntrack_helper *helper; | 63 | struct nf_conntrack_helper *helper; |
| 48 | struct nf_conntrack_zone zone; | 64 | struct nf_conntrack_zone zone; |
| 49 | struct nf_conn *ct; | 65 | struct nf_conn *ct; |
| 50 | u8 commit : 1; | 66 | u8 commit : 1; |
| 67 | u8 nat : 3; /* enum ovs_ct_nat */ | ||
| 51 | u16 family; | 68 | u16 family; |
| 52 | struct md_mark mark; | 69 | struct md_mark mark; |
| 53 | struct md_labels labels; | 70 | struct md_labels labels; |
| 71 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 72 | struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ | ||
| 73 | #endif | ||
| 54 | }; | 74 | }; |
| 55 | 75 | ||
| 56 | static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); | 76 | static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); |
| @@ -137,12 +157,15 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, | |||
| 137 | ovs_ct_get_labels(ct, &key->ct.labels); | 157 | ovs_ct_get_labels(ct, &key->ct.labels); |
| 138 | } | 158 | } |
| 139 | 159 | ||
| 140 | /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has | 160 | /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has |
| 141 | * previously sent the packet to conntrack via the ct action. | 161 | * previously sent the packet to conntrack via the ct action. If |
| 162 | * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are | ||
| 163 | * initialized from the connection status. | ||
| 142 | */ | 164 | */ |
| 143 | static void ovs_ct_update_key(const struct sk_buff *skb, | 165 | static void ovs_ct_update_key(const struct sk_buff *skb, |
| 144 | const struct ovs_conntrack_info *info, | 166 | const struct ovs_conntrack_info *info, |
| 145 | struct sw_flow_key *key, bool post_ct) | 167 | struct sw_flow_key *key, bool post_ct, |
| 168 | bool keep_nat_flags) | ||
| 146 | { | 169 | { |
| 147 | const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; | 170 | const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; |
| 148 | enum ip_conntrack_info ctinfo; | 171 | enum ip_conntrack_info ctinfo; |
| @@ -160,6 +183,14 @@ static void ovs_ct_update_key(const struct sk_buff *skb, | |||
| 160 | */ | 183 | */ |
| 161 | if (ct->master) | 184 | if (ct->master) |
| 162 | state |= OVS_CS_F_RELATED; | 185 | state |= OVS_CS_F_RELATED; |
| 186 | if (keep_nat_flags) { | ||
| 187 | state |= key->ct.state & OVS_CS_F_NAT_MASK; | ||
| 188 | } else { | ||
| 189 | if (ct->status & IPS_SRC_NAT) | ||
| 190 | state |= OVS_CS_F_SRC_NAT; | ||
| 191 | if (ct->status & IPS_DST_NAT) | ||
| 192 | state |= OVS_CS_F_DST_NAT; | ||
| 193 | } | ||
| 163 | zone = nf_ct_zone(ct); | 194 | zone = nf_ct_zone(ct); |
| 164 | } else if (post_ct) { | 195 | } else if (post_ct) { |
| 165 | state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; | 196 | state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; |
| @@ -174,7 +205,7 @@ static void ovs_ct_update_key(const struct sk_buff *skb, | |||
| 174 | */ | 205 | */ |
| 175 | void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) | 206 | void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) |
| 176 | { | 207 | { |
| 177 | ovs_ct_update_key(skb, NULL, key, false); | 208 | ovs_ct_update_key(skb, NULL, key, false, false); |
| 178 | } | 209 | } |
| 179 | 210 | ||
| 180 | int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) | 211 | int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) |
| @@ -263,6 +294,7 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) | |||
| 263 | enum ip_conntrack_info ctinfo; | 294 | enum ip_conntrack_info ctinfo; |
| 264 | unsigned int protoff; | 295 | unsigned int protoff; |
| 265 | struct nf_conn *ct; | 296 | struct nf_conn *ct; |
| 297 | int err; | ||
| 266 | 298 | ||
| 267 | ct = nf_ct_get(skb, &ctinfo); | 299 | ct = nf_ct_get(skb, &ctinfo); |
| 268 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | 300 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) |
| @@ -299,7 +331,18 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) | |||
| 299 | return NF_DROP; | 331 | return NF_DROP; |
| 300 | } | 332 | } |
| 301 | 333 | ||
| 302 | return helper->help(skb, protoff, ct, ctinfo); | 334 | err = helper->help(skb, protoff, ct, ctinfo); |
| 335 | if (err != NF_ACCEPT) | ||
| 336 | return err; | ||
| 337 | |||
| 338 | /* Adjust seqs after helper. This is needed due to some helpers (e.g., | ||
| 339 | * FTP with NAT) adjusting the TCP payload size when mangling IP | ||
| 340 | * addresses and/or port numbers in the text-based control connection. | ||
| 341 | */ | ||
| 342 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && | ||
| 343 | !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) | ||
| 344 | return NF_DROP; | ||
| 345 | return NF_ACCEPT; | ||
| 303 | } | 346 | } |
| 304 | 347 | ||
| 305 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero | 348 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero |
| @@ -468,6 +511,200 @@ static bool skb_nfct_cached(struct net *net, | |||
| 468 | return true; | 511 | return true; |
| 469 | } | 512 | } |
| 470 | 513 | ||
| 514 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 515 | /* Modelled after nf_nat_ipv[46]_fn(). | ||
| 516 | * range is only used for new, uninitialized NAT state. | ||
| 517 | * Returns either NF_ACCEPT or NF_DROP. | ||
| 518 | */ | ||
| 519 | static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, | ||
| 520 | enum ip_conntrack_info ctinfo, | ||
| 521 | const struct nf_nat_range *range, | ||
| 522 | enum nf_nat_manip_type maniptype) | ||
| 523 | { | ||
| 524 | int hooknum, nh_off, err = NF_ACCEPT; | ||
| 525 | |||
| 526 | nh_off = skb_network_offset(skb); | ||
| 527 | skb_pull(skb, nh_off); | ||
| 528 | |||
| 529 | /* See HOOK2MANIP(). */ | ||
| 530 | if (maniptype == NF_NAT_MANIP_SRC) | ||
| 531 | hooknum = NF_INET_LOCAL_IN; /* Source NAT */ | ||
| 532 | else | ||
| 533 | hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ | ||
| 534 | |||
| 535 | switch (ctinfo) { | ||
| 536 | case IP_CT_RELATED: | ||
| 537 | case IP_CT_RELATED_REPLY: | ||
| 538 | if (skb->protocol == htons(ETH_P_IP) && | ||
| 539 | ip_hdr(skb)->protocol == IPPROTO_ICMP) { | ||
| 540 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, | ||
| 541 | hooknum)) | ||
| 542 | err = NF_DROP; | ||
| 543 | goto push; | ||
| 544 | #if IS_ENABLED(CONFIG_NF_NAT_IPV6) | ||
| 545 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | ||
| 546 | __be16 frag_off; | ||
| 547 | u8 nexthdr = ipv6_hdr(skb)->nexthdr; | ||
| 548 | int hdrlen = ipv6_skip_exthdr(skb, | ||
| 549 | sizeof(struct ipv6hdr), | ||
| 550 | &nexthdr, &frag_off); | ||
| 551 | |||
| 552 | if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { | ||
| 553 | if (!nf_nat_icmpv6_reply_translation(skb, ct, | ||
| 554 | ctinfo, | ||
| 555 | hooknum, | ||
| 556 | hdrlen)) | ||
| 557 | err = NF_DROP; | ||
| 558 | goto push; | ||
| 559 | } | ||
| 560 | #endif | ||
| 561 | } | ||
| 562 | /* Non-ICMP, fall thru to initialize if needed. */ | ||
| 563 | case IP_CT_NEW: | ||
| 564 | /* Seen it before? This can happen for loopback, retrans, | ||
| 565 | * or local packets. | ||
| 566 | */ | ||
| 567 | if (!nf_nat_initialized(ct, maniptype)) { | ||
| 568 | /* Initialize according to the NAT action. */ | ||
| 569 | err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) | ||
| 570 | /* Action is set up to establish a new | ||
| 571 | * mapping. | ||
| 572 | */ | ||
| 573 | ? nf_nat_setup_info(ct, range, maniptype) | ||
| 574 | : nf_nat_alloc_null_binding(ct, hooknum); | ||
| 575 | if (err != NF_ACCEPT) | ||
| 576 | goto push; | ||
| 577 | } | ||
| 578 | break; | ||
| 579 | |||
| 580 | case IP_CT_ESTABLISHED: | ||
| 581 | case IP_CT_ESTABLISHED_REPLY: | ||
| 582 | break; | ||
| 583 | |||
| 584 | default: | ||
| 585 | err = NF_DROP; | ||
| 586 | goto push; | ||
| 587 | } | ||
| 588 | |||
| 589 | err = nf_nat_packet(ct, ctinfo, hooknum, skb); | ||
| 590 | push: | ||
| 591 | skb_push(skb, nh_off); | ||
| 592 | |||
| 593 | return err; | ||
| 594 | } | ||
| 595 | |||
| 596 | static void ovs_nat_update_key(struct sw_flow_key *key, | ||
| 597 | const struct sk_buff *skb, | ||
| 598 | enum nf_nat_manip_type maniptype) | ||
| 599 | { | ||
| 600 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
| 601 | __be16 src; | ||
| 602 | |||
| 603 | key->ct.state |= OVS_CS_F_SRC_NAT; | ||
| 604 | if (key->eth.type == htons(ETH_P_IP)) | ||
| 605 | key->ipv4.addr.src = ip_hdr(skb)->saddr; | ||
| 606 | else if (key->eth.type == htons(ETH_P_IPV6)) | ||
| 607 | memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, | ||
| 608 | sizeof(key->ipv6.addr.src)); | ||
| 609 | else | ||
| 610 | return; | ||
| 611 | |||
| 612 | if (key->ip.proto == IPPROTO_UDP) | ||
| 613 | src = udp_hdr(skb)->source; | ||
| 614 | else if (key->ip.proto == IPPROTO_TCP) | ||
| 615 | src = tcp_hdr(skb)->source; | ||
| 616 | else if (key->ip.proto == IPPROTO_SCTP) | ||
| 617 | src = sctp_hdr(skb)->source; | ||
| 618 | else | ||
| 619 | return; | ||
| 620 | |||
| 621 | key->tp.src = src; | ||
| 622 | } else { | ||
| 623 | __be16 dst; | ||
| 624 | |||
| 625 | key->ct.state |= OVS_CS_F_DST_NAT; | ||
| 626 | if (key->eth.type == htons(ETH_P_IP)) | ||
| 627 | key->ipv4.addr.dst = ip_hdr(skb)->daddr; | ||
| 628 | else if (key->eth.type == htons(ETH_P_IPV6)) | ||
| 629 | memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, | ||
| 630 | sizeof(key->ipv6.addr.dst)); | ||
| 631 | else | ||
| 632 | return; | ||
| 633 | |||
| 634 | if (key->ip.proto == IPPROTO_UDP) | ||
| 635 | dst = udp_hdr(skb)->dest; | ||
| 636 | else if (key->ip.proto == IPPROTO_TCP) | ||
| 637 | dst = tcp_hdr(skb)->dest; | ||
| 638 | else if (key->ip.proto == IPPROTO_SCTP) | ||
| 639 | dst = sctp_hdr(skb)->dest; | ||
| 640 | else | ||
| 641 | return; | ||
| 642 | |||
| 643 | key->tp.dst = dst; | ||
| 644 | } | ||
| 645 | } | ||
| 646 | |||
| 647 | /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ | ||
| 648 | static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, | ||
| 649 | const struct ovs_conntrack_info *info, | ||
| 650 | struct sk_buff *skb, struct nf_conn *ct, | ||
| 651 | enum ip_conntrack_info ctinfo) | ||
| 652 | { | ||
| 653 | enum nf_nat_manip_type maniptype; | ||
| 654 | int err; | ||
| 655 | |||
| 656 | if (nf_ct_is_untracked(ct)) { | ||
| 657 | /* A NAT action may only be performed on tracked packets. */ | ||
| 658 | return NF_ACCEPT; | ||
| 659 | } | ||
| 660 | |||
| 661 | /* Add NAT extension if not confirmed yet. */ | ||
| 662 | if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) | ||
| 663 | return NF_ACCEPT; /* Can't NAT. */ | ||
| 664 | |||
| 665 | /* Determine NAT type. | ||
| 666 | * Check if the NAT type can be deduced from the tracked connection. | ||
| 667 | * Make sure expected traffic is NATted only when committing. | ||
| 668 | */ | ||
| 669 | if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && | ||
| 670 | ct->status & IPS_NAT_MASK && | ||
| 671 | (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { | ||
| 672 | /* NAT an established or related connection like before. */ | ||
| 673 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) | ||
| 674 | /* This is the REPLY direction for a connection | ||
| 675 | * for which NAT was applied in the forward | ||
| 676 | * direction. Do the reverse NAT. | ||
| 677 | */ | ||
| 678 | maniptype = ct->status & IPS_SRC_NAT | ||
| 679 | ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; | ||
| 680 | else | ||
| 681 | maniptype = ct->status & IPS_SRC_NAT | ||
| 682 | ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; | ||
| 683 | } else if (info->nat & OVS_CT_SRC_NAT) { | ||
| 684 | maniptype = NF_NAT_MANIP_SRC; | ||
| 685 | } else if (info->nat & OVS_CT_DST_NAT) { | ||
| 686 | maniptype = NF_NAT_MANIP_DST; | ||
| 687 | } else { | ||
| 688 | return NF_ACCEPT; /* Connection is not NATed. */ | ||
| 689 | } | ||
| 690 | err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); | ||
| 691 | |||
| 692 | /* Mark NAT done if successful and update the flow key. */ | ||
| 693 | if (err == NF_ACCEPT) | ||
| 694 | ovs_nat_update_key(key, skb, maniptype); | ||
| 695 | |||
| 696 | return err; | ||
| 697 | } | ||
| 698 | #else /* !CONFIG_NF_NAT_NEEDED */ | ||
| 699 | static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, | ||
| 700 | const struct ovs_conntrack_info *info, | ||
| 701 | struct sk_buff *skb, struct nf_conn *ct, | ||
| 702 | enum ip_conntrack_info ctinfo) | ||
| 703 | { | ||
| 704 | return NF_ACCEPT; | ||
| 705 | } | ||
| 706 | #endif | ||
| 707 | |||
| 471 | /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if | 708 | /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if |
| 472 | * not done already. Update key with new CT state after passing the packet | 709 | * not done already. Update key with new CT state after passing the packet |
| 473 | * through conntrack. | 710 | * through conntrack. |
| @@ -509,19 +746,43 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | |||
| 509 | if (err != NF_ACCEPT) | 746 | if (err != NF_ACCEPT) |
| 510 | return -ENOENT; | 747 | return -ENOENT; |
| 511 | 748 | ||
| 512 | ovs_ct_update_key(skb, info, key, true); | 749 | /* Clear CT state NAT flags to mark that we have not yet done |
| 750 | * NAT after the nf_conntrack_in() call. We can actually clear | ||
| 751 | * the whole state, as it will be re-initialized below. | ||
| 752 | */ | ||
| 753 | key->ct.state = 0; | ||
| 754 | |||
| 755 | /* Update the key, but keep the NAT flags. */ | ||
| 756 | ovs_ct_update_key(skb, info, key, true, true); | ||
| 513 | } | 757 | } |
| 514 | 758 | ||
| 515 | /* Call the helper only if: | ||
| 516 | * - nf_conntrack_in() was executed above ("!cached") for a confirmed | ||
| 517 | * connection, or | ||
| 518 | * - When committing an unconfirmed connection. | ||
| 519 | */ | ||
| 520 | ct = nf_ct_get(skb, &ctinfo); | 759 | ct = nf_ct_get(skb, &ctinfo); |
| 521 | if (ct && (nf_ct_is_confirmed(ct) ? !cached : info->commit) && | 760 | if (ct) { |
| 522 | ovs_ct_helper(skb, info->family) != NF_ACCEPT) { | 761 | /* Packets starting a new connection must be NATted before the |
| 523 | WARN_ONCE(1, "helper rejected packet"); | 762 | * helper, so that the helper knows about the NAT. We enforce |
| 524 | return -EINVAL; | 763 | * this by delaying both NAT and helper calls for unconfirmed |
| 764 | * connections until the committing CT action. For later | ||
| 765 | * packets NAT and Helper may be called in either order. | ||
| 766 | * | ||
| 767 | * NAT will be done only if the CT action has NAT, and only | ||
| 768 | * once per packet (per zone), as guarded by the NAT bits in | ||
| 769 | * the key->ct.state. | ||
| 770 | */ | ||
| 771 | if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && | ||
| 772 | (nf_ct_is_confirmed(ct) || info->commit) && | ||
| 773 | ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { | ||
| 774 | return -EINVAL; | ||
| 775 | } | ||
| 776 | |||
| 777 | /* Call the helper only if: | ||
| 778 | * - nf_conntrack_in() was executed above ("!cached") for a | ||
| 779 | * confirmed connection, or | ||
| 780 | * - When committing an unconfirmed connection. | ||
| 781 | */ | ||
| 782 | if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) && | ||
| 783 | ovs_ct_helper(skb, info->family) != NF_ACCEPT) { | ||
| 784 | return -EINVAL; | ||
| 785 | } | ||
| 525 | } | 786 | } |
| 526 | 787 | ||
| 527 | return 0; | 788 | return 0; |
| @@ -545,15 +806,13 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | |||
| 545 | if (exp) { | 806 | if (exp) { |
| 546 | u8 state; | 807 | u8 state; |
| 547 | 808 | ||
| 809 | /* NOTE: New connections are NATted and Helped only when | ||
| 810 | * committed, so we are not calling into NAT here. | ||
| 811 | */ | ||
| 548 | state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; | 812 | state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; |
| 549 | __ovs_ct_update_key(key, state, &info->zone, exp->master); | 813 | __ovs_ct_update_key(key, state, &info->zone, exp->master); |
| 550 | } else { | 814 | } else |
| 551 | int err; | 815 | return __ovs_ct_lookup(net, key, info, skb); |
| 552 | |||
| 553 | err = __ovs_ct_lookup(net, key, info, skb); | ||
| 554 | if (err) | ||
| 555 | return err; | ||
| 556 | } | ||
| 557 | 816 | ||
| 558 | return 0; | 817 | return 0; |
| 559 | } | 818 | } |
| @@ -653,6 +912,135 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, | |||
| 653 | return 0; | 912 | return 0; |
| 654 | } | 913 | } |
| 655 | 914 | ||
| 915 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 916 | static int parse_nat(const struct nlattr *attr, | ||
| 917 | struct ovs_conntrack_info *info, bool log) | ||
| 918 | { | ||
| 919 | struct nlattr *a; | ||
| 920 | int rem; | ||
| 921 | bool have_ip_max = false; | ||
| 922 | bool have_proto_max = false; | ||
| 923 | bool ip_vers = (info->family == NFPROTO_IPV6); | ||
| 924 | |||
| 925 | nla_for_each_nested(a, attr, rem) { | ||
| 926 | static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { | ||
| 927 | [OVS_NAT_ATTR_SRC] = {0, 0}, | ||
| 928 | [OVS_NAT_ATTR_DST] = {0, 0}, | ||
| 929 | [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), | ||
| 930 | sizeof(struct in6_addr)}, | ||
| 931 | [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr), | ||
| 932 | sizeof(struct in6_addr)}, | ||
| 933 | [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, | ||
| 934 | [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, | ||
| 935 | [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, | ||
| 936 | [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, | ||
| 937 | [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, | ||
| 938 | }; | ||
| 939 | int type = nla_type(a); | ||
| 940 | |||
| 941 | if (type > OVS_NAT_ATTR_MAX) { | ||
| 942 | OVS_NLERR(log, | ||
| 943 | "Unknown NAT attribute (type=%d, max=%d).\n", | ||
| 944 | type, OVS_NAT_ATTR_MAX); | ||
| 945 | return -EINVAL; | ||
| 946 | } | ||
| 947 | |||
| 948 | if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { | ||
| 949 | OVS_NLERR(log, | ||
| 950 | "NAT attribute type %d has unexpected length (%d != %d).\n", | ||
| 951 | type, nla_len(a), | ||
| 952 | ovs_nat_attr_lens[type][ip_vers]); | ||
| 953 | return -EINVAL; | ||
| 954 | } | ||
| 955 | |||
| 956 | switch (type) { | ||
| 957 | case OVS_NAT_ATTR_SRC: | ||
| 958 | case OVS_NAT_ATTR_DST: | ||
| 959 | if (info->nat) { | ||
| 960 | OVS_NLERR(log, | ||
| 961 | "Only one type of NAT may be specified.\n" | ||
| 962 | ); | ||
| 963 | return -ERANGE; | ||
| 964 | } | ||
| 965 | info->nat |= OVS_CT_NAT; | ||
| 966 | info->nat |= ((type == OVS_NAT_ATTR_SRC) | ||
| 967 | ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); | ||
| 968 | break; | ||
| 969 | |||
| 970 | case OVS_NAT_ATTR_IP_MIN: | ||
| 971 | nla_memcpy(&info->range.min_addr, a, nla_len(a)); | ||
| 972 | info->range.flags |= NF_NAT_RANGE_MAP_IPS; | ||
| 973 | break; | ||
| 974 | |||
| 975 | case OVS_NAT_ATTR_IP_MAX: | ||
| 976 | have_ip_max = true; | ||
| 977 | nla_memcpy(&info->range.max_addr, a, | ||
| 978 | sizeof(info->range.max_addr)); | ||
| 979 | info->range.flags |= NF_NAT_RANGE_MAP_IPS; | ||
| 980 | break; | ||
| 981 | |||
| 982 | case OVS_NAT_ATTR_PROTO_MIN: | ||
| 983 | info->range.min_proto.all = htons(nla_get_u16(a)); | ||
| 984 | info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
| 985 | break; | ||
| 986 | |||
| 987 | case OVS_NAT_ATTR_PROTO_MAX: | ||
| 988 | have_proto_max = true; | ||
| 989 | info->range.max_proto.all = htons(nla_get_u16(a)); | ||
| 990 | info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
| 991 | break; | ||
| 992 | |||
| 993 | case OVS_NAT_ATTR_PERSISTENT: | ||
| 994 | info->range.flags |= NF_NAT_RANGE_PERSISTENT; | ||
| 995 | break; | ||
| 996 | |||
| 997 | case OVS_NAT_ATTR_PROTO_HASH: | ||
| 998 | info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; | ||
| 999 | break; | ||
| 1000 | |||
| 1001 | case OVS_NAT_ATTR_PROTO_RANDOM: | ||
| 1002 | info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; | ||
| 1003 | break; | ||
| 1004 | |||
| 1005 | default: | ||
| 1006 | OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); | ||
| 1007 | return -EINVAL; | ||
| 1008 | } | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | if (rem > 0) { | ||
| 1012 | OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); | ||
| 1013 | return -EINVAL; | ||
| 1014 | } | ||
| 1015 | if (!info->nat) { | ||
| 1016 | /* Do not allow flags if no type is given. */ | ||
| 1017 | if (info->range.flags) { | ||
| 1018 | OVS_NLERR(log, | ||
| 1019 | "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" | ||
| 1020 | ); | ||
| 1021 | return -EINVAL; | ||
| 1022 | } | ||
| 1023 | info->nat = OVS_CT_NAT; /* NAT existing connections. */ | ||
| 1024 | } else if (!info->commit) { | ||
| 1025 | OVS_NLERR(log, | ||
| 1026 | "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" | ||
| 1027 | ); | ||
| 1028 | return -EINVAL; | ||
| 1029 | } | ||
| 1030 | /* Allow missing IP_MAX. */ | ||
| 1031 | if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { | ||
| 1032 | memcpy(&info->range.max_addr, &info->range.min_addr, | ||
| 1033 | sizeof(info->range.max_addr)); | ||
| 1034 | } | ||
| 1035 | /* Allow missing PROTO_MAX. */ | ||
| 1036 | if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && | ||
| 1037 | !have_proto_max) { | ||
| 1038 | info->range.max_proto.all = info->range.min_proto.all; | ||
| 1039 | } | ||
| 1040 | return 0; | ||
| 1041 | } | ||
| 1042 | #endif | ||
| 1043 | |||
| 656 | static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { | 1044 | static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { |
| 657 | [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, | 1045 | [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, |
| 658 | [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), | 1046 | [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), |
| @@ -662,7 +1050,11 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { | |||
| 662 | [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), | 1050 | [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), |
| 663 | .maxlen = sizeof(struct md_labels) }, | 1051 | .maxlen = sizeof(struct md_labels) }, |
| 664 | [OVS_CT_ATTR_HELPER] = { .minlen = 1, | 1052 | [OVS_CT_ATTR_HELPER] = { .minlen = 1, |
| 665 | .maxlen = NF_CT_HELPER_NAME_LEN } | 1053 | .maxlen = NF_CT_HELPER_NAME_LEN }, |
| 1054 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 1055 | /* NAT length is checked when parsing the nested attributes. */ | ||
| 1056 | [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, | ||
| 1057 | #endif | ||
| 666 | }; | 1058 | }; |
| 667 | 1059 | ||
| 668 | static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, | 1060 | static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, |
| @@ -729,6 +1121,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, | |||
| 729 | return -EINVAL; | 1121 | return -EINVAL; |
| 730 | } | 1122 | } |
| 731 | break; | 1123 | break; |
| 1124 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 1125 | case OVS_CT_ATTR_NAT: { | ||
| 1126 | int err = parse_nat(a, info, log); | ||
| 1127 | |||
| 1128 | if (err) | ||
| 1129 | return err; | ||
| 1130 | break; | ||
| 1131 | } | ||
| 1132 | #endif | ||
| 732 | default: | 1133 | default: |
| 733 | OVS_NLERR(log, "Unknown conntrack attr (%d)", | 1134 | OVS_NLERR(log, "Unknown conntrack attr (%d)", |
| 734 | type); | 1135 | type); |
| @@ -816,6 +1217,74 @@ err_free_ct: | |||
| 816 | return err; | 1217 | return err; |
| 817 | } | 1218 | } |
| 818 | 1219 | ||
| 1220 | #ifdef CONFIG_NF_NAT_NEEDED | ||
| 1221 | static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, | ||
| 1222 | struct sk_buff *skb) | ||
| 1223 | { | ||
| 1224 | struct nlattr *start; | ||
| 1225 | |||
| 1226 | start = nla_nest_start(skb, OVS_CT_ATTR_NAT); | ||
| 1227 | if (!start) | ||
| 1228 | return false; | ||
| 1229 | |||
| 1230 | if (info->nat & OVS_CT_SRC_NAT) { | ||
| 1231 | if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) | ||
| 1232 | return false; | ||
| 1233 | } else if (info->nat & OVS_CT_DST_NAT) { | ||
| 1234 | if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) | ||
| 1235 | return false; | ||
| 1236 | } else { | ||
| 1237 | goto out; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { | ||
| 1241 | if (info->family == NFPROTO_IPV4) { | ||
| 1242 | if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, | ||
| 1243 | info->range.min_addr.ip) || | ||
| 1244 | (info->range.max_addr.ip | ||
| 1245 | != info->range.min_addr.ip && | ||
| 1246 | (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, | ||
| 1247 | info->range.max_addr.ip)))) | ||
| 1248 | return false; | ||
| 1249 | #if IS_ENABLED(CONFIG_NF_NAT_IPV6) | ||
| 1250 | } else if (info->family == NFPROTO_IPV6) { | ||
| 1251 | if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, | ||
| 1252 | &info->range.min_addr.in6) || | ||
| 1253 | (memcmp(&info->range.max_addr.in6, | ||
| 1254 | &info->range.min_addr.in6, | ||
| 1255 | sizeof(info->range.max_addr.in6)) && | ||
| 1256 | (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, | ||
| 1257 | &info->range.max_addr.in6)))) | ||
| 1258 | return false; | ||
| 1259 | #endif | ||
| 1260 | } else { | ||
| 1261 | return false; | ||
| 1262 | } | ||
| 1263 | } | ||
| 1264 | if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && | ||
| 1265 | (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, | ||
| 1266 | ntohs(info->range.min_proto.all)) || | ||
| 1267 | (info->range.max_proto.all != info->range.min_proto.all && | ||
| 1268 | nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, | ||
| 1269 | ntohs(info->range.max_proto.all))))) | ||
| 1270 | return false; | ||
| 1271 | |||
| 1272 | if (info->range.flags & NF_NAT_RANGE_PERSISTENT && | ||
| 1273 | nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) | ||
| 1274 | return false; | ||
| 1275 | if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && | ||
| 1276 | nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) | ||
| 1277 | return false; | ||
| 1278 | if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && | ||
| 1279 | nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) | ||
| 1280 | return false; | ||
| 1281 | out: | ||
| 1282 | nla_nest_end(skb, start); | ||
| 1283 | |||
| 1284 | return true; | ||
| 1285 | } | ||
| 1286 | #endif | ||
| 1287 | |||
| 819 | int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, | 1288 | int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, |
| 820 | struct sk_buff *skb) | 1289 | struct sk_buff *skb) |
| 821 | { | 1290 | { |
| @@ -844,7 +1313,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, | |||
| 844 | ct_info->helper->name)) | 1313 | ct_info->helper->name)) |
| 845 | return -EMSGSIZE; | 1314 | return -EMSGSIZE; |
| 846 | } | 1315 | } |
| 847 | 1316 | #ifdef CONFIG_NF_NAT_NEEDED | |
| 1317 | if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) | ||
| 1318 | return -EMSGSIZE; | ||
| 1319 | #endif | ||
| 848 | nla_nest_end(skb, start); | 1320 | nla_nest_end(skb, start); |
| 849 | 1321 | ||
| 850 | return 0; | 1322 | return 0; |
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index a7544f405c16..8f6230bd6183 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h | |||
| @@ -37,7 +37,8 @@ void ovs_ct_free_action(const struct nlattr *a); | |||
| 37 | 37 | ||
| 38 | #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ | 38 | #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ |
| 39 | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ | 39 | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ |
| 40 | OVS_CS_F_INVALID | OVS_CS_F_TRACKED) | 40 | OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \ |
| 41 | OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) | ||
| 41 | #else | 42 | #else |
| 42 | #include <linux/errno.h> | 43 | #include <linux/errno.h> |
| 43 | 44 | ||
