diff options
Diffstat (limited to 'net/openvswitch')
-rw-r--r-- | net/openvswitch/Kconfig | 6 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 12 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 663 | ||||
-rw-r--r-- | net/openvswitch/conntrack.h | 3 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 108 | ||||
-rw-r--r-- | net/openvswitch/datapath.h | 4 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 2 | ||||
-rw-r--r-- | net/openvswitch/flow_netlink.c | 9 | ||||
-rw-r--r-- | net/openvswitch/vport-geneve.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport-internal_dev.c | 10 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport-vxlan.c | 2 | ||||
-rw-r--r-- | net/openvswitch/vport.h | 7 |
13 files changed, 735 insertions, 95 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d143aa9f6654..ce947292ae77 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig | |||
@@ -6,10 +6,14 @@ config OPENVSWITCH | |||
6 | tristate "Open vSwitch" | 6 | tristate "Open vSwitch" |
7 | depends on INET | 7 | depends on INET |
8 | depends on !NF_CONNTRACK || \ | 8 | depends on !NF_CONNTRACK || \ |
9 | (NF_CONNTRACK && (!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6)) | 9 | (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ |
10 | (!NF_NAT || NF_NAT) && \ | ||
11 | (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ | ||
12 | (!NF_NAT_IPV6 || NF_NAT_IPV6))) | ||
10 | select LIBCRC32C | 13 | select LIBCRC32C |
11 | select MPLS | 14 | select MPLS |
12 | select NET_MPLS_GSO | 15 | select NET_MPLS_GSO |
16 | select DST_CACHE | ||
13 | ---help--- | 17 | ---help--- |
14 | Open vSwitch is a multilayer Ethernet switch targeted at virtualized | 18 | Open vSwitch is a multilayer Ethernet switch targeted at virtualized |
15 | environments. In addition to supporting a variety of features | 19 | environments. In addition to supporting a variety of features |
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2d59df521915..879185fe183f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c | |||
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, | |||
158 | new_mpls_lse = (__be32 *)skb_mpls_header(skb); | 158 | new_mpls_lse = (__be32 *)skb_mpls_header(skb); |
159 | *new_mpls_lse = mpls->mpls_lse; | 159 | *new_mpls_lse = mpls->mpls_lse; |
160 | 160 | ||
161 | if (skb->ip_summed == CHECKSUM_COMPLETE) | 161 | skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); |
162 | skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, | ||
163 | MPLS_HLEN, 0)); | ||
164 | 162 | ||
165 | hdr = eth_hdr(skb); | 163 | hdr = eth_hdr(skb); |
166 | hdr->h_proto = mpls->mpls_ethertype; | 164 | hdr->h_proto = mpls->mpls_ethertype; |
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, | |||
280 | ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, | 278 | ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, |
281 | mask->eth_dst); | 279 | mask->eth_dst); |
282 | 280 | ||
283 | ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); | 281 | skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); |
284 | 282 | ||
285 | ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); | 283 | ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); |
286 | ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); | 284 | ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); |
@@ -463,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, | |||
463 | mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); | 461 | mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); |
464 | 462 | ||
465 | if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { | 463 | if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { |
466 | set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, | 464 | set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked, |
467 | true); | 465 | true); |
468 | memcpy(&flow_key->ipv6.addr.src, masked, | 466 | memcpy(&flow_key->ipv6.addr.src, masked, |
469 | sizeof(flow_key->ipv6.addr.src)); | 467 | sizeof(flow_key->ipv6.addr.src)); |
@@ -485,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, | |||
485 | NULL, &flags) | 483 | NULL, &flags) |
486 | != NEXTHDR_ROUTING); | 484 | != NEXTHDR_ROUTING); |
487 | 485 | ||
488 | set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, | 486 | set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked, |
489 | recalc_csum); | 487 | recalc_csum); |
490 | memcpy(&flow_key->ipv6.addr.dst, masked, | 488 | memcpy(&flow_key->ipv6.addr.dst, masked, |
491 | sizeof(flow_key->ipv6.addr.dst)); | 489 | sizeof(flow_key->ipv6.addr.dst)); |
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk | |||
639 | /* Reconstruct the MAC header. */ | 637 | /* Reconstruct the MAC header. */ |
640 | skb_push(skb, data->l2_len); | 638 | skb_push(skb, data->l2_len); |
641 | memcpy(skb->data, &data->l2_data, data->l2_len); | 639 | memcpy(skb->data, &data->l2_data, data->l2_len); |
642 | ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len); | 640 | skb_postpush_rcsum(skb, skb->data, data->l2_len); |
643 | skb_reset_mac_header(skb); | 641 | skb_reset_mac_header(skb); |
644 | 642 | ||
645 | ovs_vport_send(vport, skb); | 643 | ovs_vport_send(vport, skb); |
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index ee6ff8ffc12d..b5fea1101faa 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c | |||
@@ -13,21 +13,31 @@ | |||
13 | 13 | ||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/openvswitch.h> | 15 | #include <linux/openvswitch.h> |
16 | #include <linux/tcp.h> | ||
17 | #include <linux/udp.h> | ||
18 | #include <linux/sctp.h> | ||
16 | #include <net/ip.h> | 19 | #include <net/ip.h> |
17 | #include <net/netfilter/nf_conntrack_core.h> | 20 | #include <net/netfilter/nf_conntrack_core.h> |
18 | #include <net/netfilter/nf_conntrack_helper.h> | 21 | #include <net/netfilter/nf_conntrack_helper.h> |
19 | #include <net/netfilter/nf_conntrack_labels.h> | 22 | #include <net/netfilter/nf_conntrack_labels.h> |
23 | #include <net/netfilter/nf_conntrack_seqadj.h> | ||
20 | #include <net/netfilter/nf_conntrack_zones.h> | 24 | #include <net/netfilter/nf_conntrack_zones.h> |
21 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | 25 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> |
22 | 26 | ||
27 | #ifdef CONFIG_NF_NAT_NEEDED | ||
28 | #include <linux/netfilter/nf_nat.h> | ||
29 | #include <net/netfilter/nf_nat_core.h> | ||
30 | #include <net/netfilter/nf_nat_l3proto.h> | ||
31 | #endif | ||
32 | |||
23 | #include "datapath.h" | 33 | #include "datapath.h" |
24 | #include "conntrack.h" | 34 | #include "conntrack.h" |
25 | #include "flow.h" | 35 | #include "flow.h" |
26 | #include "flow_netlink.h" | 36 | #include "flow_netlink.h" |
27 | 37 | ||
28 | struct ovs_ct_len_tbl { | 38 | struct ovs_ct_len_tbl { |
29 | size_t maxlen; | 39 | int maxlen; |
30 | size_t minlen; | 40 | int minlen; |
31 | }; | 41 | }; |
32 | 42 | ||
33 | /* Metadata mark for masked write to conntrack mark */ | 43 | /* Metadata mark for masked write to conntrack mark */ |
@@ -42,15 +52,25 @@ struct md_labels { | |||
42 | struct ovs_key_ct_labels mask; | 52 | struct ovs_key_ct_labels mask; |
43 | }; | 53 | }; |
44 | 54 | ||
55 | enum ovs_ct_nat { | ||
56 | OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */ | ||
57 | OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */ | ||
58 | OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */ | ||
59 | }; | ||
60 | |||
45 | /* Conntrack action context for execution. */ | 61 | /* Conntrack action context for execution. */ |
46 | struct ovs_conntrack_info { | 62 | struct ovs_conntrack_info { |
47 | struct nf_conntrack_helper *helper; | 63 | struct nf_conntrack_helper *helper; |
48 | struct nf_conntrack_zone zone; | 64 | struct nf_conntrack_zone zone; |
49 | struct nf_conn *ct; | 65 | struct nf_conn *ct; |
50 | u8 commit : 1; | 66 | u8 commit : 1; |
67 | u8 nat : 3; /* enum ovs_ct_nat */ | ||
51 | u16 family; | 68 | u16 family; |
52 | struct md_mark mark; | 69 | struct md_mark mark; |
53 | struct md_labels labels; | 70 | struct md_labels labels; |
71 | #ifdef CONFIG_NF_NAT_NEEDED | ||
72 | struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */ | ||
73 | #endif | ||
54 | }; | 74 | }; |
55 | 75 | ||
56 | static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); | 76 | static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info); |
@@ -75,7 +95,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) | |||
75 | switch (ctinfo) { | 95 | switch (ctinfo) { |
76 | case IP_CT_ESTABLISHED_REPLY: | 96 | case IP_CT_ESTABLISHED_REPLY: |
77 | case IP_CT_RELATED_REPLY: | 97 | case IP_CT_RELATED_REPLY: |
78 | case IP_CT_NEW_REPLY: | ||
79 | ct_state |= OVS_CS_F_REPLY_DIR; | 98 | ct_state |= OVS_CS_F_REPLY_DIR; |
80 | break; | 99 | break; |
81 | default: | 100 | default: |
@@ -92,7 +111,6 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) | |||
92 | ct_state |= OVS_CS_F_RELATED; | 111 | ct_state |= OVS_CS_F_RELATED; |
93 | break; | 112 | break; |
94 | case IP_CT_NEW: | 113 | case IP_CT_NEW: |
95 | case IP_CT_NEW_REPLY: | ||
96 | ct_state |= OVS_CS_F_NEW; | 114 | ct_state |= OVS_CS_F_NEW; |
97 | break; | 115 | break; |
98 | default: | 116 | default: |
@@ -139,12 +157,15 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state, | |||
139 | ovs_ct_get_labels(ct, &key->ct.labels); | 157 | ovs_ct_get_labels(ct, &key->ct.labels); |
140 | } | 158 | } |
141 | 159 | ||
142 | /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has | 160 | /* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has |
143 | * previously sent the packet to conntrack via the ct action. | 161 | * previously sent the packet to conntrack via the ct action. If |
162 | * 'keep_nat_flags' is true, the existing NAT flags are retained, else they are | |
163 | * initialized from the connection status. | ||
144 | */ | 164 | */ |
145 | static void ovs_ct_update_key(const struct sk_buff *skb, | 165 | static void ovs_ct_update_key(const struct sk_buff *skb, |
146 | const struct ovs_conntrack_info *info, | 166 | const struct ovs_conntrack_info *info, |
147 | struct sw_flow_key *key, bool post_ct) | 167 | struct sw_flow_key *key, bool post_ct, |
168 | bool keep_nat_flags) | ||
148 | { | 169 | { |
149 | const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; | 170 | const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; |
150 | enum ip_conntrack_info ctinfo; | 171 | enum ip_conntrack_info ctinfo; |
@@ -154,10 +175,22 @@ static void ovs_ct_update_key(const struct sk_buff *skb, | |||
154 | ct = nf_ct_get(skb, &ctinfo); | 175 | ct = nf_ct_get(skb, &ctinfo); |
155 | if (ct) { | 176 | if (ct) { |
156 | state = ovs_ct_get_state(ctinfo); | 177 | state = ovs_ct_get_state(ctinfo); |
178 | /* All unconfirmed entries are NEW connections. */ | ||
157 | if (!nf_ct_is_confirmed(ct)) | 179 | if (!nf_ct_is_confirmed(ct)) |
158 | state |= OVS_CS_F_NEW; | 180 | state |= OVS_CS_F_NEW; |
181 | /* OVS persists the related flag for the duration of the | ||
182 | * connection. | ||
183 | */ | ||
159 | if (ct->master) | 184 | if (ct->master) |
160 | state |= OVS_CS_F_RELATED; | 185 | state |= OVS_CS_F_RELATED; |
186 | if (keep_nat_flags) { | ||
187 | state |= key->ct.state & OVS_CS_F_NAT_MASK; | ||
188 | } else { | ||
189 | if (ct->status & IPS_SRC_NAT) | ||
190 | state |= OVS_CS_F_SRC_NAT; | ||
191 | if (ct->status & IPS_DST_NAT) | ||
192 | state |= OVS_CS_F_DST_NAT; | ||
193 | } | ||
161 | zone = nf_ct_zone(ct); | 194 | zone = nf_ct_zone(ct); |
162 | } else if (post_ct) { | 195 | } else if (post_ct) { |
163 | state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; | 196 | state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID; |
@@ -167,9 +200,12 @@ static void ovs_ct_update_key(const struct sk_buff *skb, | |||
167 | __ovs_ct_update_key(key, state, zone, ct); | 200 | __ovs_ct_update_key(key, state, zone, ct); |
168 | } | 201 | } |
169 | 202 | ||
203 | /* This is called to initialize CT key fields possibly coming in from the local | ||
204 | * stack. | ||
205 | */ | ||
170 | void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) | 206 | void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) |
171 | { | 207 | { |
172 | ovs_ct_update_key(skb, NULL, key, false); | 208 | ovs_ct_update_key(skb, NULL, key, false, false); |
173 | } | 209 | } |
174 | 210 | ||
175 | int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) | 211 | int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb) |
@@ -201,7 +237,6 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key, | |||
201 | struct nf_conn *ct; | 237 | struct nf_conn *ct; |
202 | u32 new_mark; | 238 | u32 new_mark; |
203 | 239 | ||
204 | |||
205 | /* The connection could be invalid, in which case set_mark is no-op. */ | 240 | /* The connection could be invalid, in which case set_mark is no-op. */ |
206 | ct = nf_ct_get(skb, &ctinfo); | 241 | ct = nf_ct_get(skb, &ctinfo); |
207 | if (!ct) | 242 | if (!ct) |
@@ -259,6 +294,7 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) | |||
259 | enum ip_conntrack_info ctinfo; | 294 | enum ip_conntrack_info ctinfo; |
260 | unsigned int protoff; | 295 | unsigned int protoff; |
261 | struct nf_conn *ct; | 296 | struct nf_conn *ct; |
297 | int err; | ||
262 | 298 | ||
263 | ct = nf_ct_get(skb, &ctinfo); | 299 | ct = nf_ct_get(skb, &ctinfo); |
264 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) | 300 | if (!ct || ctinfo == IP_CT_RELATED_REPLY) |
@@ -295,7 +331,18 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) | |||
295 | return NF_DROP; | 331 | return NF_DROP; |
296 | } | 332 | } |
297 | 333 | ||
298 | return helper->help(skb, protoff, ct, ctinfo); | 334 | err = helper->help(skb, protoff, ct, ctinfo); |
335 | if (err != NF_ACCEPT) | ||
336 | return err; | ||
337 | |||
338 | /* Adjust seqs after helper. This is needed due to some helpers (e.g., | ||
339 | * FTP with NAT) adjusting the TCP payload size when mangling IP | |
340 | * addresses and/or port numbers in the text-based control connection. | ||
341 | */ | ||
342 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && | ||
343 | !nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) | ||
344 | return NF_DROP; | ||
345 | return NF_ACCEPT; | ||
299 | } | 346 | } |
300 | 347 | ||
301 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero | 348 | /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero |
@@ -320,6 +367,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, | |||
320 | } else if (key->eth.type == htons(ETH_P_IPV6)) { | 367 | } else if (key->eth.type == htons(ETH_P_IPV6)) { |
321 | enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; | 368 | enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; |
322 | 369 | ||
370 | skb_orphan(skb); | ||
323 | memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); | 371 | memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); |
324 | err = nf_ct_frag6_gather(net, skb, user); | 372 | err = nf_ct_frag6_gather(net, skb, user); |
325 | if (err) | 373 | if (err) |
@@ -352,14 +400,101 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, | |||
352 | return __nf_ct_expect_find(net, zone, &tuple); | 400 | return __nf_ct_expect_find(net, zone, &tuple); |
353 | } | 401 | } |
354 | 402 | ||
403 | /* This replicates logic from nf_conntrack_core.c that is not exported. */ | ||
404 | static enum ip_conntrack_info | ||
405 | ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h) | ||
406 | { | ||
407 | const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); | ||
408 | |||
409 | if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) | ||
410 | return IP_CT_ESTABLISHED_REPLY; | ||
411 | /* Once we've had two way comms, always ESTABLISHED. */ | ||
412 | if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) | ||
413 | return IP_CT_ESTABLISHED; | ||
414 | if (test_bit(IPS_EXPECTED_BIT, &ct->status)) | ||
415 | return IP_CT_RELATED; | ||
416 | return IP_CT_NEW; | ||
417 | } | ||
418 | |||
419 | /* Find an existing connection which this packet belongs to without | ||
420 | * re-attributing statistics or modifying the connection state. This allows an | ||
421 | * skb->nfct lost due to an upcall to be recovered during actions execution. | ||
422 | * | ||
423 | * Must be called with rcu_read_lock. | ||
424 | * | ||
425 | * On success, populates skb->nfct and skb->nfctinfo, and returns the | ||
426 | * connection. Returns NULL if there is no existing entry. | ||
427 | */ | ||
428 | static struct nf_conn * | ||
429 | ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, | ||
430 | u8 l3num, struct sk_buff *skb) | ||
431 | { | ||
432 | struct nf_conntrack_l3proto *l3proto; | ||
433 | struct nf_conntrack_l4proto *l4proto; | ||
434 | struct nf_conntrack_tuple tuple; | ||
435 | struct nf_conntrack_tuple_hash *h; | ||
436 | enum ip_conntrack_info ctinfo; | ||
437 | struct nf_conn *ct; | ||
438 | unsigned int dataoff; | ||
439 | u8 protonum; | ||
440 | |||
441 | l3proto = __nf_ct_l3proto_find(l3num); | ||
442 | if (!l3proto) { | ||
443 | pr_debug("ovs_ct_find_existing: Can't get l3proto\n"); | ||
444 | return NULL; | ||
445 | } | ||
446 | if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, | ||
447 | &protonum) <= 0) { | ||
448 | pr_debug("ovs_ct_find_existing: Can't get protonum\n"); | ||
449 | return NULL; | ||
450 | } | ||
451 | l4proto = __nf_ct_l4proto_find(l3num, protonum); | ||
452 | if (!l4proto) { | ||
453 | pr_debug("ovs_ct_find_existing: Can't get l4proto\n"); | ||
454 | return NULL; | ||
455 | } | ||
456 | if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, | ||
457 | protonum, net, &tuple, l3proto, l4proto)) { | ||
458 | pr_debug("ovs_ct_find_existing: Can't get tuple\n"); | ||
459 | return NULL; | ||
460 | } | ||
461 | |||
462 | /* look for tuple match */ | ||
463 | h = nf_conntrack_find_get(net, zone, &tuple); | ||
464 | if (!h) | ||
465 | return NULL; /* Not found. */ | ||
466 | |||
467 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
468 | |||
469 | ctinfo = ovs_ct_get_info(h); | ||
470 | if (ctinfo == IP_CT_NEW) { | ||
471 | /* This should not happen. */ | ||
472 | WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct); | ||
473 | } | ||
474 | skb->nfct = &ct->ct_general; | ||
475 | skb->nfctinfo = ctinfo; | ||
476 | return ct; | ||
477 | } | ||
478 | |||
355 | /* Determine whether skb->nfct is equal to the result of conntrack lookup. */ | 479 | /* Determine whether skb->nfct is equal to the result of conntrack lookup. */ |
356 | static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, | 480 | static bool skb_nfct_cached(struct net *net, |
357 | const struct ovs_conntrack_info *info) | 481 | const struct sw_flow_key *key, |
482 | const struct ovs_conntrack_info *info, | ||
483 | struct sk_buff *skb) | ||
358 | { | 484 | { |
359 | enum ip_conntrack_info ctinfo; | 485 | enum ip_conntrack_info ctinfo; |
360 | struct nf_conn *ct; | 486 | struct nf_conn *ct; |
361 | 487 | ||
362 | ct = nf_ct_get(skb, &ctinfo); | 488 | ct = nf_ct_get(skb, &ctinfo); |
489 | /* If no ct, check if we have evidence that an existing conntrack entry | ||
490 | * might be found for this skb. This happens when we lose a skb->nfct | ||
491 | * due to an upcall. If the connection was not confirmed, it is not | ||
492 | * cached and needs to be run through conntrack again. | ||
493 | */ | ||
494 | if (!ct && key->ct.state & OVS_CS_F_TRACKED && | ||
495 | !(key->ct.state & OVS_CS_F_INVALID) && | ||
496 | key->ct.zone == info->zone.id) | ||
497 | ct = ovs_ct_find_existing(net, &info->zone, info->family, skb); | ||
363 | if (!ct) | 498 | if (!ct) |
364 | return false; | 499 | return false; |
365 | if (!net_eq(net, read_pnet(&ct->ct_net))) | 500 | if (!net_eq(net, read_pnet(&ct->ct_net))) |
@@ -377,6 +512,207 @@ static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb, | |||
377 | return true; | 512 | return true; |
378 | } | 513 | } |
379 | 514 | ||
515 | #ifdef CONFIG_NF_NAT_NEEDED | ||
516 | /* Modelled after nf_nat_ipv[46]_fn(). | ||
517 | * range is only used for new, uninitialized NAT state. | ||
518 | * Returns either NF_ACCEPT or NF_DROP. | ||
519 | */ | ||
520 | static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, | ||
521 | enum ip_conntrack_info ctinfo, | ||
522 | const struct nf_nat_range *range, | ||
523 | enum nf_nat_manip_type maniptype) | ||
524 | { | ||
525 | int hooknum, nh_off, err = NF_ACCEPT; | ||
526 | |||
527 | nh_off = skb_network_offset(skb); | ||
528 | skb_pull(skb, nh_off); | ||
529 | |||
530 | /* See HOOK2MANIP(). */ | ||
531 | if (maniptype == NF_NAT_MANIP_SRC) | ||
532 | hooknum = NF_INET_LOCAL_IN; /* Source NAT */ | ||
533 | else | ||
534 | hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ | ||
535 | |||
536 | switch (ctinfo) { | ||
537 | case IP_CT_RELATED: | ||
538 | case IP_CT_RELATED_REPLY: | ||
539 | if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && | ||
540 | skb->protocol == htons(ETH_P_IP) && | ||
541 | ip_hdr(skb)->protocol == IPPROTO_ICMP) { | ||
542 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, | ||
543 | hooknum)) | ||
544 | err = NF_DROP; | ||
545 | goto push; | ||
546 | } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && | ||
547 | skb->protocol == htons(ETH_P_IPV6)) { | ||
548 | __be16 frag_off; | ||
549 | u8 nexthdr = ipv6_hdr(skb)->nexthdr; | ||
550 | int hdrlen = ipv6_skip_exthdr(skb, | ||
551 | sizeof(struct ipv6hdr), | ||
552 | &nexthdr, &frag_off); | ||
553 | |||
554 | if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { | ||
555 | if (!nf_nat_icmpv6_reply_translation(skb, ct, | ||
556 | ctinfo, | ||
557 | hooknum, | ||
558 | hdrlen)) | ||
559 | err = NF_DROP; | ||
560 | goto push; | ||
561 | } | ||
562 | } | ||
563 | /* Non-ICMP, fall thru to initialize if needed. */ | ||
564 | case IP_CT_NEW: | ||
565 | /* Seen it before? This can happen for loopback, retrans, | ||
566 | * or local packets. | ||
567 | */ | ||
568 | if (!nf_nat_initialized(ct, maniptype)) { | ||
569 | /* Initialize according to the NAT action. */ | ||
570 | err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) | ||
571 | /* Action is set up to establish a new | ||
572 | * mapping. | ||
573 | */ | ||
574 | ? nf_nat_setup_info(ct, range, maniptype) | ||
575 | : nf_nat_alloc_null_binding(ct, hooknum); | ||
576 | if (err != NF_ACCEPT) | ||
577 | goto push; | ||
578 | } | ||
579 | break; | ||
580 | |||
581 | case IP_CT_ESTABLISHED: | ||
582 | case IP_CT_ESTABLISHED_REPLY: | ||
583 | break; | ||
584 | |||
585 | default: | ||
586 | err = NF_DROP; | ||
587 | goto push; | ||
588 | } | ||
589 | |||
590 | err = nf_nat_packet(ct, ctinfo, hooknum, skb); | ||
591 | push: | ||
592 | skb_push(skb, nh_off); | ||
593 | |||
594 | return err; | ||
595 | } | ||
596 | |||
597 | static void ovs_nat_update_key(struct sw_flow_key *key, | ||
598 | const struct sk_buff *skb, | ||
599 | enum nf_nat_manip_type maniptype) | ||
600 | { | ||
601 | if (maniptype == NF_NAT_MANIP_SRC) { | ||
602 | __be16 src; | ||
603 | |||
604 | key->ct.state |= OVS_CS_F_SRC_NAT; | ||
605 | if (key->eth.type == htons(ETH_P_IP)) | ||
606 | key->ipv4.addr.src = ip_hdr(skb)->saddr; | ||
607 | else if (key->eth.type == htons(ETH_P_IPV6)) | ||
608 | memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr, | ||
609 | sizeof(key->ipv6.addr.src)); | ||
610 | else | ||
611 | return; | ||
612 | |||
613 | if (key->ip.proto == IPPROTO_UDP) | ||
614 | src = udp_hdr(skb)->source; | ||
615 | else if (key->ip.proto == IPPROTO_TCP) | ||
616 | src = tcp_hdr(skb)->source; | ||
617 | else if (key->ip.proto == IPPROTO_SCTP) | ||
618 | src = sctp_hdr(skb)->source; | ||
619 | else | ||
620 | return; | ||
621 | |||
622 | key->tp.src = src; | ||
623 | } else { | ||
624 | __be16 dst; | ||
625 | |||
626 | key->ct.state |= OVS_CS_F_DST_NAT; | ||
627 | if (key->eth.type == htons(ETH_P_IP)) | ||
628 | key->ipv4.addr.dst = ip_hdr(skb)->daddr; | ||
629 | else if (key->eth.type == htons(ETH_P_IPV6)) | ||
630 | memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr, | ||
631 | sizeof(key->ipv6.addr.dst)); | ||
632 | else | ||
633 | return; | ||
634 | |||
635 | if (key->ip.proto == IPPROTO_UDP) | ||
636 | dst = udp_hdr(skb)->dest; | ||
637 | else if (key->ip.proto == IPPROTO_TCP) | ||
638 | dst = tcp_hdr(skb)->dest; | ||
639 | else if (key->ip.proto == IPPROTO_SCTP) | ||
640 | dst = sctp_hdr(skb)->dest; | ||
641 | else | ||
642 | return; | ||
643 | |||
644 | key->tp.dst = dst; | ||
645 | } | ||
646 | } | ||
647 | |||
648 | /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ | ||
649 | static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, | ||
650 | const struct ovs_conntrack_info *info, | ||
651 | struct sk_buff *skb, struct nf_conn *ct, | ||
652 | enum ip_conntrack_info ctinfo) | ||
653 | { | ||
654 | enum nf_nat_manip_type maniptype; | ||
655 | int err; | ||
656 | |||
657 | if (nf_ct_is_untracked(ct)) { | ||
658 | /* A NAT action may only be performed on tracked packets. */ | ||
659 | return NF_ACCEPT; | ||
660 | } | ||
661 | |||
662 | /* Add NAT extension if not confirmed yet. */ | ||
663 | if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) | ||
664 | return NF_ACCEPT; /* Can't NAT. */ | ||
665 | |||
666 | /* Determine NAT type. | ||
667 | * Check if the NAT type can be deduced from the tracked connection. | ||
668 | * Make sure new expected connections (IP_CT_RELATED) are NATted only | ||
669 | * when committing. | ||
670 | */ | ||
671 | if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && | ||
672 | ct->status & IPS_NAT_MASK && | ||
673 | (ctinfo != IP_CT_RELATED || info->commit)) { | ||
674 | /* NAT an established or related connection like before. */ | ||
675 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) | ||
676 | /* This is the REPLY direction for a connection | ||
677 | * for which NAT was applied in the forward | ||
678 | * direction. Do the reverse NAT. | ||
679 | */ | ||
680 | maniptype = ct->status & IPS_SRC_NAT | ||
681 | ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; | ||
682 | else | ||
683 | maniptype = ct->status & IPS_SRC_NAT | ||
684 | ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; | ||
685 | } else if (info->nat & OVS_CT_SRC_NAT) { | ||
686 | maniptype = NF_NAT_MANIP_SRC; | ||
687 | } else if (info->nat & OVS_CT_DST_NAT) { | ||
688 | maniptype = NF_NAT_MANIP_DST; | ||
689 | } else { | ||
690 | return NF_ACCEPT; /* Connection is not NATed. */ | ||
691 | } | ||
692 | err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); | ||
693 | |||
694 | /* Mark NAT done if successful and update the flow key. */ | ||
695 | if (err == NF_ACCEPT) | ||
696 | ovs_nat_update_key(key, skb, maniptype); | ||
697 | |||
698 | return err; | ||
699 | } | ||
700 | #else /* !CONFIG_NF_NAT_NEEDED */ | ||
701 | static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, | ||
702 | const struct ovs_conntrack_info *info, | ||
703 | struct sk_buff *skb, struct nf_conn *ct, | ||
704 | enum ip_conntrack_info ctinfo) | ||
705 | { | ||
706 | return NF_ACCEPT; | ||
707 | } | ||
708 | #endif | ||
709 | |||
710 | /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if | ||
711 | * not done already. Update key with new CT state after passing the packet | ||
712 | * through conntrack. | ||
713 | * Note that if the packet is deemed invalid by conntrack, skb->nfct will be | ||
714 | * set to NULL and 0 will be returned. | ||
715 | */ | ||
380 | static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | 716 | static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, |
381 | const struct ovs_conntrack_info *info, | 717 | const struct ovs_conntrack_info *info, |
382 | struct sk_buff *skb) | 718 | struct sk_buff *skb) |
@@ -386,8 +722,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | |||
386 | * actually run the packet through conntrack twice unless it's for a | 722 | * actually run the packet through conntrack twice unless it's for a |
387 | * different zone. | 723 | * different zone. |
388 | */ | 724 | */ |
389 | if (!skb_nfct_cached(net, skb, info)) { | 725 | bool cached = skb_nfct_cached(net, key, info, skb); |
726 | enum ip_conntrack_info ctinfo; | ||
727 | struct nf_conn *ct; | ||
728 | |||
729 | if (!cached) { | ||
390 | struct nf_conn *tmpl = info->ct; | 730 | struct nf_conn *tmpl = info->ct; |
731 | int err; | ||
391 | 732 | ||
392 | /* Associate skb with specified zone. */ | 733 | /* Associate skb with specified zone. */ |
393 | if (tmpl) { | 734 | if (tmpl) { |
@@ -398,17 +739,53 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | |||
398 | skb->nfctinfo = IP_CT_NEW; | 739 | skb->nfctinfo = IP_CT_NEW; |
399 | } | 740 | } |
400 | 741 | ||
401 | if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING, | 742 | /* Repeat if requested, see nf_iterate(). */ |
402 | skb) != NF_ACCEPT) | 743 | do { |
744 | err = nf_conntrack_in(net, info->family, | ||
745 | NF_INET_PRE_ROUTING, skb); | ||
746 | } while (err == NF_REPEAT); | ||
747 | |||
748 | if (err != NF_ACCEPT) | ||
403 | return -ENOENT; | 749 | return -ENOENT; |
404 | 750 | ||
405 | if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) { | 751 | /* Clear CT state NAT flags to mark that we have not yet done |
406 | WARN_ONCE(1, "helper rejected packet"); | 752 | * NAT after the nf_conntrack_in() call. We can actually clear |
753 | * the whole state, as it will be re-initialized below. | ||
754 | */ | ||
755 | key->ct.state = 0; | ||
756 | |||
757 | /* Update the key, but keep the NAT flags. */ | ||
758 | ovs_ct_update_key(skb, info, key, true, true); | ||
759 | } | ||
760 | |||
761 | ct = nf_ct_get(skb, &ctinfo); | ||
762 | if (ct) { | ||
763 | /* Packets starting a new connection must be NATted before the | ||
764 | * helper, so that the helper knows about the NAT. We enforce | ||
765 | * this by delaying both NAT and helper calls for unconfirmed | ||
766 | * connections until the committing CT action. For later | ||
767 | * packets NAT and Helper may be called in either order. | ||
768 | * | ||
769 | * NAT will be done only if the CT action has NAT, and only | ||
770 | * once per packet (per zone), as guarded by the NAT bits in | ||
771 | * the key->ct.state. | ||
772 | */ | ||
773 | if (info->nat && !(key->ct.state & OVS_CS_F_NAT_MASK) && | ||
774 | (nf_ct_is_confirmed(ct) || info->commit) && | ||
775 | ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) { | ||
407 | return -EINVAL; | 776 | return -EINVAL; |
408 | } | 777 | } |
409 | } | ||
410 | 778 | ||
411 | ovs_ct_update_key(skb, info, key, true); | 779 | /* Call the helper only if: |
780 | * - nf_conntrack_in() was executed above ("!cached") for a | ||
781 | * confirmed connection, or | ||
782 | * - When committing an unconfirmed connection. | ||
783 | */ | ||
784 | if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) && | ||
785 | ovs_ct_helper(skb, info->family) != NF_ACCEPT) { | ||
786 | return -EINVAL; | ||
787 | } | ||
788 | } | ||
412 | 789 | ||
413 | return 0; | 790 | return 0; |
414 | } | 791 | } |
@@ -420,19 +797,24 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, | |||
420 | { | 797 | { |
421 | struct nf_conntrack_expect *exp; | 798 | struct nf_conntrack_expect *exp; |
422 | 799 | ||
800 | /* If we pass an expected packet through nf_conntrack_in() the | ||
801 | * expectation is typically removed, but the packet could still be | ||
802 | * lost in upcall processing. To prevent this from happening we | ||
803 | * perform an explicit expectation lookup. Expected connections are | ||
804 | * always new, and will be passed through conntrack only when they are | ||
805 | * committed, as it is OK to remove the expectation at that time. | ||
806 | */ | ||
423 | exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); | 807 | exp = ovs_ct_expect_find(net, &info->zone, info->family, skb); |
424 | if (exp) { | 808 | if (exp) { |
425 | u8 state; | 809 | u8 state; |
426 | 810 | ||
811 | /* NOTE: New connections are NATted and Helped only when | ||
812 | * committed, so we are not calling into NAT here. | ||
813 | */ | ||
427 | state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; | 814 | state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; |
428 | __ovs_ct_update_key(key, state, &info->zone, exp->master); | 815 | __ovs_ct_update_key(key, state, &info->zone, exp->master); |
429 | } else { | 816 | } else |
430 | int err; | 817 | return __ovs_ct_lookup(net, key, info, skb); |
431 | |||
432 | err = __ovs_ct_lookup(net, key, info, skb); | ||
433 | if (err) | ||
434 | return err; | ||
435 | } | ||
436 | 818 | ||
437 | return 0; | 819 | return 0; |
438 | } | 820 | } |
@@ -442,21 +824,12 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, | |||
442 | const struct ovs_conntrack_info *info, | 824 | const struct ovs_conntrack_info *info, |
443 | struct sk_buff *skb) | 825 | struct sk_buff *skb) |
444 | { | 826 | { |
445 | u8 state; | ||
446 | int err; | 827 | int err; |
447 | 828 | ||
448 | state = key->ct.state; | ||
449 | if (key->ct.zone == info->zone.id && | ||
450 | ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) { | ||
451 | /* Previous lookup has shown that this connection is already | ||
452 | * tracked and committed. Skip committing. | ||
453 | */ | ||
454 | return 0; | ||
455 | } | ||
456 | |||
457 | err = __ovs_ct_lookup(net, key, info, skb); | 829 | err = __ovs_ct_lookup(net, key, info, skb); |
458 | if (err) | 830 | if (err) |
459 | return err; | 831 | return err; |
832 | /* This is a no-op if the connection has already been confirmed. */ | ||
460 | if (nf_conntrack_confirm(skb) != NF_ACCEPT) | 833 | if (nf_conntrack_confirm(skb) != NF_ACCEPT) |
461 | return -EINVAL; | 834 | return -EINVAL; |
462 | 835 | ||
@@ -541,6 +914,136 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, | |||
541 | return 0; | 914 | return 0; |
542 | } | 915 | } |
543 | 916 | ||
917 | #ifdef CONFIG_NF_NAT_NEEDED | ||
918 | static int parse_nat(const struct nlattr *attr, | ||
919 | struct ovs_conntrack_info *info, bool log) | ||
920 | { | ||
921 | struct nlattr *a; | ||
922 | int rem; | ||
923 | bool have_ip_max = false; | ||
924 | bool have_proto_max = false; | ||
925 | bool ip_vers = (info->family == NFPROTO_IPV6); | ||
926 | |||
927 | nla_for_each_nested(a, attr, rem) { | ||
928 | static const int ovs_nat_attr_lens[OVS_NAT_ATTR_MAX + 1][2] = { | ||
929 | [OVS_NAT_ATTR_SRC] = {0, 0}, | ||
930 | [OVS_NAT_ATTR_DST] = {0, 0}, | ||
931 | [OVS_NAT_ATTR_IP_MIN] = {sizeof(struct in_addr), | ||
932 | sizeof(struct in6_addr)}, | ||
933 | [OVS_NAT_ATTR_IP_MAX] = {sizeof(struct in_addr), | ||
934 | sizeof(struct in6_addr)}, | ||
935 | [OVS_NAT_ATTR_PROTO_MIN] = {sizeof(u16), sizeof(u16)}, | ||
936 | [OVS_NAT_ATTR_PROTO_MAX] = {sizeof(u16), sizeof(u16)}, | ||
937 | [OVS_NAT_ATTR_PERSISTENT] = {0, 0}, | ||
938 | [OVS_NAT_ATTR_PROTO_HASH] = {0, 0}, | ||
939 | [OVS_NAT_ATTR_PROTO_RANDOM] = {0, 0}, | ||
940 | }; | ||
941 | int type = nla_type(a); | ||
942 | |||
943 | if (type > OVS_NAT_ATTR_MAX) { | ||
944 | OVS_NLERR(log, | ||
945 | "Unknown NAT attribute (type=%d, max=%d).\n", | ||
946 | type, OVS_NAT_ATTR_MAX); | ||
947 | return -EINVAL; | ||
948 | } | ||
949 | |||
950 | if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { | ||
951 | OVS_NLERR(log, | ||
952 | "NAT attribute type %d has unexpected length (%d != %d).\n", | ||
953 | type, nla_len(a), | ||
954 | ovs_nat_attr_lens[type][ip_vers]); | ||
955 | return -EINVAL; | ||
956 | } | ||
957 | |||
958 | switch (type) { | ||
959 | case OVS_NAT_ATTR_SRC: | ||
960 | case OVS_NAT_ATTR_DST: | ||
961 | if (info->nat) { | ||
962 | OVS_NLERR(log, | ||
963 | "Only one type of NAT may be specified.\n" | ||
964 | ); | ||
965 | return -ERANGE; | ||
966 | } | ||
967 | info->nat |= OVS_CT_NAT; | ||
968 | info->nat |= ((type == OVS_NAT_ATTR_SRC) | ||
969 | ? OVS_CT_SRC_NAT : OVS_CT_DST_NAT); | ||
970 | break; | ||
971 | |||
972 | case OVS_NAT_ATTR_IP_MIN: | ||
973 | nla_memcpy(&info->range.min_addr, a, | ||
974 | sizeof(info->range.min_addr)); | ||
975 | info->range.flags |= NF_NAT_RANGE_MAP_IPS; | ||
976 | break; | ||
977 | |||
978 | case OVS_NAT_ATTR_IP_MAX: | ||
979 | have_ip_max = true; | ||
980 | nla_memcpy(&info->range.max_addr, a, | ||
981 | sizeof(info->range.max_addr)); | ||
982 | info->range.flags |= NF_NAT_RANGE_MAP_IPS; | ||
983 | break; | ||
984 | |||
985 | case OVS_NAT_ATTR_PROTO_MIN: | ||
986 | info->range.min_proto.all = htons(nla_get_u16(a)); | ||
987 | info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
988 | break; | ||
989 | |||
990 | case OVS_NAT_ATTR_PROTO_MAX: | ||
991 | have_proto_max = true; | ||
992 | info->range.max_proto.all = htons(nla_get_u16(a)); | ||
993 | info->range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | ||
994 | break; | ||
995 | |||
996 | case OVS_NAT_ATTR_PERSISTENT: | ||
997 | info->range.flags |= NF_NAT_RANGE_PERSISTENT; | ||
998 | break; | ||
999 | |||
1000 | case OVS_NAT_ATTR_PROTO_HASH: | ||
1001 | info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM; | ||
1002 | break; | ||
1003 | |||
1004 | case OVS_NAT_ATTR_PROTO_RANDOM: | ||
1005 | info->range.flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY; | ||
1006 | break; | ||
1007 | |||
1008 | default: | ||
1009 | OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); | ||
1010 | return -EINVAL; | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | if (rem > 0) { | ||
1015 | OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); | ||
1016 | return -EINVAL; | ||
1017 | } | ||
1018 | if (!info->nat) { | ||
1019 | /* Do not allow flags if no type is given. */ | ||
1020 | if (info->range.flags) { | ||
1021 | OVS_NLERR(log, | ||
1022 | "NAT flags may be given only when NAT range (SRC or DST) is also specified.\n" | ||
1023 | ); | ||
1024 | return -EINVAL; | ||
1025 | } | ||
1026 | info->nat = OVS_CT_NAT; /* NAT existing connections. */ | ||
1027 | } else if (!info->commit) { | ||
1028 | OVS_NLERR(log, | ||
1029 | "NAT attributes may be specified only when CT COMMIT flag is also specified.\n" | ||
1030 | ); | ||
1031 | return -EINVAL; | ||
1032 | } | ||
1033 | /* Allow missing IP_MAX. */ | ||
1034 | if (info->range.flags & NF_NAT_RANGE_MAP_IPS && !have_ip_max) { | ||
1035 | memcpy(&info->range.max_addr, &info->range.min_addr, | ||
1036 | sizeof(info->range.max_addr)); | ||
1037 | } | ||
1038 | /* Allow missing PROTO_MAX. */ | ||
1039 | if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && | ||
1040 | !have_proto_max) { | ||
1041 | info->range.max_proto.all = info->range.min_proto.all; | ||
1042 | } | ||
1043 | return 0; | ||
1044 | } | ||
1045 | #endif | ||
1046 | |||
544 | static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { | 1047 | static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { |
545 | [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, | 1048 | [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 }, |
546 | [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), | 1049 | [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16), |
@@ -550,7 +1053,11 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { | |||
550 | [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), | 1053 | [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels), |
551 | .maxlen = sizeof(struct md_labels) }, | 1054 | .maxlen = sizeof(struct md_labels) }, |
552 | [OVS_CT_ATTR_HELPER] = { .minlen = 1, | 1055 | [OVS_CT_ATTR_HELPER] = { .minlen = 1, |
553 | .maxlen = NF_CT_HELPER_NAME_LEN } | 1056 | .maxlen = NF_CT_HELPER_NAME_LEN }, |
1057 | #ifdef CONFIG_NF_NAT_NEEDED | ||
1058 | /* NAT length is checked when parsing the nested attributes. */ | ||
1059 | [OVS_CT_ATTR_NAT] = { .minlen = 0, .maxlen = INT_MAX }, | ||
1060 | #endif | ||
554 | }; | 1061 | }; |
555 | 1062 | ||
556 | static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, | 1063 | static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, |
@@ -617,6 +1124,15 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, | |||
617 | return -EINVAL; | 1124 | return -EINVAL; |
618 | } | 1125 | } |
619 | break; | 1126 | break; |
1127 | #ifdef CONFIG_NF_NAT_NEEDED | ||
1128 | case OVS_CT_ATTR_NAT: { | ||
1129 | int err = parse_nat(a, info, log); | ||
1130 | |||
1131 | if (err) | ||
1132 | return err; | ||
1133 | break; | ||
1134 | } | ||
1135 | #endif | ||
620 | default: | 1136 | default: |
621 | OVS_NLERR(log, "Unknown conntrack attr (%d)", | 1137 | OVS_NLERR(log, "Unknown conntrack attr (%d)", |
622 | type); | 1138 | type); |
@@ -704,6 +1220,74 @@ err_free_ct: | |||
704 | return err; | 1220 | return err; |
705 | } | 1221 | } |
706 | 1222 | ||
1223 | #ifdef CONFIG_NF_NAT_NEEDED | ||
1224 | static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, | ||
1225 | struct sk_buff *skb) | ||
1226 | { | ||
1227 | struct nlattr *start; | ||
1228 | |||
1229 | start = nla_nest_start(skb, OVS_CT_ATTR_NAT); | ||
1230 | if (!start) | ||
1231 | return false; | ||
1232 | |||
1233 | if (info->nat & OVS_CT_SRC_NAT) { | ||
1234 | if (nla_put_flag(skb, OVS_NAT_ATTR_SRC)) | ||
1235 | return false; | ||
1236 | } else if (info->nat & OVS_CT_DST_NAT) { | ||
1237 | if (nla_put_flag(skb, OVS_NAT_ATTR_DST)) | ||
1238 | return false; | ||
1239 | } else { | ||
1240 | goto out; | ||
1241 | } | ||
1242 | |||
1243 | if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { | ||
1244 | if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && | ||
1245 | info->family == NFPROTO_IPV4) { | ||
1246 | if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, | ||
1247 | info->range.min_addr.ip) || | ||
1248 | (info->range.max_addr.ip | ||
1249 | != info->range.min_addr.ip && | ||
1250 | (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, | ||
1251 | info->range.max_addr.ip)))) | ||
1252 | return false; | ||
1253 | } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && | ||
1254 | info->family == NFPROTO_IPV6) { | ||
1255 | if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, | ||
1256 | &info->range.min_addr.in6) || | ||
1257 | (memcmp(&info->range.max_addr.in6, | ||
1258 | &info->range.min_addr.in6, | ||
1259 | sizeof(info->range.max_addr.in6)) && | ||
1260 | (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, | ||
1261 | &info->range.max_addr.in6)))) | ||
1262 | return false; | ||
1263 | } else { | ||
1264 | return false; | ||
1265 | } | ||
1266 | } | ||
1267 | if (info->range.flags & NF_NAT_RANGE_PROTO_SPECIFIED && | ||
1268 | (nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MIN, | ||
1269 | ntohs(info->range.min_proto.all)) || | ||
1270 | (info->range.max_proto.all != info->range.min_proto.all && | ||
1271 | nla_put_u16(skb, OVS_NAT_ATTR_PROTO_MAX, | ||
1272 | ntohs(info->range.max_proto.all))))) | ||
1273 | return false; | ||
1274 | |||
1275 | if (info->range.flags & NF_NAT_RANGE_PERSISTENT && | ||
1276 | nla_put_flag(skb, OVS_NAT_ATTR_PERSISTENT)) | ||
1277 | return false; | ||
1278 | if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM && | ||
1279 | nla_put_flag(skb, OVS_NAT_ATTR_PROTO_HASH)) | ||
1280 | return false; | ||
1281 | if (info->range.flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY && | ||
1282 | nla_put_flag(skb, OVS_NAT_ATTR_PROTO_RANDOM)) | ||
1283 | return false; | ||
1284 | out: | ||
1285 | nla_nest_end(skb, start); | ||
1286 | |||
1287 | return true; | ||
1288 | } | ||
1289 | #endif | ||
1290 | |||
707 | int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, | 1291 | int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, |
708 | struct sk_buff *skb) | 1292 | struct sk_buff *skb) |
709 | { | 1293 | { |
@@ -732,7 +1316,10 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info, | |||
732 | ct_info->helper->name)) | 1316 | ct_info->helper->name)) |
733 | return -EMSGSIZE; | 1317 | return -EMSGSIZE; |
734 | } | 1318 | } |
735 | 1319 | #ifdef CONFIG_NF_NAT_NEEDED | |
1320 | if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb)) | ||
1321 | return -EMSGSIZE; | ||
1322 | #endif | ||
736 | nla_nest_end(skb, start); | 1323 | nla_nest_end(skb, start); |
737 | 1324 | ||
738 | return 0; | 1325 | return 0; |
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index a7544f405c16..8f6230bd6183 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h | |||
@@ -37,7 +37,8 @@ void ovs_ct_free_action(const struct nlattr *a); | |||
37 | 37 | ||
38 | #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ | 38 | #define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \ |
39 | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ | 39 | OVS_CS_F_RELATED | OVS_CS_F_REPLY_DIR | \ |
40 | OVS_CS_F_INVALID | OVS_CS_F_TRACKED) | 40 | OVS_CS_F_INVALID | OVS_CS_F_TRACKED | \ |
41 | OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT) | ||
41 | #else | 42 | #else |
42 | #include <linux/errno.h> | 43 | #include <linux/errno.h> |
43 | 44 | ||
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index deadfdab1bc3..0cc66a4e492d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c | |||
@@ -422,10 +422,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, | |||
422 | struct sk_buff *nskb = NULL; | 422 | struct sk_buff *nskb = NULL; |
423 | struct sk_buff *user_skb = NULL; /* to be queued to userspace */ | 423 | struct sk_buff *user_skb = NULL; /* to be queued to userspace */ |
424 | struct nlattr *nla; | 424 | struct nlattr *nla; |
425 | struct genl_info info = { | ||
426 | .dst_sk = ovs_dp_get_net(dp)->genl_sock, | ||
427 | .snd_portid = upcall_info->portid, | ||
428 | }; | ||
429 | size_t len; | 425 | size_t len; |
430 | unsigned int hlen; | 426 | unsigned int hlen; |
431 | int err, dp_ifindex; | 427 | int err, dp_ifindex; |
@@ -466,7 +462,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, | |||
466 | hlen = skb->len; | 462 | hlen = skb->len; |
467 | 463 | ||
468 | len = upcall_msg_size(upcall_info, hlen); | 464 | len = upcall_msg_size(upcall_info, hlen); |
469 | user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC); | 465 | user_skb = genlmsg_new(len, GFP_ATOMIC); |
470 | if (!user_skb) { | 466 | if (!user_skb) { |
471 | err = -ENOMEM; | 467 | err = -ENOMEM; |
472 | goto out; | 468 | goto out; |
@@ -654,7 +650,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { | |||
654 | 650 | ||
655 | static const struct genl_ops dp_packet_genl_ops[] = { | 651 | static const struct genl_ops dp_packet_genl_ops[] = { |
656 | { .cmd = OVS_PACKET_CMD_EXECUTE, | 652 | { .cmd = OVS_PACKET_CMD_EXECUTE, |
657 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 653 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
658 | .policy = packet_policy, | 654 | .policy = packet_policy, |
659 | .doit = ovs_packet_cmd_execute | 655 | .doit = ovs_packet_cmd_execute |
660 | } | 656 | } |
@@ -876,7 +872,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act | |||
876 | return NULL; | 872 | return NULL; |
877 | 873 | ||
878 | len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); | 874 | len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); |
879 | skb = genlmsg_new_unicast(len, info, GFP_KERNEL); | 875 | skb = genlmsg_new(len, GFP_KERNEL); |
880 | if (!skb) | 876 | if (!skb) |
881 | return ERR_PTR(-ENOMEM); | 877 | return ERR_PTR(-ENOMEM); |
882 | 878 | ||
@@ -1100,26 +1096,32 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) | |||
1100 | struct sw_flow_match match; | 1096 | struct sw_flow_match match; |
1101 | struct sw_flow_id sfid; | 1097 | struct sw_flow_id sfid; |
1102 | u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); | 1098 | u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); |
1103 | int error; | 1099 | int error = 0; |
1104 | bool log = !a[OVS_FLOW_ATTR_PROBE]; | 1100 | bool log = !a[OVS_FLOW_ATTR_PROBE]; |
1105 | bool ufid_present; | 1101 | bool ufid_present; |
1106 | 1102 | ||
1107 | /* Extract key. */ | ||
1108 | error = -EINVAL; | ||
1109 | if (!a[OVS_FLOW_ATTR_KEY]) { | ||
1110 | OVS_NLERR(log, "Flow key attribute not present in set flow."); | ||
1111 | goto error; | ||
1112 | } | ||
1113 | |||
1114 | ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); | 1103 | ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); |
1115 | ovs_match_init(&match, &key, &mask); | 1104 | if (a[OVS_FLOW_ATTR_KEY]) { |
1116 | error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], | 1105 | ovs_match_init(&match, &key, &mask); |
1117 | a[OVS_FLOW_ATTR_MASK], log); | 1106 | error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], |
1107 | a[OVS_FLOW_ATTR_MASK], log); | ||
1108 | } else if (!ufid_present) { | ||
1109 | OVS_NLERR(log, | ||
1110 | "Flow set message rejected, Key attribute missing."); | ||
1111 | error = -EINVAL; | ||
1112 | } | ||
1118 | if (error) | 1113 | if (error) |
1119 | goto error; | 1114 | goto error; |
1120 | 1115 | ||
1121 | /* Validate actions. */ | 1116 | /* Validate actions. */ |
1122 | if (a[OVS_FLOW_ATTR_ACTIONS]) { | 1117 | if (a[OVS_FLOW_ATTR_ACTIONS]) { |
1118 | if (!a[OVS_FLOW_ATTR_KEY]) { | ||
1119 | OVS_NLERR(log, | ||
1120 | "Flow key attribute not present in set flow."); | ||
1121 | error = -EINVAL; | ||
1122 | goto error; | ||
1123 | } | ||
1124 | |||
1123 | acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, | 1125 | acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, |
1124 | &mask, log); | 1126 | &mask, log); |
1125 | if (IS_ERR(acts)) { | 1127 | if (IS_ERR(acts)) { |
@@ -1391,12 +1393,12 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { | |||
1391 | 1393 | ||
1392 | static const struct genl_ops dp_flow_genl_ops[] = { | 1394 | static const struct genl_ops dp_flow_genl_ops[] = { |
1393 | { .cmd = OVS_FLOW_CMD_NEW, | 1395 | { .cmd = OVS_FLOW_CMD_NEW, |
1394 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1396 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1395 | .policy = flow_policy, | 1397 | .policy = flow_policy, |
1396 | .doit = ovs_flow_cmd_new | 1398 | .doit = ovs_flow_cmd_new |
1397 | }, | 1399 | }, |
1398 | { .cmd = OVS_FLOW_CMD_DEL, | 1400 | { .cmd = OVS_FLOW_CMD_DEL, |
1399 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1401 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1400 | .policy = flow_policy, | 1402 | .policy = flow_policy, |
1401 | .doit = ovs_flow_cmd_del | 1403 | .doit = ovs_flow_cmd_del |
1402 | }, | 1404 | }, |
@@ -1407,7 +1409,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { | |||
1407 | .dumpit = ovs_flow_cmd_dump | 1409 | .dumpit = ovs_flow_cmd_dump |
1408 | }, | 1410 | }, |
1409 | { .cmd = OVS_FLOW_CMD_SET, | 1411 | { .cmd = OVS_FLOW_CMD_SET, |
1410 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1412 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1411 | .policy = flow_policy, | 1413 | .policy = flow_policy, |
1412 | .doit = ovs_flow_cmd_set, | 1414 | .doit = ovs_flow_cmd_set, |
1413 | }, | 1415 | }, |
@@ -1481,9 +1483,9 @@ error: | |||
1481 | return -EMSGSIZE; | 1483 | return -EMSGSIZE; |
1482 | } | 1484 | } |
1483 | 1485 | ||
1484 | static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) | 1486 | static struct sk_buff *ovs_dp_cmd_alloc_info(void) |
1485 | { | 1487 | { |
1486 | return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); | 1488 | return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL); |
1487 | } | 1489 | } |
1488 | 1490 | ||
1489 | /* Called with rcu_read_lock or ovs_mutex. */ | 1491 | /* Called with rcu_read_lock or ovs_mutex. */ |
@@ -1536,7 +1538,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) | |||
1536 | if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) | 1538 | if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) |
1537 | goto err; | 1539 | goto err; |
1538 | 1540 | ||
1539 | reply = ovs_dp_cmd_alloc_info(info); | 1541 | reply = ovs_dp_cmd_alloc_info(); |
1540 | if (!reply) | 1542 | if (!reply) |
1541 | return -ENOMEM; | 1543 | return -ENOMEM; |
1542 | 1544 | ||
@@ -1657,7 +1659,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) | |||
1657 | struct datapath *dp; | 1659 | struct datapath *dp; |
1658 | int err; | 1660 | int err; |
1659 | 1661 | ||
1660 | reply = ovs_dp_cmd_alloc_info(info); | 1662 | reply = ovs_dp_cmd_alloc_info(); |
1661 | if (!reply) | 1663 | if (!reply) |
1662 | return -ENOMEM; | 1664 | return -ENOMEM; |
1663 | 1665 | ||
@@ -1690,7 +1692,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) | |||
1690 | struct datapath *dp; | 1692 | struct datapath *dp; |
1691 | int err; | 1693 | int err; |
1692 | 1694 | ||
1693 | reply = ovs_dp_cmd_alloc_info(info); | 1695 | reply = ovs_dp_cmd_alloc_info(); |
1694 | if (!reply) | 1696 | if (!reply) |
1695 | return -ENOMEM; | 1697 | return -ENOMEM; |
1696 | 1698 | ||
@@ -1723,7 +1725,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) | |||
1723 | struct datapath *dp; | 1725 | struct datapath *dp; |
1724 | int err; | 1726 | int err; |
1725 | 1727 | ||
1726 | reply = ovs_dp_cmd_alloc_info(info); | 1728 | reply = ovs_dp_cmd_alloc_info(); |
1727 | if (!reply) | 1729 | if (!reply) |
1728 | return -ENOMEM; | 1730 | return -ENOMEM; |
1729 | 1731 | ||
@@ -1777,12 +1779,12 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { | |||
1777 | 1779 | ||
1778 | static const struct genl_ops dp_datapath_genl_ops[] = { | 1780 | static const struct genl_ops dp_datapath_genl_ops[] = { |
1779 | { .cmd = OVS_DP_CMD_NEW, | 1781 | { .cmd = OVS_DP_CMD_NEW, |
1780 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1782 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1781 | .policy = datapath_policy, | 1783 | .policy = datapath_policy, |
1782 | .doit = ovs_dp_cmd_new | 1784 | .doit = ovs_dp_cmd_new |
1783 | }, | 1785 | }, |
1784 | { .cmd = OVS_DP_CMD_DEL, | 1786 | { .cmd = OVS_DP_CMD_DEL, |
1785 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1787 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1786 | .policy = datapath_policy, | 1788 | .policy = datapath_policy, |
1787 | .doit = ovs_dp_cmd_del | 1789 | .doit = ovs_dp_cmd_del |
1788 | }, | 1790 | }, |
@@ -1793,7 +1795,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = { | |||
1793 | .dumpit = ovs_dp_cmd_dump | 1795 | .dumpit = ovs_dp_cmd_dump |
1794 | }, | 1796 | }, |
1795 | { .cmd = OVS_DP_CMD_SET, | 1797 | { .cmd = OVS_DP_CMD_SET, |
1796 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 1798 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
1797 | .policy = datapath_policy, | 1799 | .policy = datapath_policy, |
1798 | .doit = ovs_dp_cmd_set, | 1800 | .doit = ovs_dp_cmd_set, |
1799 | }, | 1801 | }, |
@@ -1912,6 +1914,29 @@ static struct vport *lookup_vport(struct net *net, | |||
1912 | return ERR_PTR(-EINVAL); | 1914 | return ERR_PTR(-EINVAL); |
1913 | } | 1915 | } |
1914 | 1916 | ||
1917 | /* Called with ovs_mutex */ | ||
1918 | static void update_headroom(struct datapath *dp) | ||
1919 | { | ||
1920 | unsigned dev_headroom, max_headroom = 0; | ||
1921 | struct net_device *dev; | ||
1922 | struct vport *vport; | ||
1923 | int i; | ||
1924 | |||
1925 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { | ||
1926 | hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { | ||
1927 | dev = vport->dev; | ||
1928 | dev_headroom = netdev_get_fwd_headroom(dev); | ||
1929 | if (dev_headroom > max_headroom) | ||
1930 | max_headroom = dev_headroom; | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | dp->max_headroom = max_headroom; | ||
1935 | for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) | ||
1936 | hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) | ||
1937 | netdev_set_rx_headroom(vport->dev, max_headroom); | ||
1938 | } | ||
1939 | |||
1915 | static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) | 1940 | static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) |
1916 | { | 1941 | { |
1917 | struct nlattr **a = info->attrs; | 1942 | struct nlattr **a = info->attrs; |
@@ -1977,6 +2002,12 @@ restart: | |||
1977 | 2002 | ||
1978 | err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, | 2003 | err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, |
1979 | info->snd_seq, 0, OVS_VPORT_CMD_NEW); | 2004 | info->snd_seq, 0, OVS_VPORT_CMD_NEW); |
2005 | |||
2006 | if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) | ||
2007 | update_headroom(dp); | ||
2008 | else | ||
2009 | netdev_set_rx_headroom(vport->dev, dp->max_headroom); | ||
2010 | |||
1980 | BUG_ON(err < 0); | 2011 | BUG_ON(err < 0); |
1981 | ovs_unlock(); | 2012 | ovs_unlock(); |
1982 | 2013 | ||
@@ -2043,8 +2074,10 @@ exit_unlock_free: | |||
2043 | 2074 | ||
2044 | static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) | 2075 | static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) |
2045 | { | 2076 | { |
2077 | bool must_update_headroom = false; | ||
2046 | struct nlattr **a = info->attrs; | 2078 | struct nlattr **a = info->attrs; |
2047 | struct sk_buff *reply; | 2079 | struct sk_buff *reply; |
2080 | struct datapath *dp; | ||
2048 | struct vport *vport; | 2081 | struct vport *vport; |
2049 | int err; | 2082 | int err; |
2050 | 2083 | ||
@@ -2066,7 +2099,16 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) | |||
2066 | err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, | 2099 | err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, |
2067 | info->snd_seq, 0, OVS_VPORT_CMD_DEL); | 2100 | info->snd_seq, 0, OVS_VPORT_CMD_DEL); |
2068 | BUG_ON(err < 0); | 2101 | BUG_ON(err < 0); |
2102 | |||
2103 | /* the vport deletion may trigger dp headroom update */ | ||
2104 | dp = vport->dp; | ||
2105 | if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) | ||
2106 | must_update_headroom = true; | ||
2107 | netdev_reset_rx_headroom(vport->dev); | ||
2069 | ovs_dp_detach_port(vport); | 2108 | ovs_dp_detach_port(vport); |
2109 | |||
2110 | if (must_update_headroom) | ||
2111 | update_headroom(dp); | ||
2070 | ovs_unlock(); | 2112 | ovs_unlock(); |
2071 | 2113 | ||
2072 | ovs_notify(&dp_vport_genl_family, reply, info); | 2114 | ovs_notify(&dp_vport_genl_family, reply, info); |
@@ -2158,12 +2200,12 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { | |||
2158 | 2200 | ||
2159 | static const struct genl_ops dp_vport_genl_ops[] = { | 2201 | static const struct genl_ops dp_vport_genl_ops[] = { |
2160 | { .cmd = OVS_VPORT_CMD_NEW, | 2202 | { .cmd = OVS_VPORT_CMD_NEW, |
2161 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 2203 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
2162 | .policy = vport_policy, | 2204 | .policy = vport_policy, |
2163 | .doit = ovs_vport_cmd_new | 2205 | .doit = ovs_vport_cmd_new |
2164 | }, | 2206 | }, |
2165 | { .cmd = OVS_VPORT_CMD_DEL, | 2207 | { .cmd = OVS_VPORT_CMD_DEL, |
2166 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 2208 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
2167 | .policy = vport_policy, | 2209 | .policy = vport_policy, |
2168 | .doit = ovs_vport_cmd_del | 2210 | .doit = ovs_vport_cmd_del |
2169 | }, | 2211 | }, |
@@ -2174,7 +2216,7 @@ static const struct genl_ops dp_vport_genl_ops[] = { | |||
2174 | .dumpit = ovs_vport_cmd_dump | 2216 | .dumpit = ovs_vport_cmd_dump |
2175 | }, | 2217 | }, |
2176 | { .cmd = OVS_VPORT_CMD_SET, | 2218 | { .cmd = OVS_VPORT_CMD_SET, |
2177 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ | 2219 | .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ |
2178 | .policy = vport_policy, | 2220 | .policy = vport_policy, |
2179 | .doit = ovs_vport_cmd_set, | 2221 | .doit = ovs_vport_cmd_set, |
2180 | }, | 2222 | }, |
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 67bdecd9fdc1..427e39a045cf 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h | |||
@@ -68,6 +68,8 @@ struct dp_stats_percpu { | |||
68 | * ovs_mutex and RCU. | 68 | * ovs_mutex and RCU. |
69 | * @stats_percpu: Per-CPU datapath statistics. | 69 | * @stats_percpu: Per-CPU datapath statistics. |
70 | * @net: Reference to net namespace. | 70 | * @net: Reference to net namespace. |
71 | * @max_headroom: the maximum headroom of all vports in this datapath; it will | ||
72 | * be used by all the internal vports in this dp. | ||
71 | * | 73 | * |
72 | * Context: See the comment on locking at the top of datapath.c for additional | 74 | * Context: See the comment on locking at the top of datapath.c for additional |
73 | * locking information. | 75 | * locking information. |
@@ -89,6 +91,8 @@ struct datapath { | |||
89 | possible_net_t net; | 91 | possible_net_t net; |
90 | 92 | ||
91 | u32 user_features; | 93 | u32 user_features; |
94 | |||
95 | u32 max_headroom; | ||
92 | }; | 96 | }; |
93 | 97 | ||
94 | /** | 98 | /** |
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 1d055c559eaf..03378e75a67c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h | |||
@@ -55,7 +55,7 @@ struct ovs_tunnel_info { | |||
55 | FIELD_SIZEOF(struct sw_flow_key, recirc_id)) | 55 | FIELD_SIZEOF(struct sw_flow_key, recirc_id)) |
56 | 56 | ||
57 | struct sw_flow_key { | 57 | struct sw_flow_key { |
58 | u8 tun_opts[255]; | 58 | u8 tun_opts[IP_TUNNEL_OPTS_MAX]; |
59 | u8 tun_opts_len; | 59 | u8 tun_opts_len; |
60 | struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */ | 60 | struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */ |
61 | struct { | 61 | struct { |
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index d1bd4a45ca2d..689c17264221 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c | |||
@@ -1959,6 +1959,12 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, | |||
1959 | if (!tun_dst) | 1959 | if (!tun_dst) |
1960 | return -ENOMEM; | 1960 | return -ENOMEM; |
1961 | 1961 | ||
1962 | err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL); | ||
1963 | if (err) { | ||
1964 | dst_release((struct dst_entry *)tun_dst); | ||
1965 | return err; | ||
1966 | } | ||
1967 | |||
1962 | a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, | 1968 | a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL, |
1963 | sizeof(*ovs_tun), log); | 1969 | sizeof(*ovs_tun), log); |
1964 | if (IS_ERR(a)) { | 1970 | if (IS_ERR(a)) { |
@@ -2038,9 +2044,6 @@ static int validate_set(const struct nlattr *a, | |||
2038 | break; | 2044 | break; |
2039 | 2045 | ||
2040 | case OVS_KEY_ATTR_TUNNEL: | 2046 | case OVS_KEY_ATTR_TUNNEL: |
2041 | if (eth_p_mpls(eth_type)) | ||
2042 | return -EINVAL; | ||
2043 | |||
2044 | if (masked) | 2047 | if (masked) |
2045 | return -EINVAL; /* Masked tunnel set not supported. */ | 2048 | return -EINVAL; /* Masked tunnel set not supported. */ |
2046 | 2049 | ||
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 30ab8e127288..1a1fcec88695 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c | |||
@@ -132,6 +132,6 @@ static void __exit ovs_geneve_tnl_exit(void) | |||
132 | module_init(ovs_geneve_tnl_init); | 132 | module_init(ovs_geneve_tnl_init); |
133 | module_exit(ovs_geneve_tnl_exit); | 133 | module_exit(ovs_geneve_tnl_exit); |
134 | 134 | ||
135 | MODULE_DESCRIPTION("OVS: Geneve swiching port"); | 135 | MODULE_DESCRIPTION("OVS: Geneve switching port"); |
136 | MODULE_LICENSE("GPL"); | 136 | MODULE_LICENSE("GPL"); |
137 | MODULE_ALIAS("vport-type-5"); | 137 | MODULE_ALIAS("vport-type-5"); |
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index ec76398a792f..7c8b90bf0e54 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c | |||
@@ -138,6 +138,11 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) | |||
138 | return stats; | 138 | return stats; |
139 | } | 139 | } |
140 | 140 | ||
141 | static void internal_set_rx_headroom(struct net_device *dev, int new_hr) | ||
142 | { | ||
143 | dev->needed_headroom = new_hr; | ||
144 | } | ||
145 | |||
141 | static const struct net_device_ops internal_dev_netdev_ops = { | 146 | static const struct net_device_ops internal_dev_netdev_ops = { |
142 | .ndo_open = internal_dev_open, | 147 | .ndo_open = internal_dev_open, |
143 | .ndo_stop = internal_dev_stop, | 148 | .ndo_stop = internal_dev_stop, |
@@ -145,6 +150,7 @@ static const struct net_device_ops internal_dev_netdev_ops = { | |||
145 | .ndo_set_mac_address = eth_mac_addr, | 150 | .ndo_set_mac_address = eth_mac_addr, |
146 | .ndo_change_mtu = internal_dev_change_mtu, | 151 | .ndo_change_mtu = internal_dev_change_mtu, |
147 | .ndo_get_stats64 = internal_get_stats, | 152 | .ndo_get_stats64 = internal_get_stats, |
153 | .ndo_set_rx_headroom = internal_set_rx_headroom, | ||
148 | }; | 154 | }; |
149 | 155 | ||
150 | static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { | 156 | static struct rtnl_link_ops internal_dev_link_ops __read_mostly = { |
@@ -158,7 +164,8 @@ static void do_setup(struct net_device *netdev) | |||
158 | netdev->netdev_ops = &internal_dev_netdev_ops; | 164 | netdev->netdev_ops = &internal_dev_netdev_ops; |
159 | 165 | ||
160 | netdev->priv_flags &= ~IFF_TX_SKB_SHARING; | 166 | netdev->priv_flags &= ~IFF_TX_SKB_SHARING; |
161 | netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH; | 167 | netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | |
168 | IFF_PHONY_HEADROOM; | ||
162 | netdev->destructor = internal_dev_destructor; | 169 | netdev->destructor = internal_dev_destructor; |
163 | netdev->ethtool_ops = &internal_dev_ethtool_ops; | 170 | netdev->ethtool_ops = &internal_dev_ethtool_ops; |
164 | netdev->rtnl_link_ops = &internal_dev_link_ops; | 171 | netdev->rtnl_link_ops = &internal_dev_link_ops; |
@@ -199,6 +206,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) | |||
199 | err = -ENOMEM; | 206 | err = -ENOMEM; |
200 | goto error_free_netdev; | 207 | goto error_free_netdev; |
201 | } | 208 | } |
209 | vport->dev->needed_headroom = vport->dp->max_headroom; | ||
202 | 210 | ||
203 | dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); | 211 | dev_net_set(vport->dev, ovs_dp_get_net(vport->dp)); |
204 | internal_dev = internal_dev_priv(vport->dev); | 212 | internal_dev = internal_dev_priv(vport->dev); |
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 6a6adf314363..4e3972344aa6 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c | |||
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb) | |||
58 | return; | 58 | return; |
59 | 59 | ||
60 | skb_push(skb, ETH_HLEN); | 60 | skb_push(skb, ETH_HLEN); |
61 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); | 61 | skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
62 | ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); | 62 | ovs_vport_receive(vport, skb, skb_tunnel_info(skb)); |
63 | return; | 63 | return; |
64 | error: | 64 | error: |
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index de9cb19efb6a..5eb7694348b5 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c | |||
@@ -90,7 +90,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) | |||
90 | int err; | 90 | int err; |
91 | struct vxlan_config conf = { | 91 | struct vxlan_config conf = { |
92 | .no_share = true, | 92 | .no_share = true, |
93 | .flags = VXLAN_F_COLLECT_METADATA, | 93 | .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, |
94 | /* Don't restrict the packets that can be sent by MTU */ | 94 | /* Don't restrict the packets that can be sent by MTU */ |
95 | .mtu = IP_MAX_MTU, | 95 | .mtu = IP_MAX_MTU, |
96 | }; | 96 | }; |
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c10899cb9040..f01f28a567ad 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h | |||
@@ -185,13 +185,6 @@ static inline struct vport *vport_from_priv(void *priv) | |||
185 | int ovs_vport_receive(struct vport *, struct sk_buff *, | 185 | int ovs_vport_receive(struct vport *, struct sk_buff *, |
186 | const struct ip_tunnel_info *); | 186 | const struct ip_tunnel_info *); |
187 | 187 | ||
188 | static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, | ||
189 | const void *start, unsigned int len) | ||
190 | { | ||
191 | if (skb->ip_summed == CHECKSUM_COMPLETE) | ||
192 | skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); | ||
193 | } | ||
194 | |||
195 | static inline const char *ovs_vport_name(struct vport *vport) | 188 | static inline const char *ovs_vport_name(struct vport *vport) |
196 | { | 189 | { |
197 | return vport->dev->name; | 190 | return vport->dev->name; |