aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2012-11-30 12:01:30 -0500
committer David S. Miller <davem@davemloft.net> 2012-11-30 12:01:30 -0500
commit e7165030db8e932a9a968f7015cd3b2e984f8e7c (patch)
tree ab46a0baf25f72b7001bb4673ba47534b81a0d2d /net
parent bb728820fe7c42fdb838ab2745fb5fe6b18b5ffa (diff)
parent 92eb1d477145b2e7780b5002e856f70b8c3d74da (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Conflicts:
	net/ipv6/exthdrs_core.c

Jesse Gross says:

====================
This series of improvements for 3.8/net-next contains four components:
 * Support for modifying IPv6 headers
 * Support for matching and setting skb->mark for better integration with
   things like iptables
 * Ability to recognize the EtherType for RARP packets
 * Two small performance enhancements

The movement of ipv6_find_hdr() into exthdrs_core.c causes two small merge
conflicts. I left it as is but can do the merge if you want. The conflicts are:
 * ipv6_find_hdr() and ipv6_find_tlv() were both moved to the bottom of
   exthdrs_core.c. Both should stay.
 * A new use of ipv6_find_hdr() was added to net/netfilter/ipvs/ip_vs_core.c
   after this patch. The IPVS user has two instances of the old constant name
   IP6T_FH_F_FRAG which has been renamed to IP6_FH_F_FRAG.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv6/exthdrs_core.c 124
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 103
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 4
-rw-r--r-- net/netfilter/xt_HMARK.c 8
-rw-r--r-- net/openvswitch/actions.c 97
-rw-r--r-- net/openvswitch/datapath.c 27
-rw-r--r-- net/openvswitch/flow.c 28
-rw-r--r-- net/openvswitch/flow.h 8
-rw-r--r-- net/openvswitch/vport-netdev.c 14
-rw-r--r-- net/openvswitch/vport-netdev.h 3
-rw-r--r-- net/openvswitch/vport.c 5
11 files changed, 296 insertions, 125 deletions
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e7d756e19d1d..c5e83fae4df4 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -155,3 +155,127 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
155 return -1; 155 return -1;
156} 156}
157EXPORT_SYMBOL_GPL(ipv6_find_tlv); 157EXPORT_SYMBOL_GPL(ipv6_find_tlv);
158
159/*
160 * find the offset to specified header or the protocol number of last header
161 * if target < 0. "last header" is transport protocol header, ESP, or
162 * "No next header".
163 *
164 * Note that *offset is used as input/output parameter, and if it is not zero,
165 * then it must be a valid offset to an inner IPv6 header. This can be used
166 * to explore inner IPv6 header, eg. ICMPv6 error messages.
167 *
168 * If target header is found, its offset is set in *offset and the protocol
169 * number is returned. Otherwise, -ENOENT or -EBADMSG is returned.
170 *
171 * If the first fragment doesn't contain the final protocol header or
172 * NEXTHDR_NONE it is considered invalid.
173 *
174 * Note that non-1st fragment is special case that "the protocol number
175 * of last header" is "next header" field in Fragment header. In this case,
176 * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
177 * isn't NULL.
178 *
179 * If flags is not NULL and it's a fragment, then the frag flag
180 * IP6_FH_F_FRAG will be set. If it's an AH header, the
181 * IP6_FH_F_AUTH flag is set, and target < 0, then this function will
182 * stop at the AH header. If the IP6_FH_F_SKIP_RH flag was passed, then this
183 * function will skip all routing headers whose segments_left is 0.
184 */
185int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
186 int target, unsigned short *fragoff, int *flags)
187{
188 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
189 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
190 unsigned int len;
191 bool found;
192
193 if (fragoff)
194 *fragoff = 0;
195
196 if (*offset) {
197 struct ipv6hdr _ip6, *ip6;
198
199 ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
200 if (!ip6 || (ip6->version != 6)) {
201 printk(KERN_ERR "IPv6 header not found\n");
202 return -EBADMSG;
203 }
204 start = *offset + sizeof(struct ipv6hdr);
205 nexthdr = ip6->nexthdr;
206 }
207 len = skb->len - start;
208
209 do {
210 struct ipv6_opt_hdr _hdr, *hp;
211 unsigned int hdrlen;
212 found = (nexthdr == target);
213
214 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
215 if (target < 0)
216 break;
217 return -ENOENT;
218 }
219
220 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
221 if (hp == NULL)
222 return -EBADMSG;
223
224 if (nexthdr == NEXTHDR_ROUTING) {
225 struct ipv6_rt_hdr _rh, *rh;
226
227 rh = skb_header_pointer(skb, start, sizeof(_rh),
228 &_rh);
229 if (rh == NULL)
230 return -EBADMSG;
231
232 if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
233 rh->segments_left == 0)
234 found = false;
235 }
236
237 if (nexthdr == NEXTHDR_FRAGMENT) {
238 unsigned short _frag_off;
239 __be16 *fp;
240
241 if (flags) /* Indicate that this is a fragment */
242 *flags |= IP6_FH_F_FRAG;
243 fp = skb_header_pointer(skb,
244 start+offsetof(struct frag_hdr,
245 frag_off),
246 sizeof(_frag_off),
247 &_frag_off);
248 if (fp == NULL)
249 return -EBADMSG;
250
251 _frag_off = ntohs(*fp) & ~0x7;
252 if (_frag_off) {
253 if (target < 0 &&
254 ((!ipv6_ext_hdr(hp->nexthdr)) ||
255 hp->nexthdr == NEXTHDR_NONE)) {
256 if (fragoff)
257 *fragoff = _frag_off;
258 return hp->nexthdr;
259 }
260 return -ENOENT;
261 }
262 hdrlen = 8;
263 } else if (nexthdr == NEXTHDR_AUTH) {
264 if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
265 break;
266 hdrlen = (hp->hdrlen + 2) << 2;
267 } else
268 hdrlen = ipv6_optlen(hp);
269
270 if (!found) {
271 nexthdr = hp->nexthdr;
272 len -= hdrlen;
273 start += hdrlen;
274 }
275 } while (!found);
276
277 *offset = start;
278 return nexthdr;
279}
280EXPORT_SYMBOL(ipv6_find_hdr);
281
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 74cadd0719a5..125a90d6a795 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2271,112 +2271,9 @@ static void __exit ip6_tables_fini(void)
2271 unregister_pernet_subsys(&ip6_tables_net_ops); 2271 unregister_pernet_subsys(&ip6_tables_net_ops);
2272} 2272}
2273 2273
2274/*
2275 * find the offset to specified header or the protocol number of last header
2276 * if target < 0. "last header" is transport protocol header, ESP, or
2277 * "No next header".
2278 *
2279 * Note that *offset is used as input/output parameter. an if it is not zero,
2280 * then it must be a valid offset to an inner IPv6 header. This can be used
2281 * to explore inner IPv6 header, eg. ICMPv6 error messages.
2282 *
2283 * If target header is found, its offset is set in *offset and return protocol
2284 * number. Otherwise, return -1.
2285 *
2286 * If the first fragment doesn't contain the final protocol header or
2287 * NEXTHDR_NONE it is considered invalid.
2288 *
2289 * Note that non-1st fragment is special case that "the protocol number
2290 * of last header" is "next header" field in Fragment header. In this case,
2291 * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
2292 * isn't NULL.
2293 *
2294 * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG
2295 * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and
2296 * target < 0, then this function will stop at the AH header.
2297 */
2298int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
2299 int target, unsigned short *fragoff, int *flags)
2300{
2301 unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
2302 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
2303 unsigned int len;
2304
2305 if (fragoff)
2306 *fragoff = 0;
2307
2308 if (*offset) {
2309 struct ipv6hdr _ip6, *ip6;
2310
2311 ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
2312 if (!ip6 || (ip6->version != 6)) {
2313 printk(KERN_ERR "IPv6 header not found\n");
2314 return -EBADMSG;
2315 }
2316 start = *offset + sizeof(struct ipv6hdr);
2317 nexthdr = ip6->nexthdr;
2318 }
2319 len = skb->len - start;
2320
2321 while (nexthdr != target) {
2322 struct ipv6_opt_hdr _hdr, *hp;
2323 unsigned int hdrlen;
2324
2325 if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
2326 if (target < 0)
2327 break;
2328 return -ENOENT;
2329 }
2330
2331 hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
2332 if (hp == NULL)
2333 return -EBADMSG;
2334 if (nexthdr == NEXTHDR_FRAGMENT) {
2335 unsigned short _frag_off;
2336 __be16 *fp;
2337
2338 if (flags) /* Indicate that this is a fragment */
2339 *flags |= IP6T_FH_F_FRAG;
2340 fp = skb_header_pointer(skb,
2341 start+offsetof(struct frag_hdr,
2342 frag_off),
2343 sizeof(_frag_off),
2344 &_frag_off);
2345 if (fp == NULL)
2346 return -EBADMSG;
2347
2348 _frag_off = ntohs(*fp) & ~0x7;
2349 if (_frag_off) {
2350 if (target < 0 &&
2351 ((!ipv6_ext_hdr(hp->nexthdr)) ||
2352 hp->nexthdr == NEXTHDR_NONE)) {
2353 if (fragoff)
2354 *fragoff = _frag_off;
2355 return hp->nexthdr;
2356 }
2357 return -ENOENT;
2358 }
2359 hdrlen = 8;
2360 } else if (nexthdr == NEXTHDR_AUTH) {
2361 if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0))
2362 break;
2363 hdrlen = (hp->hdrlen + 2) << 2;
2364 } else
2365 hdrlen = ipv6_optlen(hp);
2366
2367 nexthdr = hp->nexthdr;
2368 len -= hdrlen;
2369 start += hdrlen;
2370 }
2371
2372 *offset = start;
2373 return nexthdr;
2374}
2375
2376EXPORT_SYMBOL(ip6t_register_table); 2274EXPORT_SYMBOL(ip6t_register_table);
2377EXPORT_SYMBOL(ip6t_unregister_table); 2275EXPORT_SYMBOL(ip6t_unregister_table);
2378EXPORT_SYMBOL(ip6t_do_table); 2276EXPORT_SYMBOL(ip6t_do_table);
2379EXPORT_SYMBOL(ipv6_find_hdr);
2380 2277
2381module_init(ip6_tables_init); 2278module_init(ip6_tables_init);
2382module_exit(ip6_tables_fini); 2279module_exit(ip6_tables_fini);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index fb45640dc1fb..47edf5a40a59 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -942,7 +942,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
942 /* Fragment header that is before ICMP header tells us that: 942 /* Fragment header that is before ICMP header tells us that:
943 * it's not an error message since they can't be fragmented. 943 * it's not an error message since they can't be fragmented.
944 */ 944 */
945 if (ipvsh->flags & IP6T_FH_F_FRAG) 945 if (ipvsh->flags & IP6_FH_F_FRAG)
946 return NF_DROP; 946 return NF_DROP;
947 947
948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", 948 IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
@@ -1475,7 +1475,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
1475 /* Fragment header that is before ICMP header tells us that: 1475 /* Fragment header that is before ICMP header tells us that:
1476 * it's not an error message since they can't be fragmented. 1476 * it's not an error message since they can't be fragmented.
1477 */ 1477 */
1478 if (iph->flags & IP6T_FH_F_FRAG) 1478 if (iph->flags & IP6_FH_F_FRAG)
1479 return NF_DROP; 1479 return NF_DROP;
1480 1480
1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", 1481 IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 1686ca1b53a1..73b73f687c58 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -167,7 +167,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
167 const struct xt_hmark_info *info) 167 const struct xt_hmark_info *info)
168{ 168{
169 struct ipv6hdr *ip6, _ip6; 169 struct ipv6hdr *ip6, _ip6;
170 int flag = IP6T_FH_F_AUTH; 170 int flag = IP6_FH_F_AUTH;
171 unsigned int nhoff = 0; 171 unsigned int nhoff = 0;
172 u16 fragoff = 0; 172 u16 fragoff = 0;
173 int nexthdr; 173 int nexthdr;
@@ -177,7 +177,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
177 if (nexthdr < 0) 177 if (nexthdr < 0)
178 return 0; 178 return 0;
179 /* No need to check for icmp errors on fragments */ 179 /* No need to check for icmp errors on fragments */
180 if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) 180 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
181 goto noicmp; 181 goto noicmp;
182 /* Use inner header in case of ICMP errors */ 182 /* Use inner header in case of ICMP errors */
183 if (get_inner6_hdr(skb, &nhoff)) { 183 if (get_inner6_hdr(skb, &nhoff)) {
@@ -185,7 +185,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
185 if (ip6 == NULL) 185 if (ip6 == NULL)
186 return -1; 186 return -1;
187 /* If AH present, use SPI like in ESP. */ 187 /* If AH present, use SPI like in ESP. */
188 flag = IP6T_FH_F_AUTH; 188 flag = IP6_FH_F_AUTH;
189 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); 189 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
190 if (nexthdr < 0) 190 if (nexthdr < 0)
191 return -1; 191 return -1;
@@ -201,7 +201,7 @@ noicmp:
201 if (t->proto == IPPROTO_ICMPV6) 201 if (t->proto == IPPROTO_ICMPV6)
202 return 0; 202 return 0;
203 203
204 if (flag & IP6T_FH_F_FRAG) 204 if (flag & IP6_FH_F_FRAG)
205 return 0; 205 return 0;
206 206
207 hmark_set_tuple_ports(skb, nhoff, t, info); 207 hmark_set_tuple_ports(skb, nhoff, t, info);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 08114478cb85..ac2defeeba83 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,6 +28,7 @@
28#include <linux/if_arp.h> 28#include <linux/if_arp.h>
29#include <linux/if_vlan.h> 29#include <linux/if_vlan.h>
30#include <net/ip.h> 30#include <net/ip.h>
31#include <net/ipv6.h>
31#include <net/checksum.h> 32#include <net/checksum.h>
32#include <net/dsfield.h> 33#include <net/dsfield.h>
33 34
@@ -162,6 +163,53 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
162 *addr = new_addr; 163 *addr = new_addr;
163} 164}
164 165
166static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
167 __be32 addr[4], const __be32 new_addr[4])
168{
169 int transport_len = skb->len - skb_transport_offset(skb);
170
171 if (l4_proto == IPPROTO_TCP) {
172 if (likely(transport_len >= sizeof(struct tcphdr)))
173 inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
174 addr, new_addr, 1);
175 } else if (l4_proto == IPPROTO_UDP) {
176 if (likely(transport_len >= sizeof(struct udphdr))) {
177 struct udphdr *uh = udp_hdr(skb);
178
179 if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
180 inet_proto_csum_replace16(&uh->check, skb,
181 addr, new_addr, 1);
182 if (!uh->check)
183 uh->check = CSUM_MANGLED_0;
184 }
185 }
186 }
187}
188
189static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
190 __be32 addr[4], const __be32 new_addr[4],
191 bool recalculate_csum)
192{
193 if (recalculate_csum)
194 update_ipv6_checksum(skb, l4_proto, addr, new_addr);
195
196 skb->rxhash = 0;
197 memcpy(addr, new_addr, sizeof(__be32[4]));
198}
199
200static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
201{
202 nh->priority = tc >> 4;
203 nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
204}
205
206static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
207{
208 nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
209 nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
210 nh->flow_lbl[2] = fl & 0x000000FF;
211}
212
165static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) 213static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
166{ 214{
167 csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); 215 csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
@@ -195,6 +243,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
195 return 0; 243 return 0;
196} 244}
197 245
246static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
247{
248 struct ipv6hdr *nh;
249 int err;
250 __be32 *saddr;
251 __be32 *daddr;
252
253 err = make_writable(skb, skb_network_offset(skb) +
254 sizeof(struct ipv6hdr));
255 if (unlikely(err))
256 return err;
257
258 nh = ipv6_hdr(skb);
259 saddr = (__be32 *)&nh->saddr;
260 daddr = (__be32 *)&nh->daddr;
261
262 if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
263 set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
264 ipv6_key->ipv6_src, true);
265
266 if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
267 unsigned int offset = 0;
268 int flags = IP6_FH_F_SKIP_RH;
269 bool recalc_csum = true;
270
271 if (ipv6_ext_hdr(nh->nexthdr))
272 recalc_csum = ipv6_find_hdr(skb, &offset,
273 NEXTHDR_ROUTING, NULL,
274 &flags) != NEXTHDR_ROUTING;
275
276 set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
277 ipv6_key->ipv6_dst, recalc_csum);
278 }
279
280 set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
281 set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
282 nh->hop_limit = ipv6_key->ipv6_hlimit;
283
284 return 0;
285}
286
198/* Must follow make_writable() since that can move the skb data. */ 287/* Must follow make_writable() since that can move the skb data. */
199static void set_tp_port(struct sk_buff *skb, __be16 *port, 288static void set_tp_port(struct sk_buff *skb, __be16 *port,
200 __be16 new_port, __sum16 *check) 289 __be16 new_port, __sum16 *check)
@@ -339,6 +428,10 @@ static int execute_set_action(struct sk_buff *skb,
339 skb->priority = nla_get_u32(nested_attr); 428 skb->priority = nla_get_u32(nested_attr);
340 break; 429 break;
341 430
431 case OVS_KEY_ATTR_SKB_MARK:
432 skb->mark = nla_get_u32(nested_attr);
433 break;
434
342 case OVS_KEY_ATTR_ETHERNET: 435 case OVS_KEY_ATTR_ETHERNET:
343 err = set_eth_addr(skb, nla_data(nested_attr)); 436 err = set_eth_addr(skb, nla_data(nested_attr));
344 break; 437 break;
@@ -347,6 +440,10 @@ static int execute_set_action(struct sk_buff *skb,
347 err = set_ipv4(skb, nla_data(nested_attr)); 440 err = set_ipv4(skb, nla_data(nested_attr));
348 break; 441 break;
349 442
443 case OVS_KEY_ATTR_IPV6:
444 err = set_ipv6(skb, nla_data(nested_attr));
445 break;
446
350 case OVS_KEY_ATTR_TCP: 447 case OVS_KEY_ATTR_TCP:
351 err = set_tcp(skb, nla_data(nested_attr)); 448 err = set_tcp(skb, nla_data(nested_attr));
352 break; 449 break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4c4b62ccc7d7..f996db343247 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
208 int error; 208 int error;
209 int key_len; 209 int key_len;
210 210
211 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); 211 stats = this_cpu_ptr(dp->stats_percpu);
212 212
213 /* Extract flow from 'skb' into 'key'. */ 213 /* Extract flow from 'skb' into 'key'. */
214 error = ovs_flow_extract(skb, p->port_no, &key, &key_len); 214 error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
@@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
282 return 0; 282 return 0;
283 283
284err: 284err:
285 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); 285 stats = this_cpu_ptr(dp->stats_percpu);
286 286
287 u64_stats_update_begin(&stats->sync); 287 u64_stats_update_begin(&stats->sync);
288 stats->n_lost++; 288 stats->n_lost++;
@@ -479,8 +479,10 @@ static int validate_set(const struct nlattr *a,
479 479
480 switch (key_type) { 480 switch (key_type) {
481 const struct ovs_key_ipv4 *ipv4_key; 481 const struct ovs_key_ipv4 *ipv4_key;
482 const struct ovs_key_ipv6 *ipv6_key;
482 483
483 case OVS_KEY_ATTR_PRIORITY: 484 case OVS_KEY_ATTR_PRIORITY:
485 case OVS_KEY_ATTR_SKB_MARK:
484 case OVS_KEY_ATTR_ETHERNET: 486 case OVS_KEY_ATTR_ETHERNET:
485 break; 487 break;
486 488
@@ -500,6 +502,25 @@ static int validate_set(const struct nlattr *a,
500 502
501 break; 503 break;
502 504
505 case OVS_KEY_ATTR_IPV6:
506 if (flow_key->eth.type != htons(ETH_P_IPV6))
507 return -EINVAL;
508
509 if (!flow_key->ip.proto)
510 return -EINVAL;
511
512 ipv6_key = nla_data(ovs_key);
513 if (ipv6_key->ipv6_proto != flow_key->ip.proto)
514 return -EINVAL;
515
516 if (ipv6_key->ipv6_frag != flow_key->ip.frag)
517 return -EINVAL;
518
519 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
520 return -EINVAL;
521
522 break;
523
503 case OVS_KEY_ATTR_TCP: 524 case OVS_KEY_ATTR_TCP:
504 if (flow_key->ip.proto != IPPROTO_TCP) 525 if (flow_key->ip.proto != IPPROTO_TCP)
505 return -EINVAL; 526 return -EINVAL;
@@ -675,6 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
675 goto err_flow_free; 696 goto err_flow_free;
676 697
677 err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, 698 err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
699 &flow->key.phy.skb_mark,
678 &flow->key.phy.in_port, 700 &flow->key.phy.in_port,
679 a[OVS_PACKET_ATTR_KEY]); 701 a[OVS_PACKET_ATTR_KEY]);
680 if (err) 702 if (err)
@@ -694,6 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
694 716
695 OVS_CB(packet)->flow = flow; 717 OVS_CB(packet)->flow = flow;
696 packet->priority = flow->key.phy.priority; 718 packet->priority = flow->key.phy.priority;
719 packet->mark = flow->key.phy.skb_mark;
697 720
698 rcu_read_lock(); 721 rcu_read_lock();
699 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 722 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 733cbf49ed1f..c3294cebc4f2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -604,6 +604,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
604 604
605 key->phy.priority = skb->priority; 605 key->phy.priority = skb->priority;
606 key->phy.in_port = in_port; 606 key->phy.in_port = in_port;
607 key->phy.skb_mark = skb->mark;
607 608
608 skb_reset_mac_header(skb); 609 skb_reset_mac_header(skb);
609 610
@@ -689,7 +690,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
689 } 690 }
690 } 691 }
691 692
692 } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) { 693 } else if ((key->eth.type == htons(ETH_P_ARP) ||
694 key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
693 struct arp_eth_header *arp; 695 struct arp_eth_header *arp;
694 696
695 arp = (struct arp_eth_header *)skb_network_header(skb); 697 arp = (struct arp_eth_header *)skb_network_header(skb);
@@ -802,6 +804,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
802 [OVS_KEY_ATTR_ENCAP] = -1, 804 [OVS_KEY_ATTR_ENCAP] = -1,
803 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 805 [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
804 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 806 [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
807 [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
805 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 808 [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
806 [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 809 [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
807 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 810 [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
@@ -987,6 +990,10 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
987 } else { 990 } else {
988 swkey->phy.in_port = DP_MAX_PORTS; 991 swkey->phy.in_port = DP_MAX_PORTS;
989 } 992 }
993 if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
994 swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
995 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
996 }
990 997
991 /* Data attributes. */ 998 /* Data attributes. */
992 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) 999 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
@@ -1086,7 +1093,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1086 if (err) 1093 if (err)
1087 return err; 1094 return err;
1088 } 1095 }
1089 } else if (swkey->eth.type == htons(ETH_P_ARP)) { 1096 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1097 swkey->eth.type == htons(ETH_P_RARP)) {
1090 const struct ovs_key_arp *arp_key; 1098 const struct ovs_key_arp *arp_key;
1091 1099
1092 if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) 1100 if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
@@ -1113,6 +1121,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1113 1121
1114/** 1122/**
1115 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1123 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1124 * @priority: receives the skb priority
1125 * @mark: receives the skb mark
1116 * @in_port: receives the extracted input port. 1126 * @in_port: receives the extracted input port.
1117 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1127 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1118 * sequence. 1128 * sequence.
@@ -1122,7 +1132,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1122 * get the metadata, that is, the parts of the flow key that cannot be 1132 * get the metadata, that is, the parts of the flow key that cannot be
1123 * extracted from the packet itself. 1133 * extracted from the packet itself.
1124 */ 1134 */
1125int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, 1135int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
1126 const struct nlattr *attr) 1136 const struct nlattr *attr)
1127{ 1137{
1128 const struct nlattr *nla; 1138 const struct nlattr *nla;
@@ -1130,6 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
1130 1140
1131 *in_port = DP_MAX_PORTS; 1141 *in_port = DP_MAX_PORTS;
1132 *priority = 0; 1142 *priority = 0;
1143 *mark = 0;
1133 1144
1134 nla_for_each_nested(nla, attr, rem) { 1145 nla_for_each_nested(nla, attr, rem) {
1135 int type = nla_type(nla); 1146 int type = nla_type(nla);
@@ -1148,6 +1159,10 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
1148 return -EINVAL; 1159 return -EINVAL;
1149 *in_port = nla_get_u32(nla); 1160 *in_port = nla_get_u32(nla);
1150 break; 1161 break;
1162
1163 case OVS_KEY_ATTR_SKB_MARK:
1164 *mark = nla_get_u32(nla);
1165 break;
1151 } 1166 }
1152 } 1167 }
1153 } 1168 }
@@ -1169,6 +1184,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1169 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1184 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
1170 goto nla_put_failure; 1185 goto nla_put_failure;
1171 1186
1187 if (swkey->phy.skb_mark &&
1188 nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
1189 goto nla_put_failure;
1190
1172 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1191 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1173 if (!nla) 1192 if (!nla)
1174 goto nla_put_failure; 1193 goto nla_put_failure;
@@ -1222,7 +1241,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1222 ipv6_key->ipv6_tclass = swkey->ip.tos; 1241 ipv6_key->ipv6_tclass = swkey->ip.tos;
1223 ipv6_key->ipv6_hlimit = swkey->ip.ttl; 1242 ipv6_key->ipv6_hlimit = swkey->ip.ttl;
1224 ipv6_key->ipv6_frag = swkey->ip.frag; 1243 ipv6_key->ipv6_frag = swkey->ip.frag;
1225 } else if (swkey->eth.type == htons(ETH_P_ARP)) { 1244 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1245 swkey->eth.type == htons(ETH_P_RARP)) {
1226 struct ovs_key_arp *arp_key; 1246 struct ovs_key_arp *arp_key;
1227 1247
1228 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); 1248 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 14a324eb017b..a7bb60ff3b5b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -43,6 +43,7 @@ struct sw_flow_actions {
43struct sw_flow_key { 43struct sw_flow_key {
44 struct { 44 struct {
45 u32 priority; /* Packet QoS priority. */ 45 u32 priority; /* Packet QoS priority. */
46 u32 skb_mark; /* SKB mark. */
46 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ 47 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
47 } phy; 48 } phy;
48 struct { 49 struct {
@@ -144,6 +145,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
144 * ------ --- ------ ----- 145 * ------ --- ------ -----
145 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 146 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
146 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 147 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
148 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
147 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 149 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
148 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) 150 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
149 * OVS_KEY_ATTR_8021Q 4 -- 4 8 151 * OVS_KEY_ATTR_8021Q 4 -- 4 8
@@ -153,14 +155,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
153 * OVS_KEY_ATTR_ICMPV6 2 2 4 8 155 * OVS_KEY_ATTR_ICMPV6 2 2 4 8
154 * OVS_KEY_ATTR_ND 28 -- 4 32 156 * OVS_KEY_ATTR_ND 28 -- 4 32
155 * ------------------------------------------------- 157 * -------------------------------------------------
156 * total 144 158 * total 152
157 */ 159 */
158#define FLOW_BUFSIZE 144 160#define FLOW_BUFSIZE 152
159 161
160int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 162int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
161int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 163int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
162 const struct nlattr *); 164 const struct nlattr *);
163int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, 165int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
164 const struct nlattr *); 166 const struct nlattr *);
165 167
166#define MAX_ACTIONS_BUFSIZE (16 * 1024) 168#define MAX_ACTIONS_BUFSIZE (16 * 1024)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index a9033481fa5e..a9327e2e48ce 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -114,6 +114,15 @@ error:
114 return ERR_PTR(err); 114 return ERR_PTR(err);
115} 115}
116 116
117static void free_port_rcu(struct rcu_head *rcu)
118{
119 struct netdev_vport *netdev_vport = container_of(rcu,
120 struct netdev_vport, rcu);
121
122 dev_put(netdev_vport->dev);
123 ovs_vport_free(vport_from_priv(netdev_vport));
124}
125
117static void netdev_destroy(struct vport *vport) 126static void netdev_destroy(struct vport *vport)
118{ 127{
119 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 128 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
@@ -122,10 +131,7 @@ static void netdev_destroy(struct vport *vport)
122 netdev_rx_handler_unregister(netdev_vport->dev); 131 netdev_rx_handler_unregister(netdev_vport->dev);
123 dev_set_promiscuity(netdev_vport->dev, -1); 132 dev_set_promiscuity(netdev_vport->dev, -1);
124 133
125 synchronize_rcu(); 134 call_rcu(&netdev_vport->rcu, free_port_rcu);
126
127 dev_put(netdev_vport->dev);
128 ovs_vport_free(vport);
129} 135}
130 136
131const char *ovs_netdev_get_name(const struct vport *vport) 137const char *ovs_netdev_get_name(const struct vport *vport)
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index f7072a25c604..6478079b3417 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -20,12 +20,15 @@
20#define VPORT_NETDEV_H 1 20#define VPORT_NETDEV_H 1
21 21
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/rcupdate.h>
23 24
24#include "vport.h" 25#include "vport.h"
25 26
26struct vport *ovs_netdev_get_vport(struct net_device *dev); 27struct vport *ovs_netdev_get_vport(struct net_device *dev);
27 28
28struct netdev_vport { 29struct netdev_vport {
30 struct rcu_head rcu;
31
29 struct net_device *dev; 32 struct net_device *dev;
30}; 33};
31 34
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 03779e8a2622..70af0bedbac4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
333{ 333{
334 struct vport_percpu_stats *stats; 334 struct vport_percpu_stats *stats;
335 335
336 stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); 336 stats = this_cpu_ptr(vport->percpu_stats);
337
338 u64_stats_update_begin(&stats->sync); 337 u64_stats_update_begin(&stats->sync);
339 stats->rx_packets++; 338 stats->rx_packets++;
340 stats->rx_bytes += skb->len; 339 stats->rx_bytes += skb->len;
@@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
359 if (likely(sent)) { 358 if (likely(sent)) {
360 struct vport_percpu_stats *stats; 359 struct vport_percpu_stats *stats;
361 360
362 stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); 361 stats = this_cpu_ptr(vport->percpu_stats);
363 362
364 u64_stats_update_begin(&stats->sync); 363 u64_stats_update_begin(&stats->sync);
365 stats->tx_packets++; 364 stats->tx_packets++;