path: root/net/openvswitch/datapath.c
Diffstat (limited to 'net/openvswitch/datapath.c')
 net/openvswitch/datapath.c | 343 +++++++++++++++++++++++++++-----------------
 1 file changed, 202 insertions(+), 141 deletions(-)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e6d7255183eb..ab141d49bb9d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,6 +59,7 @@
 #include "vport-netdev.h"
 
 int ovs_net_id __read_mostly;
+EXPORT_SYMBOL_GPL(ovs_net_id);
 
 static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
@@ -130,27 +131,41 @@ int lockdep_ovsl_is_held(void)
 	else
 		return 1;
 }
+EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
 #endif
 
 static struct vport *new_vport(const struct vport_parms *);
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+			     const struct sw_flow_key *,
 			     const struct dp_upcall_info *);
 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+				  const struct sw_flow_key *,
 				  const struct dp_upcall_info *);
 
-/* Must be called with rcu_read_lock or ovs_mutex. */
-static struct datapath *get_dp(struct net *net, int dp_ifindex)
+/* Must be called with rcu_read_lock. */
+static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
 {
-	struct datapath *dp = NULL;
-	struct net_device *dev;
+	struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
 
-	rcu_read_lock();
-	dev = dev_get_by_index_rcu(net, dp_ifindex);
 	if (dev) {
 		struct vport *vport = ovs_internal_dev_get_vport(dev);
 		if (vport)
-			dp = vport->dp;
+			return vport->dp;
 	}
+
+	return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+	struct datapath *dp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+	rcu_read_lock();
+	dp = get_dp_rcu(net, dp_ifindex);
 	rcu_read_unlock();
 
 	return dp;
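[Note] The lookup is split so callers that already run inside an RCU read-side critical section can use get_dp_rcu() directly, while get_dp() keeps the convenience semantics and now asserts its locking contract via WARN_ON_ONCE(). A condensed sketch of the caller-side pattern this enables (it is exactly what the dump paths further down switch to):

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	/* ... use dp while still under the RCU read lock ... */
	rcu_read_unlock();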
@@ -163,7 +178,7 @@ const char *ovs_dp_name(const struct datapath *dp)
 	return vport->ops->get_name(vport);
 }
 
-static int get_dpifindex(struct datapath *dp)
+static int get_dpifindex(const struct datapath *dp)
 {
 	struct vport *local;
 	int ifindex;
@@ -185,6 +200,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 {
 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
 
+	ovs_flow_tbl_destroy(&dp->table);
 	free_percpu(dp->stats_percpu);
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp->ports);
@@ -243,6 +259,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 	const struct vport *p = OVS_CB(skb)->input_vport;
 	struct datapath *dp = p->dp;
 	struct sw_flow *flow;
+	struct sw_flow_actions *sf_acts;
 	struct dp_stats_percpu *stats;
 	u64 *stats_counter;
 	u32 n_mask_hit;
@@ -256,10 +273,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 		int error;
 
 		upcall.cmd = OVS_PACKET_CMD_MISS;
-		upcall.key = key;
 		upcall.userdata = NULL;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
-		error = ovs_dp_upcall(dp, skb, &upcall);
+		upcall.egress_tun_info = NULL;
+		error = ovs_dp_upcall(dp, skb, key, &upcall);
 		if (unlikely(error))
 			kfree_skb(skb);
 		else
@@ -268,10 +285,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 		goto out;
 	}
 
-	OVS_CB(skb)->flow = flow;
+	ovs_flow_stats_update(flow, key->tp.flags, skb);
+	sf_acts = rcu_dereference(flow->sf_acts);
+	ovs_execute_actions(dp, skb, sf_acts, key);
 
-	ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
-	ovs_execute_actions(dp, skb, key);
 	stats_counter = &stats->n_hit;
 
 out:
@@ -283,6 +300,7 @@ out:
 }
 
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+		  const struct sw_flow_key *key,
 		  const struct dp_upcall_info *upcall_info)
 {
 	struct dp_stats_percpu *stats;
@@ -294,9 +312,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 	}
 
 	if (!skb_is_gso(skb))
-		err = queue_userspace_packet(dp, skb, upcall_info);
+		err = queue_userspace_packet(dp, skb, key, upcall_info);
 	else
-		err = queue_gso_packets(dp, skb, upcall_info);
+		err = queue_gso_packets(dp, skb, key, upcall_info);
 	if (err)
 		goto err;
 
@@ -313,39 +331,43 @@ err:
 }
 
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+			     const struct sw_flow_key *key,
 			     const struct dp_upcall_info *upcall_info)
 {
 	unsigned short gso_type = skb_shinfo(skb)->gso_type;
-	struct dp_upcall_info later_info;
 	struct sw_flow_key later_key;
 	struct sk_buff *segs, *nskb;
+	struct ovs_skb_cb ovs_cb;
 	int err;
 
+	ovs_cb = *OVS_CB(skb);
 	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+	*OVS_CB(skb) = ovs_cb;
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 	if (segs == NULL)
 		return -EINVAL;
 
+	if (gso_type & SKB_GSO_UDP) {
+		/* The initial flow key extracted by ovs_flow_key_extract()
+		 * in this case is for a first fragment, so we need to
+		 * properly mark later fragments.
+		 */
+		later_key = *key;
+		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+	}
+
 	/* Queue all of the segments. */
 	skb = segs;
 	do {
-		err = queue_userspace_packet(dp, skb, upcall_info);
+		*OVS_CB(skb) = ovs_cb;
+		if (gso_type & SKB_GSO_UDP && skb != segs)
+			key = &later_key;
+
+		err = queue_userspace_packet(dp, skb, key, upcall_info);
 		if (err)
 			break;
 
-		if (skb == segs && gso_type & SKB_GSO_UDP) {
-			/* The initial flow key extracted by ovs_flow_extract()
-			 * in this case is for a first fragment, so we need to
-			 * properly mark later fragments.
-			 */
-			later_key = *upcall_info->key;
-			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
-
-			later_info = *upcall_info;
-			later_info.key = &later_key;
-			upcall_info = &later_info;
-		}
 	} while ((skb = skb->next));
 
 	/* Free all of the segments. */
@@ -360,46 +382,26 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 	return err;
 }
 
-static size_t key_attr_size(void)
-{
-	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
-		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
-		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
-		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
-		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
-		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_OAM */
-		  + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
-		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
-		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
-		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
-		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
-		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
-		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
-}
-
-static size_t upcall_msg_size(const struct nlattr *userdata,
+static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 			      unsigned int hdrlen)
 {
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
 		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
-		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+		+ nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
 
 	/* OVS_PACKET_ATTR_USERDATA */
-	if (userdata)
-		size += NLA_ALIGN(userdata->nla_len);
+	if (upcall_info->userdata)
+		size += NLA_ALIGN(upcall_info->userdata->nla_len);
+
+	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+	if (upcall_info->egress_tun_info)
+		size += nla_total_size(ovs_tun_key_attr_size());
 
 	return size;
 }
 
 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+				  const struct sw_flow_key *key,
 				  const struct dp_upcall_info *upcall_info)
 {
 	struct ovs_header *upcall;
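[Note] Passing the whole dp_upcall_info into upcall_msg_size() keeps the size estimate and the attribute emission in sync: the same struct decides whether OVS_PACKET_ATTR_USERDATA and the new OVS_PACKET_ATTR_EGRESS_TUN_KEY are accounted for here and actually written below. A condensed sketch of the consumer side, taken from the following hunks:

	len = upcall_msg_size(upcall_info, hlen);
	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
	if (!user_skb)
		return -ENOMEM;	/* the real code sets err and jumps to its exit label */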
@@ -450,7 +452,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	else
 		hlen = skb->len;
 
-	len = upcall_msg_size(upcall_info->userdata, hlen);
+	len = upcall_msg_size(upcall_info, hlen);
 	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
@@ -462,7 +464,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 	upcall->dp_ifindex = dp_ifindex;
 
 	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-	err = ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+	err = ovs_nla_put_flow(key, key, user_skb);
 	BUG_ON(err);
 	nla_nest_end(user_skb, nla);
 
@@ -471,6 +473,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 			  nla_len(upcall_info->userdata),
 			  nla_data(upcall_info->userdata));
 
+	if (upcall_info->egress_tun_info) {
+		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
+		err = ovs_nla_put_egress_tunnel_key(user_skb,
+						    upcall_info->egress_tun_info);
+		BUG_ON(err);
+		nla_nest_end(user_skb, nla);
+	}
+
 	/* Only reserve room for attribute header, packet data is added
 	 * in skb_zerocopy() */
 	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -510,11 +520,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_actions *acts;
 	struct sk_buff *packet;
 	struct sw_flow *flow;
+	struct sw_flow_actions *sf_acts;
 	struct datapath *dp;
 	struct ethhdr *eth;
 	struct vport *input_vport;
 	int len;
 	int err;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
 	err = -EINVAL;
 	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -548,29 +560,22 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err_kfree_skb;
 
 	err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
-					     &flow->key);
+					     &flow->key, log);
 	if (err)
 		goto err_flow_free;
 
-	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
-	err = PTR_ERR(acts);
-	if (IS_ERR(acts))
-		goto err_flow_free;
-
 	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
-				   &flow->key, 0, &acts);
+				   &flow->key, &acts, log);
 	if (err)
 		goto err_flow_free;
 
 	rcu_assign_pointer(flow->sf_acts, acts);
-
 	OVS_CB(packet)->egress_tun_info = NULL;
-	OVS_CB(packet)->flow = flow;
 	packet->priority = flow->key.phy.priority;
 	packet->mark = flow->key.phy.skb_mark;
 
 	rcu_read_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
 		goto err_unlock;
@@ -583,9 +588,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock;
 
 	OVS_CB(packet)->input_vport = input_vport;
+	sf_acts = rcu_dereference(flow->sf_acts);
 
 	local_bh_disable();
-	err = ovs_execute_actions(dp, packet, &flow->key);
+	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
 	local_bh_enable();
 	rcu_read_unlock();
 
@@ -628,7 +634,7 @@ static struct genl_family dp_packet_genl_family = {
 	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
 };
 
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
 			 struct ovs_dp_megaflow_stats *mega_stats)
 {
 	int i;
@@ -662,8 +668,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 {
 	return NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
-		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
 		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -671,58 +677,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 }
 
 /* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
-				  struct sk_buff *skb, u32 portid,
-				  u32 seq, u32 flags, u8 cmd)
+static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
+				   struct sk_buff *skb)
 {
-	const int skb_orig_len = skb->len;
-	struct nlattr *start;
-	struct ovs_flow_stats stats;
-	__be16 tcp_flags;
-	unsigned long used;
-	struct ovs_header *ovs_header;
 	struct nlattr *nla;
 	int err;
 
-	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
-	if (!ovs_header)
-		return -EMSGSIZE;
-
-	ovs_header->dp_ifindex = dp_ifindex;
-
 	/* Fill flow key. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
 	if (!nla)
-		goto nla_put_failure;
+		return -EMSGSIZE;
 
 	err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
 	if (err)
-		goto error;
+		return err;
+
 	nla_nest_end(skb, nla);
 
+	/* Fill flow mask. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
 	if (!nla)
-		goto nla_put_failure;
+		return -EMSGSIZE;
 
 	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
 	if (err)
-		goto error;
+		return err;
 
 	nla_nest_end(skb, nla);
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
+				   struct sk_buff *skb)
+{
+	struct ovs_flow_stats stats;
+	__be16 tcp_flags;
+	unsigned long used;
 
 	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
 
 	if (used &&
 	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
-		goto nla_put_failure;
+		return -EMSGSIZE;
 
 	if (stats.n_packets &&
 	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
-		goto nla_put_failure;
+		return -EMSGSIZE;
 
 	if ((u8)ntohs(tcp_flags) &&
 	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
-		goto nla_put_failure;
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
+				     struct sk_buff *skb, int skb_orig_len)
+{
+	struct nlattr *start;
+	int err;
 
 	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
 	 * this is the first flow to be dumped into 'skb'. This is unusual for
@@ -746,17 +761,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 			nla_nest_end(skb, start);
 		else {
 			if (skb_orig_len)
-				goto error;
+				return err;
 
 			nla_nest_cancel(skb, start);
 		}
-	} else if (skb_orig_len)
-		goto nla_put_failure;
+	} else if (skb_orig_len) {
+		return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+				  struct sk_buff *skb, u32 portid,
+				  u32 seq, u32 flags, u8 cmd)
+{
+	const int skb_orig_len = skb->len;
+	struct ovs_header *ovs_header;
+	int err;
+
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
+				 flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = dp_ifindex;
+
+	err = ovs_flow_cmd_fill_match(flow, skb);
+	if (err)
+		goto error;
+
+	err = ovs_flow_cmd_fill_stats(flow, skb);
+	if (err)
+		goto error;
+
+	err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+	if (err)
+		goto error;
 
 	return genlmsg_end(skb, ovs_header);
 
-nla_put_failure:
-	err = -EMSGSIZE;
 error:
 	genlmsg_cancel(skb, ovs_header);
 	return err;
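[Note] After this refactor, ovs_flow_cmd_fill_info() is just the genetlink bracketing (genlmsg_put()/genlmsg_end()) around three self-contained helpers, each owning one nla nest, with a single genlmsg_cancel() error path. Presumably the split also lets future callers emit only a subset (match, stats, or actions) without duplicating the nesting logic; that motivation is an inference, not stated in this diff.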
@@ -811,13 +856,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
 	int error;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
 	/* Must have key and actions. */
 	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY])
+	if (!a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR(log, "Flow key attr not present in new flow.");
 		goto error;
-	if (!a[OVS_FLOW_ATTR_ACTIONS])
+	}
+	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
+		OVS_NLERR(log, "Flow actions attr not present in new flow.");
 		goto error;
+	}
 
 	/* Most of the time we need to allocate a new flow, do it before
 	 * locking.
@@ -830,24 +880,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	/* Extract key. */
 	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
-	error = ovs_nla_get_match(&match,
-				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+				  a[OVS_FLOW_ATTR_MASK], log);
 	if (error)
 		goto err_kfree_flow;
 
 	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
 
 	/* Validate actions. */
-	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
-	error = PTR_ERR(acts);
-	if (IS_ERR(acts))
-		goto err_kfree_flow;
-
 	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
-				     0, &acts);
+				     &acts, log);
 	if (error) {
-		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-		goto err_kfree_acts;
+		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
+		goto err_kfree_flow;
 	}
 
 	reply = ovs_flow_cmd_alloc_info(acts, info, false);
@@ -899,6 +944,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		}
 		/* The unmasked key has to be the same for flow updates. */
 		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+			/* Look for any overlapping flow. */
 			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 			if (!flow) {
 				error = -ENOENT;
@@ -938,23 +984,21 @@ error:
 	return error;
 }
 
+/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
 static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
 						const struct sw_flow_key *key,
-						const struct sw_flow_mask *mask)
+						const struct sw_flow_mask *mask,
+						bool log)
 {
 	struct sw_flow_actions *acts;
 	struct sw_flow_key masked_key;
 	int error;
 
-	acts = ovs_nla_alloc_flow_actions(nla_len(a));
-	if (IS_ERR(acts))
-		return acts;
-
 	ovs_flow_mask_key(&masked_key, key, mask);
-	error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
+	error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
 	if (error) {
-		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-		kfree(acts);
+		OVS_NLERR(log,
+			  "Actions may not be safe on all matching packets");
 		return ERR_PTR(error);
 	}
 
@@ -973,29 +1017,31 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
 	struct sw_flow_match match;
 	int error;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
 	/* Extract key. */
 	error = -EINVAL;
-	if (!a[OVS_FLOW_ATTR_KEY])
+	if (!a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR(log, "Flow key attribute not present in set flow.");
 		goto error;
+	}
 
 	ovs_match_init(&match, &key, &mask);
-	error = ovs_nla_get_match(&match,
-				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+				  a[OVS_FLOW_ATTR_MASK], log);
 	if (error)
 		goto error;
 
 	/* Validate actions. */
 	if (a[OVS_FLOW_ATTR_ACTIONS]) {
-		acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask);
+		acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
+					log);
 		if (IS_ERR(acts)) {
 			error = PTR_ERR(acts);
 			goto error;
 		}
-	}
 
 		/* Can allocate before locking if have acts. */
-	if (acts) {
 		reply = ovs_flow_cmd_alloc_info(acts, info, false);
 		if (IS_ERR(reply)) {
 			error = PTR_ERR(reply);
@@ -1070,14 +1116,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct sw_flow_match match;
 	int err;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
 	if (!a[OVS_FLOW_ATTR_KEY]) {
-		OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+		OVS_NLERR(log,
+			  "Flow get message rejected, Key attribute missing.");
 		return -EINVAL;
 	}
 
 	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
 	if (err)
 		return err;
 
@@ -1118,10 +1166,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct sw_flow_match match;
 	int err;
+	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
 	if (likely(a[OVS_FLOW_ATTR_KEY])) {
 		ovs_match_init(&match, &key, NULL);
-		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+					log);
 		if (unlikely(err))
 			return err;
 	}
@@ -1179,7 +1229,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct datapath *dp;
 
 	rcu_read_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (!dp) {
 		rcu_read_unlock();
 		return -ENODEV;
@@ -1211,8 +1261,10 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
 };
 
 static const struct genl_ops dp_flow_genl_ops[] = {
@@ -1313,7 +1365,7 @@ static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
 
 /* Called with rcu_read_lock or ovs_mutex. */
 static struct datapath *lookup_datapath(struct net *net,
-					struct ovs_header *ovs_header,
+					const struct ovs_header *ovs_header,
 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
 {
 	struct datapath *dp;
@@ -1341,7 +1393,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
 	dp->user_features = 0;
 }
 
-static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
 	if (a[OVS_DP_ATTR_USER_FEATURES])
 		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
@@ -1442,7 +1494,7 @@ err_destroy_ports_array:
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
-	ovs_flow_tbl_destroy(&dp->table, false);
+	ovs_flow_tbl_destroy(&dp->table);
 err_free_dp:
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
@@ -1474,8 +1526,6 @@ static void __dp_destroy(struct datapath *dp)
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
 	/* RCU destroy the flow table */
-	ovs_flow_tbl_destroy(&dp->table, true);
-
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
@@ -1707,7 +1757,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 
 /* Called with ovs_mutex or RCU read lock. */
 static struct vport *lookup_vport(struct net *net,
-				  struct ovs_header *ovs_header,
+				  const struct ovs_header *ovs_header,
 				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
 {
 	struct datapath *dp;
@@ -1764,6 +1814,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		return -ENOMEM;
 
 	ovs_lock();
+restart:
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
@@ -1795,8 +1846,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
-	if (IS_ERR(vport))
+	if (IS_ERR(vport)) {
+		if (err == -EAGAIN)
+			goto restart;
 		goto exit_unlock_free;
+	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
 				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
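[Note] The restart label pairs with the new -EAGAIN check: new_vport() can now fail transiently, presumably while a vport implementation module is being loaded (consistent with the ovs_netdev_init()/ovs_netdev_exit() split further down), so the create path re-resolves the datapath and retries under the still-held ovs_lock() instead of returning the transient error to userspace. That reading of -EAGAIN is an inference from this diff, not stated in it.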
@@ -1939,7 +1993,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int i, j = 0;
 
 	rcu_read_lock();
-	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
 	if (!dp) {
 		rcu_read_unlock();
 		return -ENODEV;
@@ -2112,12 +2166,18 @@ static int __init dp_init(void)
 	if (err)
 		goto error_netns_exit;
 
+	err = ovs_netdev_init();
+	if (err)
+		goto error_unreg_notifier;
+
 	err = dp_register_genl();
 	if (err < 0)
-		goto error_unreg_notifier;
+		goto error_unreg_netdev;
 
 	return 0;
 
+error_unreg_netdev:
+	ovs_netdev_exit();
 error_unreg_notifier:
 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
 error_netns_exit:
@@ -2137,6 +2197,7 @@ error:
 static void dp_cleanup(void)
 {
 	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+	ovs_netdev_exit();
 	unregister_netdevice_notifier(&ovs_dp_device_notifier);
 	unregister_pernet_device(&ovs_net_ops);
 	rcu_barrier();
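[Note] Init and teardown stay mirrored after this change:

	/* dp_init():    ... -> ovs_netdev_init() -> dp_register_genl()
	 * dp_cleanup(): dp_unregister_genl() -> ovs_netdev_exit() -> ...
	 */

so the netdev vport support registered by ovs_netdev_init() is in place before any genetlink request can create a vport, and is torn down only after the genl families are unregistered.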