aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-04-17 13:30:32 -0400
committerDavid S. Miller <davem@davemloft.net>2013-04-17 13:30:32 -0400
commit92cf1f23cc9390ea5c00e8185c1f7910c3d15452 (patch)
tree5efa469a504fa66d2127aeaf607b47b919e298b5
parent98d2f0e68c4de36c56fbe3baeae30c001f012243 (diff)
parente0f0ecf33c3f13401f90bff5afdc3ed1bb40b9af (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Jesse Gross says: ==================== A number of improvements for net-next/3.10. Highlights include: * Properly exposing linux/openvswitch.h to userspace after the uapi changes. * Simplification of locking. It immediately makes things simpler to reason about and avoids holding RTNL mutex for longer than necessary. In the near future it will also enable tunnel registration and more fine-grained locking. * Miscellaneous cleanups and simplifications. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/openvswitch.h432
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/openvswitch.h456
-rw-r--r--net/openvswitch/datapath.c393
-rw-r--r--net/openvswitch/datapath.h70
-rw-r--r--net/openvswitch/dp_notify.c82
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/flow.h21
-rw-r--r--net/openvswitch/vport-internal_dev.c6
-rw-r--r--net/openvswitch/vport-netdev.c8
-rw-r--r--net/openvswitch/vport.c58
-rw-r--r--net/openvswitch/vport.h15
12 files changed, 849 insertions, 695 deletions
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 67d6c7b03581..e6b240b6196c 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -19,436 +19,6 @@
19#ifndef _LINUX_OPENVSWITCH_H 19#ifndef _LINUX_OPENVSWITCH_H
20#define _LINUX_OPENVSWITCH_H 1 20#define _LINUX_OPENVSWITCH_H 1
21 21
22#include <linux/types.h> 22#include <uapi/linux/openvswitch.h>
23
24/**
25 * struct ovs_header - header for OVS Generic Netlink messages.
26 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
27 * specific to a datapath).
28 *
29 * Attributes following the header are specific to a particular OVS Generic
30 * Netlink family, but all of the OVS families use this header.
31 */
32
33struct ovs_header {
34 int dp_ifindex;
35};
36
37/* Datapaths. */
38
39#define OVS_DATAPATH_FAMILY "ovs_datapath"
40#define OVS_DATAPATH_MCGROUP "ovs_datapath"
41#define OVS_DATAPATH_VERSION 0x1
42
43enum ovs_datapath_cmd {
44 OVS_DP_CMD_UNSPEC,
45 OVS_DP_CMD_NEW,
46 OVS_DP_CMD_DEL,
47 OVS_DP_CMD_GET,
48 OVS_DP_CMD_SET
49};
50
51/**
52 * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
53 * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
54 * port". This is the name of the network device whose dp_ifindex is given in
55 * the &struct ovs_header. Always present in notifications. Required in
56 * %OVS_DP_NEW requests. May be used as an alternative to specifying
57 * dp_ifindex in other requests (with a dp_ifindex of 0).
58 * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
59 * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
60 * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
61 * not be sent.
62 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
63 * datapath. Always present in notifications.
64 *
65 * These attributes follow the &struct ovs_header within the Generic Netlink
66 * payload for %OVS_DP_* commands.
67 */
68enum ovs_datapath_attr {
69 OVS_DP_ATTR_UNSPEC,
70 OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
71 OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
72 OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
73 __OVS_DP_ATTR_MAX
74};
75
76#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
77
78struct ovs_dp_stats {
79 __u64 n_hit; /* Number of flow table matches. */
80 __u64 n_missed; /* Number of flow table misses. */
81 __u64 n_lost; /* Number of misses not sent to userspace. */
82 __u64 n_flows; /* Number of flows present */
83};
84
85struct ovs_vport_stats {
86 __u64 rx_packets; /* total packets received */
87 __u64 tx_packets; /* total packets transmitted */
88 __u64 rx_bytes; /* total bytes received */
89 __u64 tx_bytes; /* total bytes transmitted */
90 __u64 rx_errors; /* bad packets received */
91 __u64 tx_errors; /* packet transmit problems */
92 __u64 rx_dropped; /* no space in linux buffers */
93 __u64 tx_dropped; /* no space available in linux */
94};
95
96/* Fixed logical ports. */
97#define OVSP_LOCAL ((__u32)0)
98
99/* Packet transfer. */
100
101#define OVS_PACKET_FAMILY "ovs_packet"
102#define OVS_PACKET_VERSION 0x1
103
104enum ovs_packet_cmd {
105 OVS_PACKET_CMD_UNSPEC,
106
107 /* Kernel-to-user notifications. */
108 OVS_PACKET_CMD_MISS, /* Flow table miss. */
109 OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
110
111 /* Userspace commands. */
112 OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */
113};
114
115/**
116 * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
117 * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
118 * packet as received, from the start of the Ethernet header onward. For
119 * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
120 * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
121 * the flow key extracted from the packet as originally received.
122 * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
123 * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
124 * userspace to adapt its flow setup strategy by comparing its notion of the
125 * flow key against the kernel's.
126 * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
127 * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
128 * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
129 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
130 * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
131 * specified there.
132 *
133 * These attributes follow the &struct ovs_header within the Generic Netlink
134 * payload for %OVS_PACKET_* commands.
135 */
136enum ovs_packet_attr {
137 OVS_PACKET_ATTR_UNSPEC,
138 OVS_PACKET_ATTR_PACKET, /* Packet data. */
139 OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
140 OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
141 OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
142 __OVS_PACKET_ATTR_MAX
143};
144
145#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
146
147/* Virtual ports. */
148
149#define OVS_VPORT_FAMILY "ovs_vport"
150#define OVS_VPORT_MCGROUP "ovs_vport"
151#define OVS_VPORT_VERSION 0x1
152
153enum ovs_vport_cmd {
154 OVS_VPORT_CMD_UNSPEC,
155 OVS_VPORT_CMD_NEW,
156 OVS_VPORT_CMD_DEL,
157 OVS_VPORT_CMD_GET,
158 OVS_VPORT_CMD_SET
159};
160
161enum ovs_vport_type {
162 OVS_VPORT_TYPE_UNSPEC,
163 OVS_VPORT_TYPE_NETDEV, /* network device */
164 OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
165 __OVS_VPORT_TYPE_MAX
166};
167
168#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
169
170/**
171 * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
172 * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
173 * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
174 * of vport.
175 * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
176 * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
177 * plus a null terminator.
178 * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
179 * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
180 * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
181 * this port. A value of zero indicates that upcalls should not be sent.
182 * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
183 * packets sent or received through the vport.
184 *
185 * These attributes follow the &struct ovs_header within the Generic Netlink
186 * payload for %OVS_VPORT_* commands.
187 *
188 * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
189 * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
190 * optional; if not specified a free port number is automatically selected.
191 * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
192 * of vport.
193 * and other attributes are ignored.
194 *
195 * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
196 * look up the vport to operate on; otherwise dp_idx from the &struct
197 * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
198 */
199enum ovs_vport_attr {
200 OVS_VPORT_ATTR_UNSPEC,
201 OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */
202 OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
203 OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
204 OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
205 OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
206 OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
207 __OVS_VPORT_ATTR_MAX
208};
209
210#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
211
212/* Flows. */
213
214#define OVS_FLOW_FAMILY "ovs_flow"
215#define OVS_FLOW_MCGROUP "ovs_flow"
216#define OVS_FLOW_VERSION 0x1
217
218enum ovs_flow_cmd {
219 OVS_FLOW_CMD_UNSPEC,
220 OVS_FLOW_CMD_NEW,
221 OVS_FLOW_CMD_DEL,
222 OVS_FLOW_CMD_GET,
223 OVS_FLOW_CMD_SET
224};
225
226struct ovs_flow_stats {
227 __u64 n_packets; /* Number of matched packets. */
228 __u64 n_bytes; /* Number of matched bytes. */
229};
230
231enum ovs_key_attr {
232 OVS_KEY_ATTR_UNSPEC,
233 OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */
234 OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */
235 OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */
236 OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */
237 OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */
238 OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */
239 OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */
240 OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */
241 OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */
242 OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */
243 OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */
244 OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
245 OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
246 OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
247 OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
248 __OVS_KEY_ATTR_MAX
249};
250
251#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
252
253/**
254 * enum ovs_frag_type - IPv4 and IPv6 fragment type
255 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
256 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
257 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
258 *
259 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
260 * ovs_key_ipv6.
261 */
262enum ovs_frag_type {
263 OVS_FRAG_TYPE_NONE,
264 OVS_FRAG_TYPE_FIRST,
265 OVS_FRAG_TYPE_LATER,
266 __OVS_FRAG_TYPE_MAX
267};
268
269#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
270
271struct ovs_key_ethernet {
272 __u8 eth_src[6];
273 __u8 eth_dst[6];
274};
275
276struct ovs_key_ipv4 {
277 __be32 ipv4_src;
278 __be32 ipv4_dst;
279 __u8 ipv4_proto;
280 __u8 ipv4_tos;
281 __u8 ipv4_ttl;
282 __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
283};
284
285struct ovs_key_ipv6 {
286 __be32 ipv6_src[4];
287 __be32 ipv6_dst[4];
288 __be32 ipv6_label; /* 20-bits in least-significant bits. */
289 __u8 ipv6_proto;
290 __u8 ipv6_tclass;
291 __u8 ipv6_hlimit;
292 __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
293};
294
295struct ovs_key_tcp {
296 __be16 tcp_src;
297 __be16 tcp_dst;
298};
299
300struct ovs_key_udp {
301 __be16 udp_src;
302 __be16 udp_dst;
303};
304
305struct ovs_key_icmp {
306 __u8 icmp_type;
307 __u8 icmp_code;
308};
309
310struct ovs_key_icmpv6 {
311 __u8 icmpv6_type;
312 __u8 icmpv6_code;
313};
314
315struct ovs_key_arp {
316 __be32 arp_sip;
317 __be32 arp_tip;
318 __be16 arp_op;
319 __u8 arp_sha[6];
320 __u8 arp_tha[6];
321};
322
323struct ovs_key_nd {
324 __u32 nd_target[4];
325 __u8 nd_sll[6];
326 __u8 nd_tll[6];
327};
328
329/**
330 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
331 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
332 * key. Always present in notifications. Required for all requests (except
333 * dumps).
334 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
335 * the actions to take for packets that match the key. Always present in
336 * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for
337 * %OVS_FLOW_CMD_SET requests.
338 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
339 * flow. Present in notifications if the stats would be nonzero. Ignored in
340 * requests.
341 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
342 * TCP flags seen on packets in this flow. Only present in notifications for
343 * TCP flows, and only if it would be nonzero. Ignored in requests.
344 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
345 * the system monotonic clock, at which a packet was last processed for this
346 * flow. Only present in notifications if a packet has been processed for this
347 * flow. Ignored in requests.
348 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
349 * last-used time, accumulated TCP flags, and statistics for this flow.
350 * Otherwise ignored in requests. Never present in notifications.
351 *
352 * These attributes follow the &struct ovs_header within the Generic Netlink
353 * payload for %OVS_FLOW_* commands.
354 */
355enum ovs_flow_attr {
356 OVS_FLOW_ATTR_UNSPEC,
357 OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */
358 OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
359 OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */
360 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
361 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
362 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
363 __OVS_FLOW_ATTR_MAX
364};
365
366#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
367
368/**
369 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
370 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
371 * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
372 * %UINT32_MAX samples all packets and intermediate values sample intermediate
373 * fractions of packets.
374 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
375 * Actions are passed as nested attributes.
376 *
377 * Executes the specified actions with the given probability on a per-packet
378 * basis.
379 */
380enum ovs_sample_attr {
381 OVS_SAMPLE_ATTR_UNSPEC,
382 OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
383 OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
384 __OVS_SAMPLE_ATTR_MAX,
385};
386
387#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
388
389/**
390 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
391 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
392 * message should be sent. Required.
393 * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
394 * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
395 */
396enum ovs_userspace_attr {
397 OVS_USERSPACE_ATTR_UNSPEC,
398 OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
399 OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
400 __OVS_USERSPACE_ATTR_MAX
401};
402
403#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
404
405/**
406 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
407 * @vlan_tpid: Tag protocol identifier (TPID) to push.
408 * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
409 * (but it will not be set in the 802.1Q header that is pushed).
410 *
411 * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID
412 * values are those that the kernel module also parses as 802.1Q headers, to
413 * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
414 * from having surprising results.
415 */
416struct ovs_action_push_vlan {
417 __be16 vlan_tpid; /* 802.1Q TPID. */
418 __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
419};
420
421/**
422 * enum ovs_action_attr - Action types.
423 *
424 * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
425 * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
426 * %OVS_USERSPACE_ATTR_* attributes.
427 * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
428 * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
429 * value.
430 * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
431 * packet.
432 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
433 * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
434 * the nested %OVS_SAMPLE_ATTR_* attributes.
435 *
436 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
437 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
438 * type may not be changed.
439 */
440
441enum ovs_action_attr {
442 OVS_ACTION_ATTR_UNSPEC,
443 OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
444 OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
445 OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
446 OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
447 OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
448 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
449 __OVS_ACTION_ATTR_MAX
450};
451
452#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
453 23
454#endif /* _LINUX_OPENVSWITCH_H */ 24#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 7df190525337..ab5d4992e568 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -285,6 +285,7 @@ header-y += nvram.h
285header-y += omap3isp.h 285header-y += omap3isp.h
286header-y += omapfb.h 286header-y += omapfb.h
287header-y += oom.h 287header-y += oom.h
288header-y += openvswitch.h
288header-y += packet_diag.h 289header-y += packet_diag.h
289header-y += param.h 290header-y += param.h
290header-y += parport.h 291header-y += parport.h
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
new file mode 100644
index 000000000000..405918dd7b3f
--- /dev/null
+++ b/include/uapi/linux/openvswitch.h
@@ -0,0 +1,456 @@
1
2/*
3 * Copyright (c) 2007-2011 Nicira Networks.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _UAPI__LINUX_OPENVSWITCH_H
21#define _UAPI__LINUX_OPENVSWITCH_H 1
22
23#include <linux/types.h>
24#include <linux/if_ether.h>
25
26/**
27 * struct ovs_header - header for OVS Generic Netlink messages.
28 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
29 * specific to a datapath).
30 *
31 * Attributes following the header are specific to a particular OVS Generic
32 * Netlink family, but all of the OVS families use this header.
33 */
34
35struct ovs_header {
36 int dp_ifindex;
37};
38
39/* Datapaths. */
40
41#define OVS_DATAPATH_FAMILY "ovs_datapath"
42#define OVS_DATAPATH_MCGROUP "ovs_datapath"
43#define OVS_DATAPATH_VERSION 0x1
44
45enum ovs_datapath_cmd {
46 OVS_DP_CMD_UNSPEC,
47 OVS_DP_CMD_NEW,
48 OVS_DP_CMD_DEL,
49 OVS_DP_CMD_GET,
50 OVS_DP_CMD_SET
51};
52
53/**
54 * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
55 * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
56 * port". This is the name of the network device whose dp_ifindex is given in
57 * the &struct ovs_header. Always present in notifications. Required in
58 * %OVS_DP_NEW requests. May be used as an alternative to specifying
59 * dp_ifindex in other requests (with a dp_ifindex of 0).
60 * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
61 * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
62 * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
63 * not be sent.
64 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
65 * datapath. Always present in notifications.
66 *
67 * These attributes follow the &struct ovs_header within the Generic Netlink
68 * payload for %OVS_DP_* commands.
69 */
70enum ovs_datapath_attr {
71 OVS_DP_ATTR_UNSPEC,
72 OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
73 OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
74 OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
75 __OVS_DP_ATTR_MAX
76};
77
78#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
79
80struct ovs_dp_stats {
81 __u64 n_hit; /* Number of flow table matches. */
82 __u64 n_missed; /* Number of flow table misses. */
83 __u64 n_lost; /* Number of misses not sent to userspace. */
84 __u64 n_flows; /* Number of flows present */
85};
86
87struct ovs_vport_stats {
88 __u64 rx_packets; /* total packets received */
89 __u64 tx_packets; /* total packets transmitted */
90 __u64 rx_bytes; /* total bytes received */
91 __u64 tx_bytes; /* total bytes transmitted */
92 __u64 rx_errors; /* bad packets received */
93 __u64 tx_errors; /* packet transmit problems */
94 __u64 rx_dropped; /* no space in linux buffers */
95 __u64 tx_dropped; /* no space available in linux */
96};
97
98/* Fixed logical ports. */
99#define OVSP_LOCAL ((__u32)0)
100
101/* Packet transfer. */
102
103#define OVS_PACKET_FAMILY "ovs_packet"
104#define OVS_PACKET_VERSION 0x1
105
106enum ovs_packet_cmd {
107 OVS_PACKET_CMD_UNSPEC,
108
109 /* Kernel-to-user notifications. */
110 OVS_PACKET_CMD_MISS, /* Flow table miss. */
111 OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
112
113 /* Userspace commands. */
114 OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */
115};
116
117/**
118 * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
119 * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
120 * packet as received, from the start of the Ethernet header onward. For
121 * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
122 * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
123 * the flow key extracted from the packet as originally received.
124 * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
125 * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
126 * userspace to adapt its flow setup strategy by comparing its notion of the
127 * flow key against the kernel's.
128 * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
129 * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
130 * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
131 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
132 * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
133 * specified there.
134 *
135 * These attributes follow the &struct ovs_header within the Generic Netlink
136 * payload for %OVS_PACKET_* commands.
137 */
138enum ovs_packet_attr {
139 OVS_PACKET_ATTR_UNSPEC,
140 OVS_PACKET_ATTR_PACKET, /* Packet data. */
141 OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
142 OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
143 OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
144 __OVS_PACKET_ATTR_MAX
145};
146
147#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
148
149/* Virtual ports. */
150
151#define OVS_VPORT_FAMILY "ovs_vport"
152#define OVS_VPORT_MCGROUP "ovs_vport"
153#define OVS_VPORT_VERSION 0x1
154
155enum ovs_vport_cmd {
156 OVS_VPORT_CMD_UNSPEC,
157 OVS_VPORT_CMD_NEW,
158 OVS_VPORT_CMD_DEL,
159 OVS_VPORT_CMD_GET,
160 OVS_VPORT_CMD_SET
161};
162
163enum ovs_vport_type {
164 OVS_VPORT_TYPE_UNSPEC,
165 OVS_VPORT_TYPE_NETDEV, /* network device */
166 OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
167 __OVS_VPORT_TYPE_MAX
168};
169
170#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
171
172/**
173 * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
174 * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
175 * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
176 * of vport.
177 * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
178 * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
179 * plus a null terminator.
180 * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
181 * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
182 * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
183 * this port. A value of zero indicates that upcalls should not be sent.
184 * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
185 * packets sent or received through the vport.
186 *
187 * These attributes follow the &struct ovs_header within the Generic Netlink
188 * payload for %OVS_VPORT_* commands.
189 *
190 * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
191 * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
192 * optional; if not specified a free port number is automatically selected.
193 * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
194 * of vport.
195 * and other attributes are ignored.
196 *
197 * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
198 * look up the vport to operate on; otherwise dp_idx from the &struct
199 * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
200 */
201enum ovs_vport_attr {
202 OVS_VPORT_ATTR_UNSPEC,
203 OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */
204 OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
205 OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
206 OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
207 OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
208 OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
209 __OVS_VPORT_ATTR_MAX
210};
211
212#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
213
214/* Flows. */
215
216#define OVS_FLOW_FAMILY "ovs_flow"
217#define OVS_FLOW_MCGROUP "ovs_flow"
218#define OVS_FLOW_VERSION 0x1
219
220enum ovs_flow_cmd {
221 OVS_FLOW_CMD_UNSPEC,
222 OVS_FLOW_CMD_NEW,
223 OVS_FLOW_CMD_DEL,
224 OVS_FLOW_CMD_GET,
225 OVS_FLOW_CMD_SET
226};
227
228struct ovs_flow_stats {
229 __u64 n_packets; /* Number of matched packets. */
230 __u64 n_bytes; /* Number of matched bytes. */
231};
232
233enum ovs_key_attr {
234 OVS_KEY_ATTR_UNSPEC,
235 OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */
236 OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */
237 OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */
238 OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */
239 OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */
240 OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */
241 OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */
242 OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */
243 OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */
244 OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */
245 OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */
246 OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
247 OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
248 OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
249 OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
250 __OVS_KEY_ATTR_MAX
251};
252
253#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
254
255/**
256 * enum ovs_frag_type - IPv4 and IPv6 fragment type
257 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
258 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
259 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
260 *
261 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
262 * ovs_key_ipv6.
263 */
264enum ovs_frag_type {
265 OVS_FRAG_TYPE_NONE,
266 OVS_FRAG_TYPE_FIRST,
267 OVS_FRAG_TYPE_LATER,
268 __OVS_FRAG_TYPE_MAX
269};
270
271#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
272
273struct ovs_key_ethernet {
274 __u8 eth_src[ETH_ALEN];
275 __u8 eth_dst[ETH_ALEN];
276};
277
278struct ovs_key_ipv4 {
279 __be32 ipv4_src;
280 __be32 ipv4_dst;
281 __u8 ipv4_proto;
282 __u8 ipv4_tos;
283 __u8 ipv4_ttl;
284 __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
285};
286
287struct ovs_key_ipv6 {
288 __be32 ipv6_src[4];
289 __be32 ipv6_dst[4];
290 __be32 ipv6_label; /* 20-bits in least-significant bits. */
291 __u8 ipv6_proto;
292 __u8 ipv6_tclass;
293 __u8 ipv6_hlimit;
294 __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
295};
296
297struct ovs_key_tcp {
298 __be16 tcp_src;
299 __be16 tcp_dst;
300};
301
302struct ovs_key_udp {
303 __be16 udp_src;
304 __be16 udp_dst;
305};
306
307struct ovs_key_icmp {
308 __u8 icmp_type;
309 __u8 icmp_code;
310};
311
312struct ovs_key_icmpv6 {
313 __u8 icmpv6_type;
314 __u8 icmpv6_code;
315};
316
317struct ovs_key_arp {
318 __be32 arp_sip;
319 __be32 arp_tip;
320 __be16 arp_op;
321 __u8 arp_sha[ETH_ALEN];
322 __u8 arp_tha[ETH_ALEN];
323};
324
325struct ovs_key_nd {
326 __u32 nd_target[4];
327 __u8 nd_sll[ETH_ALEN];
328 __u8 nd_tll[ETH_ALEN];
329};
330
331/**
332 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
333 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
334 * key. Always present in notifications. Required for all requests (except
335 * dumps).
336 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
337 * the actions to take for packets that match the key. Always present in
338 * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for
339 * %OVS_FLOW_CMD_SET requests.
340 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
341 * flow. Present in notifications if the stats would be nonzero. Ignored in
342 * requests.
343 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
344 * TCP flags seen on packets in this flow. Only present in notifications for
345 * TCP flows, and only if it would be nonzero. Ignored in requests.
346 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
347 * the system monotonic clock, at which a packet was last processed for this
348 * flow. Only present in notifications if a packet has been processed for this
349 * flow. Ignored in requests.
350 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
351 * last-used time, accumulated TCP flags, and statistics for this flow.
352 * Otherwise ignored in requests. Never present in notifications.
353 *
354 * These attributes follow the &struct ovs_header within the Generic Netlink
355 * payload for %OVS_FLOW_* commands.
356 */
357enum ovs_flow_attr {
358 OVS_FLOW_ATTR_UNSPEC,
359 OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */
360 OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
361 OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */
362 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
363 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
364 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
365 __OVS_FLOW_ATTR_MAX
366};
367
368#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
369
370/**
371 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
372 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
373 * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
374 * %UINT32_MAX samples all packets and intermediate values sample intermediate
375 * fractions of packets.
376 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
377 * Actions are passed as nested attributes.
378 *
379 * Executes the specified actions with the given probability on a per-packet
380 * basis.
381 */
382enum ovs_sample_attr {
383 OVS_SAMPLE_ATTR_UNSPEC,
384 OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
385 OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
386 __OVS_SAMPLE_ATTR_MAX,
387};
388
389#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
390
391/**
392 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
393 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
394 * message should be sent. Required.
395 * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
396 * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
397 */
398enum ovs_userspace_attr {
399 OVS_USERSPACE_ATTR_UNSPEC,
400 OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
401 OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
402 __OVS_USERSPACE_ATTR_MAX
403};
404
405#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
406
407/**
408 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
409 * @vlan_tpid: Tag protocol identifier (TPID) to push.
410 * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
411 * (but it will not be set in the 802.1Q header that is pushed).
412 *
413 * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID
414 * values are those that the kernel module also parses as 802.1Q headers, to
415 * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
416 * from having surprising results.
417 */
418struct ovs_action_push_vlan {
419 __be16 vlan_tpid; /* 802.1Q TPID. */
420 __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
421};
422
423/**
424 * enum ovs_action_attr - Action types.
425 *
426 * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
427 * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
428 * %OVS_USERSPACE_ATTR_* attributes.
429 * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
430 * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
431 * value.
432 * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
433 * packet.
434 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
435 * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
436 * the nested %OVS_SAMPLE_ATTR_* attributes.
437 *
438 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
439 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
440 * type may not be changed.
441 */
442
443enum ovs_action_attr {
444 OVS_ACTION_ATTR_UNSPEC,
445 OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
446 OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
447 OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
448 OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
449 OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
450 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
451 __OVS_ACTION_ATTR_MAX
452};
453
454#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
455
456#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 8759265a3e46..b7d0b7c3fe2c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
44#include <linux/netfilter_ipv4.h> 44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h> 45#include <linux/inetdevice.h>
46#include <linux/list.h> 46#include <linux/list.h>
47#include <linux/lockdep.h>
47#include <linux/openvswitch.h> 48#include <linux/openvswitch.h>
48#include <linux/rculist.h> 49#include <linux/rculist.h>
49#include <linux/dmi.h> 50#include <linux/dmi.h>
@@ -56,38 +57,59 @@
56#include "flow.h" 57#include "flow.h"
57#include "vport-internal_dev.h" 58#include "vport-internal_dev.h"
58 59
59/**
60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex.
63 */
64struct ovs_net {
65 struct list_head dps;
66};
67
68static int ovs_net_id __read_mostly;
69 60
70#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 61#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71static void rehash_flow_table(struct work_struct *work); 62static void rehash_flow_table(struct work_struct *work);
72static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 63static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73 64
65int ovs_net_id __read_mostly;
66
67static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
68 struct genl_multicast_group *grp)
69{
70 genl_notify(skb, genl_info_net(info), info->snd_portid,
71 grp->id, info->nlhdr, GFP_KERNEL);
72}
73
74/** 74/**
75 * DOC: Locking: 75 * DOC: Locking:
76 * 76 *
77 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * All writes e.g. Writes to device state (add/remove datapath, port, set
78 * etc.) are protected by RTNL. 78 * operations on vports, etc.), Writes to other state (flow table
79 * 79 * modifications, set miscellaneous datapath parameters, etc.) are protected
80 * Writes to other state (flow table modifications, set miscellaneous datapath 80 * by ovs_lock.
81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
82 * genl_mutex.
83 * 81 *
84 * Reads are protected by RCU. 82 * Reads are protected by RCU.
85 * 83 *
86 * There are a few special cases (mostly stats) that have their own 84 * There are a few special cases (mostly stats) that have their own
87 * synchronization but they nest under all of above and don't interact with 85 * synchronization but they nest under all of above and don't interact with
88 * each other. 86 * each other.
87 *
88 * The RTNL lock nests inside ovs_mutex.
89 */ 89 */
90 90
91static DEFINE_MUTEX(ovs_mutex);
92
93void ovs_lock(void)
94{
95 mutex_lock(&ovs_mutex);
96}
97
98void ovs_unlock(void)
99{
100 mutex_unlock(&ovs_mutex);
101}
102
103#ifdef CONFIG_LOCKDEP
104int lockdep_ovsl_is_held(void)
105{
106 if (debug_locks)
107 return lockdep_is_held(&ovs_mutex);
108 else
109 return 1;
110}
111#endif
112
91static struct vport *new_vport(const struct vport_parms *); 113static struct vport *new_vport(const struct vport_parms *);
92static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 114static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
93 const struct dp_upcall_info *); 115 const struct dp_upcall_info *);
@@ -95,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
95 struct sk_buff *, 117 struct sk_buff *,
96 const struct dp_upcall_info *); 118 const struct dp_upcall_info *);
97 119
98/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 120/* Must be called with rcu_read_lock or ovs_mutex. */
99static struct datapath *get_dp(struct net *net, int dp_ifindex) 121static struct datapath *get_dp(struct net *net, int dp_ifindex)
100{ 122{
101 struct datapath *dp = NULL; 123 struct datapath *dp = NULL;
@@ -113,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
113 return dp; 135 return dp;
114} 136}
115 137
116/* Must be called with rcu_read_lock or RTNL lock. */ 138/* Must be called with rcu_read_lock or ovs_mutex. */
117const char *ovs_dp_name(const struct datapath *dp) 139const char *ovs_dp_name(const struct datapath *dp)
118{ 140{
119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); 141 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
120 return vport->ops->get_name(vport); 142 return vport->ops->get_name(vport);
121} 143}
122 144
@@ -168,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
168 return NULL; 190 return NULL;
169} 191}
170 192
171/* Called with RTNL lock and genl_lock. */ 193/* Called with ovs_mutex. */
172static struct vport *new_vport(const struct vport_parms *parms) 194static struct vport *new_vport(const struct vport_parms *parms)
173{ 195{
174 struct vport *vport; 196 struct vport *vport;
@@ -180,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
180 202
181 hlist_add_head_rcu(&vport->dp_hash_node, head); 203 hlist_add_head_rcu(&vport->dp_hash_node, head);
182 } 204 }
183
184 return vport; 205 return vport;
185} 206}
186 207
187/* Called with RTNL lock. */
188void ovs_dp_detach_port(struct vport *p) 208void ovs_dp_detach_port(struct vport *p)
189{ 209{
190 ASSERT_RTNL(); 210 ASSERT_OVSL();
191 211
192 /* First drop references to device. */ 212 /* First drop references to device. */
193 hlist_del_rcu(&p->dp_hash_node); 213 hlist_del_rcu(&p->dp_hash_node);
@@ -337,6 +357,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
337 return err; 357 return err;
338} 358}
339 359
360static size_t key_attr_size(void)
361{
362 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
363 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
364 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
365 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
366 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
367 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
368 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
369 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
370 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
371 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
372 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
373}
374
375static size_t upcall_msg_size(const struct sk_buff *skb,
376 const struct nlattr *userdata)
377{
378 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
379 + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
380 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
381
382 /* OVS_PACKET_ATTR_USERDATA */
383 if (userdata)
384 size += NLA_ALIGN(userdata->nla_len);
385
386 return size;
387}
388
340static int queue_userspace_packet(struct net *net, int dp_ifindex, 389static int queue_userspace_packet(struct net *net, int dp_ifindex,
341 struct sk_buff *skb, 390 struct sk_buff *skb,
342 const struct dp_upcall_info *upcall_info) 391 const struct dp_upcall_info *upcall_info)
@@ -345,7 +394,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
345 struct sk_buff *nskb = NULL; 394 struct sk_buff *nskb = NULL;
346 struct sk_buff *user_skb; /* to be queued to userspace */ 395 struct sk_buff *user_skb; /* to be queued to userspace */
347 struct nlattr *nla; 396 struct nlattr *nla;
348 unsigned int len;
349 int err; 397 int err;
350 398
351 if (vlan_tx_tag_present(skb)) { 399 if (vlan_tx_tag_present(skb)) {
@@ -366,13 +414,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
366 goto out; 414 goto out;
367 } 415 }
368 416
369 len = sizeof(struct ovs_header); 417 user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
370 len += nla_total_size(skb->len);
371 len += nla_total_size(FLOW_BUFSIZE);
372 if (upcall_info->userdata)
373 len += NLA_ALIGN(upcall_info->userdata->nla_len);
374
375 user_skb = genlmsg_new(len, GFP_ATOMIC);
376 if (!user_skb) { 418 if (!user_skb) {
377 err = -ENOMEM; 419 err = -ENOMEM;
378 goto out; 420 goto out;
@@ -403,13 +445,13 @@ out:
403 return err; 445 return err;
404} 446}
405 447
406/* Called with genl_mutex. */ 448/* Called with ovs_mutex. */
407static int flush_flows(struct datapath *dp) 449static int flush_flows(struct datapath *dp)
408{ 450{
409 struct flow_table *old_table; 451 struct flow_table *old_table;
410 struct flow_table *new_table; 452 struct flow_table *new_table;
411 453
412 old_table = genl_dereference(dp->table); 454 old_table = ovsl_dereference(dp->table);
413 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 455 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
414 if (!new_table) 456 if (!new_table)
415 return -ENOMEM; 457 return -ENOMEM;
@@ -662,8 +704,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
662 704
663 err = -EINVAL; 705 err = -EINVAL;
664 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 706 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
665 !a[OVS_PACKET_ATTR_ACTIONS] || 707 !a[OVS_PACKET_ATTR_ACTIONS])
666 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
667 goto err; 708 goto err;
668 709
669 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 710 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -673,7 +714,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
673 goto err; 714 goto err;
674 skb_reserve(packet, NET_IP_ALIGN); 715 skb_reserve(packet, NET_IP_ALIGN);
675 716
676 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); 717 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
677 718
678 skb_reset_mac_header(packet); 719 skb_reset_mac_header(packet);
679 eth = eth_hdr(packet); 720 eth = eth_hdr(packet);
@@ -744,7 +785,7 @@ err:
744} 785}
745 786
746static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 787static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
747 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, 788 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
748 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 789 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
749 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 790 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
750}; 791};
@@ -760,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
760static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 801static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
761{ 802{
762 int i; 803 int i;
763 struct flow_table *table = genl_dereference(dp->table); 804 struct flow_table *table = ovsl_dereference(dp->table);
764 805
765 stats->n_flows = ovs_flow_tbl_count(table); 806 stats->n_flows = ovs_flow_tbl_count(table);
766 807
@@ -802,7 +843,17 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
802 .name = OVS_FLOW_MCGROUP 843 .name = OVS_FLOW_MCGROUP
803}; 844};
804 845
805/* Called with genl_lock. */ 846static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
847{
848 return NLMSG_ALIGN(sizeof(struct ovs_header))
849 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
850 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
851 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
852 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
853 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
854}
855
856/* Called with ovs_mutex. */
806static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 857static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
807 struct sk_buff *skb, u32 portid, 858 struct sk_buff *skb, u32 portid,
808 u32 seq, u32 flags, u8 cmd) 859 u32 seq, u32 flags, u8 cmd)
@@ -816,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
816 u8 tcp_flags; 867 u8 tcp_flags;
817 int err; 868 int err;
818 869
819 sf_acts = rcu_dereference_protected(flow->sf_acts, 870 sf_acts = ovsl_dereference(flow->sf_acts);
820 lockdep_genl_is_held());
821 871
822 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 872 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
823 if (!ovs_header) 873 if (!ovs_header)
@@ -880,25 +930,10 @@ error:
880static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 930static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
881{ 931{
882 const struct sw_flow_actions *sf_acts; 932 const struct sw_flow_actions *sf_acts;
883 int len;
884
885 sf_acts = rcu_dereference_protected(flow->sf_acts,
886 lockdep_genl_is_held());
887
888 /* OVS_FLOW_ATTR_KEY */
889 len = nla_total_size(FLOW_BUFSIZE);
890 /* OVS_FLOW_ATTR_ACTIONS */
891 len += nla_total_size(sf_acts->actions_len);
892 /* OVS_FLOW_ATTR_STATS */
893 len += nla_total_size(sizeof(struct ovs_flow_stats));
894 /* OVS_FLOW_ATTR_TCP_FLAGS */
895 len += nla_total_size(1);
896 /* OVS_FLOW_ATTR_USED */
897 len += nla_total_size(8);
898 933
899 len += NLMSG_ALIGN(sizeof(struct ovs_header)); 934 sf_acts = ovsl_dereference(flow->sf_acts);
900 935
901 return genlmsg_new(len, GFP_KERNEL); 936 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
902} 937}
903 938
904static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 939static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -947,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
947 goto error; 982 goto error;
948 } 983 }
949 984
985 ovs_lock();
950 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 986 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
951 error = -ENODEV; 987 error = -ENODEV;
952 if (!dp) 988 if (!dp)
953 goto error; 989 goto err_unlock_ovs;
954 990
955 table = genl_dereference(dp->table); 991 table = ovsl_dereference(dp->table);
956 flow = ovs_flow_tbl_lookup(table, &key, key_len); 992 flow = ovs_flow_tbl_lookup(table, &key, key_len);
957 if (!flow) { 993 if (!flow) {
958 struct sw_flow_actions *acts; 994 struct sw_flow_actions *acts;
@@ -960,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
960 /* Bail out if we're not allowed to create a new flow. */ 996 /* Bail out if we're not allowed to create a new flow. */
961 error = -ENOENT; 997 error = -ENOENT;
962 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 998 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
963 goto error; 999 goto err_unlock_ovs;
964 1000
965 /* Expand table, if necessary, to make room. */ 1001 /* Expand table, if necessary, to make room. */
966 if (ovs_flow_tbl_need_to_expand(table)) { 1002 if (ovs_flow_tbl_need_to_expand(table)) {
@@ -970,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
970 if (!IS_ERR(new_table)) { 1006 if (!IS_ERR(new_table)) {
971 rcu_assign_pointer(dp->table, new_table); 1007 rcu_assign_pointer(dp->table, new_table);
972 ovs_flow_tbl_deferred_destroy(table); 1008 ovs_flow_tbl_deferred_destroy(table);
973 table = genl_dereference(dp->table); 1009 table = ovsl_dereference(dp->table);
974 } 1010 }
975 } 1011 }
976 1012
@@ -978,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
978 flow = ovs_flow_alloc(); 1014 flow = ovs_flow_alloc();
979 if (IS_ERR(flow)) { 1015 if (IS_ERR(flow)) {
980 error = PTR_ERR(flow); 1016 error = PTR_ERR(flow);
981 goto error; 1017 goto err_unlock_ovs;
982 } 1018 }
983 flow->key = key; 1019 flow->key = key;
984 clear_stats(flow); 1020 clear_stats(flow);
@@ -1011,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1011 error = -EEXIST; 1047 error = -EEXIST;
1012 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1048 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1013 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1049 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1014 goto error; 1050 goto err_unlock_ovs;
1015 1051
1016 /* Update actions. */ 1052 /* Update actions. */
1017 old_acts = rcu_dereference_protected(flow->sf_acts, 1053 old_acts = ovsl_dereference(flow->sf_acts);
1018 lockdep_genl_is_held());
1019 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1054 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1020 if (acts_attrs && 1055 if (acts_attrs &&
1021 (old_acts->actions_len != nla_len(acts_attrs) || 1056 (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1026,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1026 new_acts = ovs_flow_actions_alloc(acts_attrs); 1061 new_acts = ovs_flow_actions_alloc(acts_attrs);
1027 error = PTR_ERR(new_acts); 1062 error = PTR_ERR(new_acts);
1028 if (IS_ERR(new_acts)) 1063 if (IS_ERR(new_acts))
1029 goto error; 1064 goto err_unlock_ovs;
1030 1065
1031 rcu_assign_pointer(flow->sf_acts, new_acts); 1066 rcu_assign_pointer(flow->sf_acts, new_acts);
1032 ovs_flow_deferred_free_acts(old_acts); 1067 ovs_flow_deferred_free_acts(old_acts);
@@ -1042,11 +1077,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1042 spin_unlock_bh(&flow->lock); 1077 spin_unlock_bh(&flow->lock);
1043 } 1078 }
1044 } 1079 }
1080 ovs_unlock();
1045 1081
1046 if (!IS_ERR(reply)) 1082 if (!IS_ERR(reply))
1047 genl_notify(reply, genl_info_net(info), info->snd_portid, 1083 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1048 ovs_dp_flow_multicast_group.id, info->nlhdr,
1049 GFP_KERNEL);
1050 else 1084 else
1051 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1085 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1052 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1086 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1054,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1054 1088
1055error_free_flow: 1089error_free_flow:
1056 ovs_flow_free(flow); 1090 ovs_flow_free(flow);
1091err_unlock_ovs:
1092 ovs_unlock();
1057error: 1093error:
1058 return error; 1094 return error;
1059} 1095}
@@ -1076,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1076 if (err) 1112 if (err)
1077 return err; 1113 return err;
1078 1114
1115 ovs_lock();
1079 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1116 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1080 if (!dp) 1117 if (!dp) {
1081 return -ENODEV; 1118 err = -ENODEV;
1119 goto unlock;
1120 }
1082 1121
1083 table = genl_dereference(dp->table); 1122 table = ovsl_dereference(dp->table);
1084 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1123 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1085 if (!flow) 1124 if (!flow) {
1086 return -ENOENT; 1125 err = -ENOENT;
1126 goto unlock;
1127 }
1087 1128
1088 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1129 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1089 info->snd_seq, OVS_FLOW_CMD_NEW); 1130 info->snd_seq, OVS_FLOW_CMD_NEW);
1090 if (IS_ERR(reply)) 1131 if (IS_ERR(reply)) {
1091 return PTR_ERR(reply); 1132 err = PTR_ERR(reply);
1133 goto unlock;
1134 }
1092 1135
1136 ovs_unlock();
1093 return genlmsg_reply(reply, info); 1137 return genlmsg_reply(reply, info);
1138unlock:
1139 ovs_unlock();
1140 return err;
1094} 1141}
1095 1142
1096static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1143static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1105,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1105 int err; 1152 int err;
1106 int key_len; 1153 int key_len;
1107 1154
1155 ovs_lock();
1108 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1156 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1109 if (!dp) 1157 if (!dp) {
1110 return -ENODEV; 1158 err = -ENODEV;
1111 1159 goto unlock;
1112 if (!a[OVS_FLOW_ATTR_KEY]) 1160 }
1113 return flush_flows(dp);
1114 1161
1162 if (!a[OVS_FLOW_ATTR_KEY]) {
1163 err = flush_flows(dp);
1164 goto unlock;
1165 }
1115 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1166 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1116 if (err) 1167 if (err)
1117 return err; 1168 goto unlock;
1118 1169
1119 table = genl_dereference(dp->table); 1170 table = ovsl_dereference(dp->table);
1120 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1171 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1121 if (!flow) 1172 if (!flow) {
1122 return -ENOENT; 1173 err = -ENOENT;
1174 goto unlock;
1175 }
1123 1176
1124 reply = ovs_flow_cmd_alloc_info(flow); 1177 reply = ovs_flow_cmd_alloc_info(flow);
1125 if (!reply) 1178 if (!reply) {
1126 return -ENOMEM; 1179 err = -ENOMEM;
1180 goto unlock;
1181 }
1127 1182
1128 ovs_flow_tbl_remove(table, flow); 1183 ovs_flow_tbl_remove(table, flow);
1129 1184
@@ -1132,10 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1132 BUG_ON(err < 0); 1187 BUG_ON(err < 0);
1133 1188
1134 ovs_flow_deferred_free(flow); 1189 ovs_flow_deferred_free(flow);
1190 ovs_unlock();
1135 1191
1136 genl_notify(reply, genl_info_net(info), info->snd_portid, 1192 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1137 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1138 return 0; 1193 return 0;
1194unlock:
1195 ovs_unlock();
1196 return err;
1139} 1197}
1140 1198
1141static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1199static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1144,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1144 struct datapath *dp; 1202 struct datapath *dp;
1145 struct flow_table *table; 1203 struct flow_table *table;
1146 1204
1205 ovs_lock();
1147 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1206 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1148 if (!dp) 1207 if (!dp) {
1208 ovs_unlock();
1149 return -ENODEV; 1209 return -ENODEV;
1210 }
1150 1211
1151 table = genl_dereference(dp->table); 1212 table = ovsl_dereference(dp->table);
1152 1213
1153 for (;;) { 1214 for (;;) {
1154 struct sw_flow *flow; 1215 struct sw_flow *flow;
@@ -1169,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1169 cb->args[0] = bucket; 1230 cb->args[0] = bucket;
1170 cb->args[1] = obj; 1231 cb->args[1] = obj;
1171 } 1232 }
1233 ovs_unlock();
1172 return skb->len; 1234 return skb->len;
1173} 1235}
1174 1236
@@ -1214,6 +1276,16 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1214 .name = OVS_DATAPATH_MCGROUP 1276 .name = OVS_DATAPATH_MCGROUP
1215}; 1277};
1216 1278
1279static size_t ovs_dp_cmd_msg_size(void)
1280{
1281 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1282
1283 msgsize += nla_total_size(IFNAMSIZ);
1284 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1285
1286 return msgsize;
1287}
1288
1217static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1289static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1218 u32 portid, u32 seq, u32 flags, u8 cmd) 1290 u32 portid, u32 seq, u32 flags, u8 cmd)
1219{ 1291{
@@ -1252,7 +1324,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1252 struct sk_buff *skb; 1324 struct sk_buff *skb;
1253 int retval; 1325 int retval;
1254 1326
1255 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1327 skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1256 if (!skb) 1328 if (!skb)
1257 return ERR_PTR(-ENOMEM); 1329 return ERR_PTR(-ENOMEM);
1258 1330
@@ -1264,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1264 return skb; 1336 return skb;
1265} 1337}
1266 1338
1267/* Called with genl_mutex and optionally with RTNL lock also. */ 1339/* Called with ovs_mutex. */
1268static struct datapath *lookup_datapath(struct net *net, 1340static struct datapath *lookup_datapath(struct net *net,
1269 struct ovs_header *ovs_header, 1341 struct ovs_header *ovs_header,
1270 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1342 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1298,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1298 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1370 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1299 goto err; 1371 goto err;
1300 1372
1301 rtnl_lock(); 1373 ovs_lock();
1302 1374
1303 err = -ENOMEM; 1375 err = -ENOMEM;
1304 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1376 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1305 if (dp == NULL) 1377 if (dp == NULL)
1306 goto err_unlock_rtnl; 1378 goto err_unlock_ovs;
1307 1379
1308 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1380 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1309 1381
@@ -1354,37 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1354 1426
1355 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1427 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1356 list_add_tail(&dp->list_node, &ovs_net->dps); 1428 list_add_tail(&dp->list_node, &ovs_net->dps);
1357 rtnl_unlock();
1358 1429
1359 genl_notify(reply, genl_info_net(info), info->snd_portid, 1430 ovs_unlock();
1360 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1431
1361 GFP_KERNEL); 1432 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1362 return 0; 1433 return 0;
1363 1434
1364err_destroy_local_port: 1435err_destroy_local_port:
1365 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1436 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1366err_destroy_ports_array: 1437err_destroy_ports_array:
1367 kfree(dp->ports); 1438 kfree(dp->ports);
1368err_destroy_percpu: 1439err_destroy_percpu:
1369 free_percpu(dp->stats_percpu); 1440 free_percpu(dp->stats_percpu);
1370err_destroy_table: 1441err_destroy_table:
1371 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1442 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1372err_free_dp: 1443err_free_dp:
1373 release_net(ovs_dp_get_net(dp)); 1444 release_net(ovs_dp_get_net(dp));
1374 kfree(dp); 1445 kfree(dp);
1375err_unlock_rtnl: 1446err_unlock_ovs:
1376 rtnl_unlock(); 1447 ovs_unlock();
1377err: 1448err:
1378 return err; 1449 return err;
1379} 1450}
1380 1451
1381/* Called with genl_mutex. */ 1452/* Called with ovs_mutex. */
1382static void __dp_destroy(struct datapath *dp) 1453static void __dp_destroy(struct datapath *dp)
1383{ 1454{
1384 int i; 1455 int i;
1385 1456
1386 rtnl_lock();
1387
1388 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1457 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1389 struct vport *vport; 1458 struct vport *vport;
1390 struct hlist_node *n; 1459 struct hlist_node *n;
@@ -1395,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp)
1395 } 1464 }
1396 1465
1397 list_del(&dp->list_node); 1466 list_del(&dp->list_node);
1398 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1399 1467
1400 /* rtnl_unlock() will wait until all the references to devices that 1468 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1401 * are pending unregistration have been dropped. We do it here to 1469 * all port in datapath are destroyed first before freeing datapath.
1402 * ensure that any internal devices (which contain DP pointers) are
1403 * fully destroyed before freeing the datapath.
1404 */ 1470 */
1405 rtnl_unlock(); 1471 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1406 1472
1407 call_rcu(&dp->rcu, destroy_dp_rcu); 1473 call_rcu(&dp->rcu, destroy_dp_rcu);
1408} 1474}
@@ -1413,24 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1413 struct datapath *dp; 1479 struct datapath *dp;
1414 int err; 1480 int err;
1415 1481
1482 ovs_lock();
1416 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1483 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1417 err = PTR_ERR(dp); 1484 err = PTR_ERR(dp);
1418 if (IS_ERR(dp)) 1485 if (IS_ERR(dp))
1419 return err; 1486 goto unlock;
1420 1487
1421 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1488 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1422 info->snd_seq, OVS_DP_CMD_DEL); 1489 info->snd_seq, OVS_DP_CMD_DEL);
1423 err = PTR_ERR(reply); 1490 err = PTR_ERR(reply);
1424 if (IS_ERR(reply)) 1491 if (IS_ERR(reply))
1425 return err; 1492 goto unlock;
1426 1493
1427 __dp_destroy(dp); 1494 __dp_destroy(dp);
1495 ovs_unlock();
1428 1496
1429 genl_notify(reply, genl_info_net(info), info->snd_portid, 1497 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1430 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1431 GFP_KERNEL);
1432 1498
1433 return 0; 1499 return 0;
1500unlock:
1501 ovs_unlock();
1502 return err;
1434} 1503}
1435 1504
1436static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1505static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1439,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1439 struct datapath *dp; 1508 struct datapath *dp;
1440 int err; 1509 int err;
1441 1510
1511 ovs_lock();
1442 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1512 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1513 err = PTR_ERR(dp);
1443 if (IS_ERR(dp)) 1514 if (IS_ERR(dp))
1444 return PTR_ERR(dp); 1515 goto unlock;
1445 1516
1446 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1517 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1447 info->snd_seq, OVS_DP_CMD_NEW); 1518 info->snd_seq, OVS_DP_CMD_NEW);
@@ -1449,31 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1449 err = PTR_ERR(reply); 1520 err = PTR_ERR(reply);
1450 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1521 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1451 ovs_dp_datapath_multicast_group.id, err); 1522 ovs_dp_datapath_multicast_group.id, err);
1452 return 0; 1523 err = 0;
1524 goto unlock;
1453 } 1525 }
1454 1526
1455 genl_notify(reply, genl_info_net(info), info->snd_portid, 1527 ovs_unlock();
1456 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1528 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1457 GFP_KERNEL);
1458 1529
1459 return 0; 1530 return 0;
1531unlock:
1532 ovs_unlock();
1533 return err;
1460} 1534}
1461 1535
1462static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1536static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1463{ 1537{
1464 struct sk_buff *reply; 1538 struct sk_buff *reply;
1465 struct datapath *dp; 1539 struct datapath *dp;
1540 int err;
1466 1541
1542 ovs_lock();
1467 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1543 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1468 if (IS_ERR(dp)) 1544 if (IS_ERR(dp)) {
1469 return PTR_ERR(dp); 1545 err = PTR_ERR(dp);
1546 goto unlock;
1547 }
1470 1548
1471 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1549 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1472 info->snd_seq, OVS_DP_CMD_NEW); 1550 info->snd_seq, OVS_DP_CMD_NEW);
1473 if (IS_ERR(reply)) 1551 if (IS_ERR(reply)) {
1474 return PTR_ERR(reply); 1552 err = PTR_ERR(reply);
1553 goto unlock;
1554 }
1475 1555
1556 ovs_unlock();
1476 return genlmsg_reply(reply, info); 1557 return genlmsg_reply(reply, info);
1558
1559unlock:
1560 ovs_unlock();
1561 return err;
1477} 1562}
1478 1563
1479static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1564static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1483,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1483 int skip = cb->args[0]; 1568 int skip = cb->args[0];
1484 int i = 0; 1569 int i = 0;
1485 1570
1571 ovs_lock();
1486 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1572 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1487 if (i >= skip && 1573 if (i >= skip &&
1488 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1574 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1491,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1491 break; 1577 break;
1492 i++; 1578 i++;
1493 } 1579 }
1580 ovs_unlock();
1494 1581
1495 cb->args[0] = i; 1582 cb->args[0] = i;
1496 1583
@@ -1543,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
1543 .name = OVS_VPORT_MCGROUP 1630 .name = OVS_VPORT_MCGROUP
1544}; 1631};
1545 1632
1546/* Called with RTNL lock or RCU read lock. */ 1633/* Called with ovs_mutex or RCU read lock. */
1547static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1634static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1548 u32 portid, u32 seq, u32 flags, u8 cmd) 1635 u32 portid, u32 seq, u32 flags, u8 cmd)
1549{ 1636{
@@ -1582,7 +1669,7 @@ error:
1582 return err; 1669 return err;
1583} 1670}
1584 1671
1585/* Called with RTNL lock or RCU read lock. */ 1672/* Called with ovs_mutex or RCU read lock. */
1586struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1673struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1587 u32 seq, u8 cmd) 1674 u32 seq, u8 cmd)
1588{ 1675{
@@ -1601,7 +1688,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1601 return skb; 1688 return skb;
1602} 1689}
1603 1690
1604/* Called with RTNL lock or RCU read lock. */ 1691/* Called with ovs_mutex or RCU read lock. */
1605static struct vport *lookup_vport(struct net *net, 1692static struct vport *lookup_vport(struct net *net,
1606 struct ovs_header *ovs_header, 1693 struct ovs_header *ovs_header,
1607 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1694 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1627,7 +1714,7 @@ static struct vport *lookup_vport(struct net *net,
1627 if (!dp) 1714 if (!dp)
1628 return ERR_PTR(-ENODEV); 1715 return ERR_PTR(-ENODEV);
1629 1716
1630 vport = ovs_vport_rtnl_rcu(dp, port_no); 1717 vport = ovs_vport_ovsl_rcu(dp, port_no);
1631 if (!vport) 1718 if (!vport)
1632 return ERR_PTR(-ENODEV); 1719 return ERR_PTR(-ENODEV);
1633 return vport; 1720 return vport;
@@ -1651,7 +1738,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1651 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1738 !a[OVS_VPORT_ATTR_UPCALL_PID])
1652 goto exit; 1739 goto exit;
1653 1740
1654 rtnl_lock(); 1741 ovs_lock();
1655 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1742 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1656 err = -ENODEV; 1743 err = -ENODEV;
1657 if (!dp) 1744 if (!dp)
@@ -1664,7 +1751,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1664 if (port_no >= DP_MAX_PORTS) 1751 if (port_no >= DP_MAX_PORTS)
1665 goto exit_unlock; 1752 goto exit_unlock;
1666 1753
1667 vport = ovs_vport_rtnl_rcu(dp, port_no); 1754 vport = ovs_vport_ovsl(dp, port_no);
1668 err = -EBUSY; 1755 err = -EBUSY;
1669 if (vport) 1756 if (vport)
1670 goto exit_unlock; 1757 goto exit_unlock;
@@ -1674,7 +1761,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1674 err = -EFBIG; 1761 err = -EFBIG;
1675 goto exit_unlock; 1762 goto exit_unlock;
1676 } 1763 }
1677 vport = ovs_vport_rtnl(dp, port_no); 1764 vport = ovs_vport_ovsl(dp, port_no);
1678 if (!vport) 1765 if (!vport)
1679 break; 1766 break;
1680 } 1767 }
@@ -1700,11 +1787,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1700 ovs_dp_detach_port(vport); 1787 ovs_dp_detach_port(vport);
1701 goto exit_unlock; 1788 goto exit_unlock;
1702 } 1789 }
1703 genl_notify(reply, genl_info_net(info), info->snd_portid, 1790
1704 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1791 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1705 1792
1706exit_unlock: 1793exit_unlock:
1707 rtnl_unlock(); 1794 ovs_unlock();
1708exit: 1795exit:
1709 return err; 1796 return err;
1710} 1797}
@@ -1716,7 +1803,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1716 struct vport *vport; 1803 struct vport *vport;
1717 int err; 1804 int err;
1718 1805
1719 rtnl_lock(); 1806 ovs_lock();
1720 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1807 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1721 err = PTR_ERR(vport); 1808 err = PTR_ERR(vport);
1722 if (IS_ERR(vport)) 1809 if (IS_ERR(vport))
@@ -1742,11 +1829,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1742 goto exit_unlock; 1829 goto exit_unlock;
1743 } 1830 }
1744 1831
1745 genl_notify(reply, genl_info_net(info), info->snd_portid, 1832 ovs_unlock();
1746 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1833 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1834 return 0;
1747 1835
1748exit_unlock: 1836exit_unlock:
1749 rtnl_unlock(); 1837 ovs_unlock();
1750 return err; 1838 return err;
1751} 1839}
1752 1840
@@ -1757,7 +1845,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1757 struct vport *vport; 1845 struct vport *vport;
1758 int err; 1846 int err;
1759 1847
1760 rtnl_lock(); 1848 ovs_lock();
1761 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1849 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1762 err = PTR_ERR(vport); 1850 err = PTR_ERR(vport);
1763 if (IS_ERR(vport)) 1851 if (IS_ERR(vport))
@@ -1777,11 +1865,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1777 err = 0; 1865 err = 0;
1778 ovs_dp_detach_port(vport); 1866 ovs_dp_detach_port(vport);
1779 1867
1780 genl_notify(reply, genl_info_net(info), info->snd_portid, 1868 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1781 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1782 1869
1783exit_unlock: 1870exit_unlock:
1784 rtnl_unlock(); 1871 ovs_unlock();
1785 return err; 1872 return err;
1786} 1873}
1787 1874
@@ -1941,13 +2028,13 @@ static void rehash_flow_table(struct work_struct *work)
1941 struct datapath *dp; 2028 struct datapath *dp;
1942 struct net *net; 2029 struct net *net;
1943 2030
1944 genl_lock(); 2031 ovs_lock();
1945 rtnl_lock(); 2032 rtnl_lock();
1946 for_each_net(net) { 2033 for_each_net(net) {
1947 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2034 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1948 2035
1949 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2036 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1950 struct flow_table *old_table = genl_dereference(dp->table); 2037 struct flow_table *old_table = ovsl_dereference(dp->table);
1951 struct flow_table *new_table; 2038 struct flow_table *new_table;
1952 2039
1953 new_table = ovs_flow_tbl_rehash(old_table); 2040 new_table = ovs_flow_tbl_rehash(old_table);
@@ -1958,8 +2045,7 @@ static void rehash_flow_table(struct work_struct *work)
1958 } 2045 }
1959 } 2046 }
1960 rtnl_unlock(); 2047 rtnl_unlock();
1961 genl_unlock(); 2048 ovs_unlock();
1962
1963 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2049 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1964} 2050}
1965 2051
@@ -1968,18 +2054,21 @@ static int __net_init ovs_init_net(struct net *net)
1968 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2054 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1969 2055
1970 INIT_LIST_HEAD(&ovs_net->dps); 2056 INIT_LIST_HEAD(&ovs_net->dps);
2057 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
1971 return 0; 2058 return 0;
1972} 2059}
1973 2060
1974static void __net_exit ovs_exit_net(struct net *net) 2061static void __net_exit ovs_exit_net(struct net *net)
1975{ 2062{
1976 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1977 struct datapath *dp, *dp_next; 2063 struct datapath *dp, *dp_next;
2064 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1978 2065
1979 genl_lock(); 2066 ovs_lock();
1980 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2067 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1981 __dp_destroy(dp); 2068 __dp_destroy(dp);
1982 genl_unlock(); 2069 ovs_unlock();
2070
2071 cancel_work_sync(&ovs_net->dp_notify_work);
1983} 2072}
1984 2073
1985static struct pernet_operations ovs_net_ops = { 2074static struct pernet_operations ovs_net_ops = {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9125ad5c5aeb..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
57 * struct datapath - datapath for flow-based packet switching 57 * struct datapath - datapath for flow-based packet switching
58 * @rcu: RCU callback head for deferred destruction. 58 * @rcu: RCU callback head for deferred destruction.
59 * @list_node: Element in global 'dps' list. 59 * @list_node: Element in global 'dps' list.
60 * @n_flows: Number of flows currently in flow table. 60 * @table: Current flow table. Protected by ovs_mutex and RCU.
61 * @table: Current flow table. Protected by genl_lock and RCU.
62 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 61 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
63 * RTNL and RCU. 62 * ovs_mutex and RCU.
64 * @stats_percpu: Per-CPU datapath statistics. 63 * @stats_percpu: Per-CPU datapath statistics.
65 * @net: Reference to net namespace. 64 * @net: Reference to net namespace.
66 * 65 *
@@ -86,26 +85,6 @@ struct datapath {
86#endif 85#endif
87}; 86};
88 87
89struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
90
91static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
92{
93 WARN_ON_ONCE(!rcu_read_lock_held());
94 return ovs_lookup_vport(dp, port_no);
95}
96
97static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
98{
99 WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
100 return ovs_lookup_vport(dp, port_no);
101}
102
103static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
104{
105 ASSERT_RTNL();
106 return ovs_lookup_vport(dp, port_no);
107}
108
109/** 88/**
110 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
111 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -132,6 +111,30 @@ struct dp_upcall_info {
132 u32 portid; 111 u32 portid;
133}; 112};
134 113
114/**
115 * struct ovs_net - Per net-namespace data for ovs.
116 * @dps: List of datapaths to enable dumping them all out.
117 * Protected by genl_mutex.
118 */
119struct ovs_net {
120 struct list_head dps;
121 struct work_struct dp_notify_work;
122};
123
124extern int ovs_net_id;
125void ovs_lock(void);
126void ovs_unlock(void);
127
128#ifdef CONFIG_LOCKDEP
129int lockdep_ovsl_is_held(void);
130#else
131#define lockdep_ovsl_is_held() 1
132#endif
133
134#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
135#define ovsl_dereference(p) \
136 rcu_dereference_protected(p, lockdep_ovsl_is_held())
137
135static inline struct net *ovs_dp_get_net(struct datapath *dp) 138static inline struct net *ovs_dp_get_net(struct datapath *dp)
136{ 139{
137 return read_pnet(&dp->net); 140 return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
142 write_pnet(&dp->net, net); 145 write_pnet(&dp->net, net);
143} 146}
144 147
148struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
149
150static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
151{
152 WARN_ON_ONCE(!rcu_read_lock_held());
153 return ovs_lookup_vport(dp, port_no);
154}
155
156static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
157{
158 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
159 return ovs_lookup_vport(dp, port_no);
160}
161
162static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
163{
164 ASSERT_OVSL();
165 return ovs_lookup_vport(dp, port_no);
166}
167
145extern struct notifier_block ovs_dp_device_notifier; 168extern struct notifier_block ovs_dp_device_notifier;
146extern struct genl_multicast_group ovs_dp_vport_multicast_group; 169extern struct genl_multicast_group ovs_dp_vport_multicast_group;
147 170
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
155 u8 cmd); 178 u8 cmd);
156 179
157int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 180int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
181void ovs_dp_notify_wq(struct work_struct *work);
158#endif /* datapath.h */ 182#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
18 18
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <net/genetlink.h> 20#include <net/genetlink.h>
21#include <net/netns/generic.h>
21 22
22#include "datapath.h" 23#include "datapath.h"
23#include "vport-internal_dev.h" 24#include "vport-internal_dev.h"
24#include "vport-netdev.h" 25#include "vport-netdev.h"
25 26
27static void dp_detach_port_notify(struct vport *vport)
28{
29 struct sk_buff *notify;
30 struct datapath *dp;
31
32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0,
34 OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) {
37 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
38 ovs_dp_vport_multicast_group.id,
39 PTR_ERR(notify));
40 return;
41 }
42
43 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
44 ovs_dp_vport_multicast_group.id,
45 GFP_KERNEL);
46}
47
48void ovs_dp_notify_wq(struct work_struct *work)
49{
50 struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
51 struct datapath *dp;
52
53 ovs_lock();
54 list_for_each_entry(dp, &ovs_net->dps, list_node) {
55 int i;
56
57 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
58 struct vport *vport;
59 struct hlist_node *n;
60
61 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
62 struct netdev_vport *netdev_vport;
63
64 if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
65 continue;
66
67 netdev_vport = netdev_vport_priv(vport);
68 if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
69 netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
70 dp_detach_port_notify(vport);
71 }
72 }
73 }
74 ovs_unlock();
75}
76
26static int dp_device_event(struct notifier_block *unused, unsigned long event, 77static int dp_device_event(struct notifier_block *unused, unsigned long event,
27 void *ptr) 78 void *ptr)
28{ 79{
80 struct ovs_net *ovs_net;
29 struct net_device *dev = ptr; 81 struct net_device *dev = ptr;
30 struct vport *vport; 82 struct vport *vport = NULL;
31 83
32 if (ovs_is_internal_dev(dev)) 84 if (!ovs_is_internal_dev(dev))
33 vport = ovs_internal_dev_get_vport(dev);
34 else
35 vport = ovs_netdev_get_vport(dev); 85 vport = ovs_netdev_get_vport(dev);
36 86
37 if (!vport) 87 if (!vport)
38 return NOTIFY_DONE; 88 return NOTIFY_DONE;
39 89
40 switch (event) { 90 if (event == NETDEV_UNREGISTER) {
41 case NETDEV_UNREGISTER: 91 ovs_net = net_generic(dev_net(dev), ovs_net_id);
42 if (!ovs_is_internal_dev(dev)) { 92 queue_work(system_wq, &ovs_net->dp_notify_work);
43 struct sk_buff *notify;
44 struct datapath *dp = vport->dp;
45
46 notify = ovs_vport_cmd_build_info(vport, 0, 0,
47 OVS_VPORT_CMD_DEL);
48 ovs_dp_detach_port(vport);
49 if (IS_ERR(notify)) {
50 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
51 ovs_dp_vport_multicast_group.id,
52 PTR_ERR(notify));
53 break;
54 }
55
56 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
57 ovs_dp_vport_multicast_group.id,
58 GFP_KERNEL);
59 }
60 break;
61 } 93 }
62 94
63 return NOTIFY_DONE; 95 return NOTIFY_DONE;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 332486839347..cf9328be75e9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
211 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
212 212
213 sfa->actions_len = actions_len; 213 sfa->actions_len = actions_len;
214 memcpy(sfa->actions, nla_data(actions), actions_len); 214 nla_memcpy(sfa->actions, actions, actions_len);
215 return sfa; 215 return sfa;
216} 216}
217 217
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5b..0875fde65b9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
138void ovs_flow_used(struct sw_flow *, struct sk_buff *); 138void ovs_flow_used(struct sw_flow *, struct sk_buff *);
139u64 ovs_flow_used_time(unsigned long flow_jiffies); 139u64 ovs_flow_used_time(unsigned long flow_jiffies);
140 140
141/* Upper bound on the length of a nlattr-formatted flow key. The longest
142 * nlattr-formatted flow key would be:
143 *
144 * struct pad nl hdr total
145 * ------ --- ------ -----
146 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
147 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
148 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
149 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
150 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
151 * OVS_KEY_ATTR_8021Q 4 -- 4 8
152 * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
153 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
154 * OVS_KEY_ATTR_IPV6 40 -- 4 44
155 * OVS_KEY_ATTR_ICMPV6 2 2 4 8
156 * OVS_KEY_ATTR_ND 28 -- 4 32
157 * -------------------------------------------------
158 * total 152
159 */
160#define FLOW_BUFSIZE 152
161
162int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
163int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
164 const struct nlattr *); 143 const struct nlattr *);
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 40f8a2489c90..9604760494b1 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -173,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
173 if (vport->port_no == OVSP_LOCAL) 173 if (vport->port_no == OVSP_LOCAL)
174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; 174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
175 175
176 rtnl_lock();
176 err = register_netdevice(netdev_vport->dev); 177 err = register_netdevice(netdev_vport->dev);
177 if (err) 178 if (err)
178 goto error_free_netdev; 179 goto error_free_netdev;
179 180
180 dev_set_promiscuity(netdev_vport->dev, 1); 181 dev_set_promiscuity(netdev_vport->dev, 1);
182 rtnl_unlock();
181 netif_start_queue(netdev_vport->dev); 183 netif_start_queue(netdev_vport->dev);
182 184
183 return vport; 185 return vport;
184 186
185error_free_netdev: 187error_free_netdev:
188 rtnl_unlock();
186 free_netdev(netdev_vport->dev); 189 free_netdev(netdev_vport->dev);
187error_free_vport: 190error_free_vport:
188 ovs_vport_free(vport); 191 ovs_vport_free(vport);
@@ -195,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
195 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 198 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
196 199
197 netif_stop_queue(netdev_vport->dev); 200 netif_stop_queue(netdev_vport->dev);
201 rtnl_lock();
198 dev_set_promiscuity(netdev_vport->dev, -1); 202 dev_set_promiscuity(netdev_vport->dev, -1);
199 203
200 /* unregister_netdevice() waits for an RCU grace period. */ 204 /* unregister_netdevice() waits for an RCU grace period. */
201 unregister_netdevice(netdev_vport->dev); 205 unregister_netdevice(netdev_vport->dev);
206
207 rtnl_unlock();
202} 208}
203 209
204static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) 210static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..40a89ae8e19f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
100 goto error_put; 100 goto error_put;
101 } 101 }
102 102
103 rtnl_lock();
103 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 104 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
104 vport); 105 vport);
105 if (err) 106 if (err)
106 goto error_put; 107 goto error_unlock;
107 108
108 dev_set_promiscuity(netdev_vport->dev, 1); 109 dev_set_promiscuity(netdev_vport->dev, 1);
109 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 110 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
111 rtnl_unlock();
110 112
111 return vport; 113 return vport;
112 114
115error_unlock:
116 rtnl_unlock();
113error_put: 117error_put:
114 dev_put(netdev_vport->dev); 118 dev_put(netdev_vport->dev);
115error_free_vport: 119error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
131{ 135{
132 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 136 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
133 137
138 rtnl_lock();
134 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 139 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
135 netdev_rx_handler_unregister(netdev_vport->dev); 140 netdev_rx_handler_unregister(netdev_vport->dev);
136 dev_set_promiscuity(netdev_vport->dev, -1); 141 dev_set_promiscuity(netdev_vport->dev, -1);
142 rtnl_unlock();
137 143
138 call_rcu(&netdev_vport->rcu, free_port_rcu); 144 call_rcu(&netdev_vport->rcu, free_port_rcu);
139} 145}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4cb..720623190eaa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41}; 41};
42 42
43/* Protected by RCU read lock for reading, RTNL lock for writing. */ 43/* Protected by RCU read lock for reading, ovs_mutex for writing. */
44static struct hlist_head *dev_table; 44static struct hlist_head *dev_table;
45#define VPORT_HASH_BUCKETS 1024 45#define VPORT_HASH_BUCKETS 1024
46 46
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
80 * 80 *
81 * @name: name of port to find 81 * @name: name of port to find
82 * 82 *
83 * Must be called with RTNL or RCU read lock. 83 * Must be called with ovs or RCU read lock.
84 */ 84 */
85struct vport *ovs_vport_locate(struct net *net, const char *name) 85struct vport *ovs_vport_locate(struct net *net, const char *name)
86{ 86{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
128 vport->ops = ops; 128 vport->ops = ops;
129 INIT_HLIST_NODE(&vport->dp_hash_node); 129 INIT_HLIST_NODE(&vport->dp_hash_node);
130 130
131 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); 131 vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
132 if (!vport->percpu_stats) { 132 if (!vport->percpu_stats) {
133 kfree(vport); 133 kfree(vport);
134 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
161 * @parms: Information about new vport. 161 * @parms: Information about new vport.
162 * 162 *
163 * Creates a new vport with the specified configuration (which is dependent on 163 * Creates a new vport with the specified configuration (which is dependent on
164 * device type). RTNL lock must be held. 164 * device type). ovs_mutex must be held.
165 */ 165 */
166struct vport *ovs_vport_add(const struct vport_parms *parms) 166struct vport *ovs_vport_add(const struct vport_parms *parms)
167{ 167{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
169 int err = 0; 169 int err = 0;
170 int i; 170 int i;
171 171
172 ASSERT_RTNL();
173
174 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 172 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
175 if (vport_ops_list[i]->type == parms->type) { 173 if (vport_ops_list[i]->type == parms->type) {
176 struct hlist_head *bucket; 174 struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
201 * @port: New configuration. 199 * @port: New configuration.
202 * 200 *
203 * Modifies an existing device with the specified configuration (which is 201 * Modifies an existing device with the specified configuration (which is
204 * dependent on device type). RTNL lock must be held. 202 * dependent on device type). ovs_mutex must be held.
205 */ 203 */
206int ovs_vport_set_options(struct vport *vport, struct nlattr *options) 204int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
207{ 205{
208 ASSERT_RTNL();
209
210 if (!vport->ops->set_options) 206 if (!vport->ops->set_options)
211 return -EOPNOTSUPP; 207 return -EOPNOTSUPP;
212 return vport->ops->set_options(vport, options); 208 return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
218 * @vport: vport to delete. 214 * @vport: vport to delete.
219 * 215 *
220 * Detaches @vport from its datapath and destroys it. It is possible to fail 216 * Detaches @vport from its datapath and destroys it. It is possible to fail
221 * for reasons such as lack of memory. RTNL lock must be held. 217 * for reasons such as lack of memory. ovs_mutex must be held.
222 */ 218 */
223void ovs_vport_del(struct vport *vport) 219void ovs_vport_del(struct vport *vport)
224{ 220{
225 ASSERT_RTNL(); 221 ASSERT_OVSL();
226 222
227 hlist_del_rcu(&vport->hash_node); 223 hlist_del_rcu(&vport->hash_node);
228 224
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
237 * 233 *
238 * Retrieves transmit, receive, and error stats for the given device. 234 * Retrieves transmit, receive, and error stats for the given device.
239 * 235 *
240 * Must be called with RTNL lock or rcu_read_lock. 236 * Must be called with ovs_mutex or rcu_read_lock.
241 */ 237 */
242void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) 238void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
243{ 239{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
264 spin_unlock_bh(&vport->stats_lock); 260 spin_unlock_bh(&vport->stats_lock);
265 261
266 for_each_possible_cpu(i) { 262 for_each_possible_cpu(i) {
267 const struct vport_percpu_stats *percpu_stats; 263 const struct pcpu_tstats *percpu_stats;
268 struct vport_percpu_stats local_stats; 264 struct pcpu_tstats local_stats;
269 unsigned int start; 265 unsigned int start;
270 266
271 percpu_stats = per_cpu_ptr(vport->percpu_stats, i); 267 percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
272 268
273 do { 269 do {
274 start = u64_stats_fetch_begin_bh(&percpu_stats->sync); 270 start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
275 local_stats = *percpu_stats; 271 local_stats = *percpu_stats;
276 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); 272 } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
277 273
278 stats->rx_bytes += local_stats.rx_bytes; 274 stats->rx_bytes += local_stats.rx_bytes;
279 stats->rx_packets += local_stats.rx_packets; 275 stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
296 * negative error code if a real error occurred. If an error occurs, @skb is 292 * negative error code if a real error occurred. If an error occurs, @skb is
297 * left unmodified. 293 * left unmodified.
298 * 294 *
299 * Must be called with RTNL lock or rcu_read_lock. 295 * Must be called with ovs_mutex or rcu_read_lock.
300 */ 296 */
301int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) 297int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
302{ 298{
303 struct nlattr *nla; 299 struct nlattr *nla;
300 int err;
301
302 if (!vport->ops->get_options)
303 return 0;
304 304
305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); 305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
306 if (!nla) 306 if (!nla)
307 return -EMSGSIZE; 307 return -EMSGSIZE;
308 308
309 if (vport->ops->get_options) { 309 err = vport->ops->get_options(vport, skb);
310 int err = vport->ops->get_options(vport, skb); 310 if (err) {
311 if (err) { 311 nla_nest_cancel(skb, nla);
312 nla_nest_cancel(skb, nla); 312 return err;
313 return err;
314 }
315 } 313 }
316 314
317 nla_nest_end(skb, nla); 315 nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
329 */ 327 */
330void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
331{ 329{
332 struct vport_percpu_stats *stats; 330 struct pcpu_tstats *stats;
333 331
334 stats = this_cpu_ptr(vport->percpu_stats); 332 stats = this_cpu_ptr(vport->percpu_stats);
335 u64_stats_update_begin(&stats->sync); 333 u64_stats_update_begin(&stats->syncp);
336 stats->rx_packets++; 334 stats->rx_packets++;
337 stats->rx_bytes += skb->len; 335 stats->rx_bytes += skb->len;
338 u64_stats_update_end(&stats->sync); 336 u64_stats_update_end(&stats->syncp);
339 337
340 ovs_dp_process_received_packet(vport, skb); 338 ovs_dp_process_received_packet(vport, skb);
341} 339}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
346 * @vport: vport on which to send the packet 344 * @vport: vport on which to send the packet
347 * @skb: skb to send 345 * @skb: skb to send
348 * 346 *
349 * Sends the given packet and returns the length of data sent. Either RTNL 347 * Sends the given packet and returns the length of data sent. Either ovs
350 * lock or rcu_read_lock must be held. 348 * lock or rcu_read_lock must be held.
351 */ 349 */
352int ovs_vport_send(struct vport *vport, struct sk_buff *skb) 350int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
354 int sent = vport->ops->send(vport, skb); 352 int sent = vport->ops->send(vport, skb);
355 353
356 if (likely(sent)) { 354 if (likely(sent)) {
357 struct vport_percpu_stats *stats; 355 struct pcpu_tstats *stats;
358 356
359 stats = this_cpu_ptr(vport->percpu_stats); 357 stats = this_cpu_ptr(vport->percpu_stats);
360 358
361 u64_stats_update_begin(&stats->sync); 359 u64_stats_update_begin(&stats->syncp);
362 stats->tx_packets++; 360 stats->tx_packets++;
363 stats->tx_bytes += sent; 361 stats->tx_bytes += sent;
364 u64_stats_update_end(&stats->sync); 362 u64_stats_update_end(&stats->syncp);
365 } 363 }
366 return sent; 364 return sent;
367} 365}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aee7d43114c9..7ba08c30b853 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
19#ifndef VPORT_H 19#ifndef VPORT_H
20#define VPORT_H 1 20#define VPORT_H 1
21 21
22#include <linux/if_tunnel.h>
22#include <linux/list.h> 23#include <linux/list.h>
23#include <linux/netlink.h> 24#include <linux/netlink.h>
24#include <linux/openvswitch.h> 25#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
50 51
51/* The following definitions are for implementers of vport devices: */ 52/* The following definitions are for implementers of vport devices: */
52 53
53struct vport_percpu_stats {
54 u64 rx_bytes;
55 u64 rx_packets;
56 u64 tx_bytes;
57 u64 tx_packets;
58 struct u64_stats_sync sync;
59};
60
61struct vport_err_stats { 54struct vport_err_stats {
62 u64 rx_dropped; 55 u64 rx_dropped;
63 u64 rx_errors; 56 u64 rx_errors;
@@ -89,7 +82,7 @@ struct vport {
89 struct hlist_node dp_hash_node; 82 struct hlist_node dp_hash_node;
90 const struct vport_ops *ops; 83 const struct vport_ops *ops;
91 84
92 struct vport_percpu_stats __percpu *percpu_stats; 85 struct pcpu_tstats __percpu *percpu_stats;
93 86
94 spinlock_t stats_lock; 87 spinlock_t stats_lock;
95 struct vport_err_stats err_stats; 88 struct vport_err_stats err_stats;
@@ -138,14 +131,14 @@ struct vport_parms {
138struct vport_ops { 131struct vport_ops {
139 enum ovs_vport_type type; 132 enum ovs_vport_type type;
140 133
141 /* Called with RTNL lock. */ 134 /* Called with ovs_mutex. */
142 struct vport *(*create)(const struct vport_parms *); 135 struct vport *(*create)(const struct vport_parms *);
143 void (*destroy)(struct vport *); 136 void (*destroy)(struct vport *);
144 137
145 int (*set_options)(struct vport *, struct nlattr *); 138 int (*set_options)(struct vport *, struct nlattr *);
146 int (*get_options)(const struct vport *, struct sk_buff *); 139 int (*get_options)(const struct vport *, struct sk_buff *);
147 140
148 /* Called with rcu_read_lock or RTNL lock. */ 141 /* Called with rcu_read_lock or ovs_mutex. */
149 const char *(*get_name)(const struct vport *); 142 const char *(*get_name)(const struct vport *);
150 void (*get_config)(const struct vport *, void *); 143 void (*get_config)(const struct vport *, void *);
151 int (*get_ifindex)(const struct vport *); 144 int (*get_ifindex)(const struct vport *);