-rw-r--r--   include/linux/openvswitch.h           | 432
-rw-r--r--   include/uapi/linux/Kbuild             |   1
-rw-r--r--   include/uapi/linux/openvswitch.h      | 456
-rw-r--r--   net/openvswitch/datapath.c            | 393
-rw-r--r--   net/openvswitch/datapath.h            |  70
-rw-r--r--   net/openvswitch/dp_notify.c           |  82
-rw-r--r--   net/openvswitch/flow.c                |   2
-rw-r--r--   net/openvswitch/flow.h                |  21
-rw-r--r--   net/openvswitch/vport-internal_dev.c  |   6
-rw-r--r--   net/openvswitch/vport-netdev.c        |   8
-rw-r--r--   net/openvswitch/vport.c               |  58
-rw-r--r--   net/openvswitch/vport.h               |  15
12 files changed, 849 insertions, 695 deletions
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 67d6c7b03581..e6b240b6196c 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -19,436 +19,6 @@
19#ifndef _LINUX_OPENVSWITCH_H 19#ifndef _LINUX_OPENVSWITCH_H
20#define _LINUX_OPENVSWITCH_H 1 20#define _LINUX_OPENVSWITCH_H 1
21 21
22#include <linux/types.h> 22#include <uapi/linux/openvswitch.h>
23
24/**
25 * struct ovs_header - header for OVS Generic Netlink messages.
26 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
27 * specific to a datapath).
28 *
29 * Attributes following the header are specific to a particular OVS Generic
30 * Netlink family, but all of the OVS families use this header.
31 */
32
33struct ovs_header {
34 int dp_ifindex;
35};
36
37/* Datapaths. */
38
39#define OVS_DATAPATH_FAMILY "ovs_datapath"
40#define OVS_DATAPATH_MCGROUP "ovs_datapath"
41#define OVS_DATAPATH_VERSION 0x1
42
43enum ovs_datapath_cmd {
44 OVS_DP_CMD_UNSPEC,
45 OVS_DP_CMD_NEW,
46 OVS_DP_CMD_DEL,
47 OVS_DP_CMD_GET,
48 OVS_DP_CMD_SET
49};
50
51/**
52 * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
53 * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
54 * port". This is the name of the network device whose dp_ifindex is given in
55 * the &struct ovs_header. Always present in notifications. Required in
 56 * the &struct ovs_header. Always present in notifications. Required in
57 * dp_ifindex in other requests (with a dp_ifindex of 0).
58 * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
 59 * set on the datapath port (for %OVS_PACKET_CMD_MISS upcalls). Only valid on
60 * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
61 * not be sent.
62 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
63 * datapath. Always present in notifications.
64 *
65 * These attributes follow the &struct ovs_header within the Generic Netlink
66 * payload for %OVS_DP_* commands.
67 */
68enum ovs_datapath_attr {
69 OVS_DP_ATTR_UNSPEC,
70 OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
71 OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
72 OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
73 __OVS_DP_ATTR_MAX
74};
75
76#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
77
78struct ovs_dp_stats {
79 __u64 n_hit; /* Number of flow table matches. */
80 __u64 n_missed; /* Number of flow table misses. */
81 __u64 n_lost; /* Number of misses not sent to userspace. */
82 __u64 n_flows; /* Number of flows present */
83};
84
85struct ovs_vport_stats {
86 __u64 rx_packets; /* total packets received */
87 __u64 tx_packets; /* total packets transmitted */
88 __u64 rx_bytes; /* total bytes received */
89 __u64 tx_bytes; /* total bytes transmitted */
90 __u64 rx_errors; /* bad packets received */
91 __u64 tx_errors; /* packet transmit problems */
92 __u64 rx_dropped; /* no space in linux buffers */
93 __u64 tx_dropped; /* no space available in linux */
94};
95
96/* Fixed logical ports. */
97#define OVSP_LOCAL ((__u32)0)
98
99/* Packet transfer. */
100
101#define OVS_PACKET_FAMILY "ovs_packet"
102#define OVS_PACKET_VERSION 0x1
103
104enum ovs_packet_cmd {
105 OVS_PACKET_CMD_UNSPEC,
106
107 /* Kernel-to-user notifications. */
108 OVS_PACKET_CMD_MISS, /* Flow table miss. */
109 OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
110
111 /* Userspace commands. */
112 OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */
113};
114
115/**
116 * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
117 * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
118 * packet as received, from the start of the Ethernet header onward. For
119 * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
120 * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
121 * the flow key extracted from the packet as originally received.
122 * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
123 * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
124 * userspace to adapt its flow setup strategy by comparing its notion of the
125 * flow key against the kernel's.
126 * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
127 * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
128 * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
129 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
130 * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
131 * specified there.
132 *
133 * These attributes follow the &struct ovs_header within the Generic Netlink
134 * payload for %OVS_PACKET_* commands.
135 */
136enum ovs_packet_attr {
137 OVS_PACKET_ATTR_UNSPEC,
138 OVS_PACKET_ATTR_PACKET, /* Packet data. */
139 OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
140 OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
141 OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
142 __OVS_PACKET_ATTR_MAX
143};
144
145#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
146
147/* Virtual ports. */
148
149#define OVS_VPORT_FAMILY "ovs_vport"
150#define OVS_VPORT_MCGROUP "ovs_vport"
151#define OVS_VPORT_VERSION 0x1
152
153enum ovs_vport_cmd {
154 OVS_VPORT_CMD_UNSPEC,
155 OVS_VPORT_CMD_NEW,
156 OVS_VPORT_CMD_DEL,
157 OVS_VPORT_CMD_GET,
158 OVS_VPORT_CMD_SET
159};
160
161enum ovs_vport_type {
162 OVS_VPORT_TYPE_UNSPEC,
163 OVS_VPORT_TYPE_NETDEV, /* network device */
164 OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
165 __OVS_VPORT_TYPE_MAX
166};
167
168#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
169
170/**
171 * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
172 * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
173 * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
174 * of vport.
175 * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
176 * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
177 * plus a null terminator.
178 * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
179 * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
180 * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
181 * this port. A value of zero indicates that upcalls should not be sent.
182 * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
183 * packets sent or received through the vport.
184 *
185 * These attributes follow the &struct ovs_header within the Generic Netlink
186 * payload for %OVS_VPORT_* commands.
187 *
188 * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
189 * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
190 * optional; if not specified a free port number is automatically selected.
191 * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
192 * of vport.
 193 * All other attributes are ignored.
194 *
195 * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
 196 * look up the vport to operate on; otherwise dp_ifindex from the &struct
197 * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
198 */
199enum ovs_vport_attr {
200 OVS_VPORT_ATTR_UNSPEC,
201 OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */
202 OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
203 OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
204 OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
205 OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
206 OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
207 __OVS_VPORT_ATTR_MAX
208};
209
210#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
211
212/* Flows. */
213
214#define OVS_FLOW_FAMILY "ovs_flow"
215#define OVS_FLOW_MCGROUP "ovs_flow"
216#define OVS_FLOW_VERSION 0x1
217
218enum ovs_flow_cmd {
219 OVS_FLOW_CMD_UNSPEC,
220 OVS_FLOW_CMD_NEW,
221 OVS_FLOW_CMD_DEL,
222 OVS_FLOW_CMD_GET,
223 OVS_FLOW_CMD_SET
224};
225
226struct ovs_flow_stats {
227 __u64 n_packets; /* Number of matched packets. */
228 __u64 n_bytes; /* Number of matched bytes. */
229};
230
231enum ovs_key_attr {
232 OVS_KEY_ATTR_UNSPEC,
233 OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */
234 OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */
235 OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */
236 OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */
237 OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */
238 OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */
239 OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */
240 OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */
241 OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */
242 OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */
243 OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */
244 OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
245 OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
246 OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
247 OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
248 __OVS_KEY_ATTR_MAX
249};
250
251#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
252
253/**
254 * enum ovs_frag_type - IPv4 and IPv6 fragment type
255 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
256 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
257 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
258 *
 259 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag in &struct
260 * ovs_key_ipv6.
261 */
262enum ovs_frag_type {
263 OVS_FRAG_TYPE_NONE,
264 OVS_FRAG_TYPE_FIRST,
265 OVS_FRAG_TYPE_LATER,
266 __OVS_FRAG_TYPE_MAX
267};
268
269#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
270
271struct ovs_key_ethernet {
272 __u8 eth_src[6];
273 __u8 eth_dst[6];
274};
275
276struct ovs_key_ipv4 {
277 __be32 ipv4_src;
278 __be32 ipv4_dst;
279 __u8 ipv4_proto;
280 __u8 ipv4_tos;
281 __u8 ipv4_ttl;
282 __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
283};
284
285struct ovs_key_ipv6 {
286 __be32 ipv6_src[4];
287 __be32 ipv6_dst[4];
 288 __be32 ipv6_label; /* 20 bits in the least-significant bits. */
289 __u8 ipv6_proto;
290 __u8 ipv6_tclass;
291 __u8 ipv6_hlimit;
292 __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
293};
294
295struct ovs_key_tcp {
296 __be16 tcp_src;
297 __be16 tcp_dst;
298};
299
300struct ovs_key_udp {
301 __be16 udp_src;
302 __be16 udp_dst;
303};
304
305struct ovs_key_icmp {
306 __u8 icmp_type;
307 __u8 icmp_code;
308};
309
310struct ovs_key_icmpv6 {
311 __u8 icmpv6_type;
312 __u8 icmpv6_code;
313};
314
315struct ovs_key_arp {
316 __be32 arp_sip;
317 __be32 arp_tip;
318 __be16 arp_op;
319 __u8 arp_sha[6];
320 __u8 arp_tha[6];
321};
322
323struct ovs_key_nd {
324 __u32 nd_target[4];
325 __u8 nd_sll[6];
326 __u8 nd_tll[6];
327};
328
329/**
330 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
331 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
332 * key. Always present in notifications. Required for all requests (except
333 * dumps).
334 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
335 * the actions to take for packets that match the key. Always present in
336 * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for
337 * %OVS_FLOW_CMD_SET requests.
338 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
339 * flow. Present in notifications if the stats would be nonzero. Ignored in
340 * requests.
341 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
342 * TCP flags seen on packets in this flow. Only present in notifications for
343 * TCP flows, and only if it would be nonzero. Ignored in requests.
344 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
345 * the system monotonic clock, at which a packet was last processed for this
346 * flow. Only present in notifications if a packet has been processed for this
347 * flow. Ignored in requests.
348 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
349 * last-used time, accumulated TCP flags, and statistics for this flow.
350 * Otherwise ignored in requests. Never present in notifications.
351 *
352 * These attributes follow the &struct ovs_header within the Generic Netlink
353 * payload for %OVS_FLOW_* commands.
354 */
355enum ovs_flow_attr {
356 OVS_FLOW_ATTR_UNSPEC,
357 OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */
358 OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
359 OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */
360 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
361 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
362 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
363 __OVS_FLOW_ATTR_MAX
364};
365
366#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
367
368/**
369 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
370 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
371 * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
372 * %UINT32_MAX samples all packets and intermediate values sample intermediate
373 * fractions of packets.
374 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
375 * Actions are passed as nested attributes.
376 *
377 * Executes the specified actions with the given probability on a per-packet
378 * basis.
379 */
380enum ovs_sample_attr {
381 OVS_SAMPLE_ATTR_UNSPEC,
382 OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
383 OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
384 __OVS_SAMPLE_ATTR_MAX,
385};
386
387#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
388
389/**
390 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
391 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
392 * message should be sent. Required.
393 * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
394 * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
395 */
396enum ovs_userspace_attr {
397 OVS_USERSPACE_ATTR_UNSPEC,
398 OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
399 OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
400 __OVS_USERSPACE_ATTR_MAX
401};
402
403#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
404
405/**
406 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
407 * @vlan_tpid: Tag protocol identifier (TPID) to push.
 408 * @vlan_tci: Tag control information (TCI) to push. The CFI bit must be set
409 * (but it will not be set in the 802.1Q header that is pushed).
410 *
411 * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID
412 * values are those that the kernel module also parses as 802.1Q headers, to
413 * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
414 * from having surprising results.
415 */
416struct ovs_action_push_vlan {
417 __be16 vlan_tpid; /* 802.1Q TPID. */
418 __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
419};
420
421/**
422 * enum ovs_action_attr - Action types.
423 *
424 * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
425 * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
426 * %OVS_USERSPACE_ATTR_* attributes.
427 * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
428 * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
429 * value.
430 * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
431 * packet.
432 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
 433 * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
434 * the nested %OVS_SAMPLE_ATTR_* attributes.
435 *
436 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
437 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
438 * type may not be changed.
439 */
440
441enum ovs_action_attr {
442 OVS_ACTION_ATTR_UNSPEC,
443 OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
444 OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
445 OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
446 OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
447 OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
448 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
449 __OVS_ACTION_ATTR_MAX
450};
451
452#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
453 23
454#endif /* _LINUX_OPENVSWITCH_H */ 24#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 7df190525337..ab5d4992e568 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -285,6 +285,7 @@ header-y += nvram.h
285header-y += omap3isp.h 285header-y += omap3isp.h
286header-y += omapfb.h 286header-y += omapfb.h
287header-y += oom.h 287header-y += oom.h
288header-y += openvswitch.h
288header-y += packet_diag.h 289header-y += packet_diag.h
289header-y += param.h 290header-y += param.h
290header-y += parport.h 291header-y += parport.h
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
new file mode 100644
index 000000000000..405918dd7b3f
--- /dev/null
+++ b/include/uapi/linux/openvswitch.h
@@ -0,0 +1,456 @@
1
2/*
3 * Copyright (c) 2007-2011 Nicira Networks.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _UAPI__LINUX_OPENVSWITCH_H
21#define _UAPI__LINUX_OPENVSWITCH_H 1
22
23#include <linux/types.h>
24#include <linux/if_ether.h>
25
26/**
27 * struct ovs_header - header for OVS Generic Netlink messages.
28 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
29 * specific to a datapath).
30 *
31 * Attributes following the header are specific to a particular OVS Generic
32 * Netlink family, but all of the OVS families use this header.
33 */
34
35struct ovs_header {
36 int dp_ifindex;
37};
38
39/* Datapaths. */
40
41#define OVS_DATAPATH_FAMILY "ovs_datapath"
42#define OVS_DATAPATH_MCGROUP "ovs_datapath"
43#define OVS_DATAPATH_VERSION 0x1
44
45enum ovs_datapath_cmd {
46 OVS_DP_CMD_UNSPEC,
47 OVS_DP_CMD_NEW,
48 OVS_DP_CMD_DEL,
49 OVS_DP_CMD_GET,
50 OVS_DP_CMD_SET
51};
52
53/**
54 * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
55 * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
56 * port". This is the name of the network device whose dp_ifindex is given in
57 * the &struct ovs_header. Always present in notifications. Required in
 58 * %OVS_DP_CMD_NEW requests. May be used as an alternative to specifying
59 * dp_ifindex in other requests (with a dp_ifindex of 0).
60 * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
 61 * set on the datapath port (for %OVS_PACKET_CMD_MISS upcalls). Only valid on
62 * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
63 * not be sent.
64 * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
65 * datapath. Always present in notifications.
66 *
67 * These attributes follow the &struct ovs_header within the Generic Netlink
68 * payload for %OVS_DP_* commands.
69 */
70enum ovs_datapath_attr {
71 OVS_DP_ATTR_UNSPEC,
72 OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */
73 OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
74 OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */
75 __OVS_DP_ATTR_MAX
76};
77
78#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
79
80struct ovs_dp_stats {
81 __u64 n_hit; /* Number of flow table matches. */
82 __u64 n_missed; /* Number of flow table misses. */
83 __u64 n_lost; /* Number of misses not sent to userspace. */
84 __u64 n_flows; /* Number of flows present */
85};
86
87struct ovs_vport_stats {
88 __u64 rx_packets; /* total packets received */
89 __u64 tx_packets; /* total packets transmitted */
90 __u64 rx_bytes; /* total bytes received */
91 __u64 tx_bytes; /* total bytes transmitted */
92 __u64 rx_errors; /* bad packets received */
93 __u64 tx_errors; /* packet transmit problems */
94 __u64 rx_dropped; /* no space in linux buffers */
95 __u64 tx_dropped; /* no space available in linux */
96};
97
98/* Fixed logical ports. */
99#define OVSP_LOCAL ((__u32)0)
100
101/* Packet transfer. */
102
103#define OVS_PACKET_FAMILY "ovs_packet"
104#define OVS_PACKET_VERSION 0x1
105
106enum ovs_packet_cmd {
107 OVS_PACKET_CMD_UNSPEC,
108
109 /* Kernel-to-user notifications. */
110 OVS_PACKET_CMD_MISS, /* Flow table miss. */
111 OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */
112
113 /* Userspace commands. */
114 OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */
115};
116
117/**
118 * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
119 * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire
120 * packet as received, from the start of the Ethernet header onward. For
121 * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
122 * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
123 * the flow key extracted from the packet as originally received.
124 * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key
125 * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows
126 * userspace to adapt its flow setup strategy by comparing its notion of the
127 * flow key against the kernel's.
128 * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used
129 * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes.
130 * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
131 * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
132 * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
133 * specified there.
134 *
135 * These attributes follow the &struct ovs_header within the Generic Netlink
136 * payload for %OVS_PACKET_* commands.
137 */
138enum ovs_packet_attr {
139 OVS_PACKET_ATTR_UNSPEC,
140 OVS_PACKET_ATTR_PACKET, /* Packet data. */
141 OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */
142 OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
143 OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */
144 __OVS_PACKET_ATTR_MAX
145};
146
147#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
148
149/* Virtual ports. */
150
151#define OVS_VPORT_FAMILY "ovs_vport"
152#define OVS_VPORT_MCGROUP "ovs_vport"
153#define OVS_VPORT_VERSION 0x1
154
155enum ovs_vport_cmd {
156 OVS_VPORT_CMD_UNSPEC,
157 OVS_VPORT_CMD_NEW,
158 OVS_VPORT_CMD_DEL,
159 OVS_VPORT_CMD_GET,
160 OVS_VPORT_CMD_SET
161};
162
163enum ovs_vport_type {
164 OVS_VPORT_TYPE_UNSPEC,
165 OVS_VPORT_TYPE_NETDEV, /* network device */
166 OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
167 __OVS_VPORT_TYPE_MAX
168};
169
170#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
171
172/**
173 * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
174 * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
175 * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
176 * of vport.
177 * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device
178 * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes
179 * plus a null terminator.
180 * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
181 * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
182 * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
183 * this port. A value of zero indicates that upcalls should not be sent.
184 * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
185 * packets sent or received through the vport.
186 *
187 * These attributes follow the &struct ovs_header within the Generic Netlink
188 * payload for %OVS_VPORT_* commands.
189 *
190 * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
191 * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is
192 * optional; if not specified a free port number is automatically selected.
193 * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
194 * of vport.
 195 * All other attributes are ignored.
196 *
197 * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
 198 * look up the vport to operate on; otherwise dp_ifindex from the &struct
199 * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
200 */
201enum ovs_vport_attr {
202 OVS_VPORT_ATTR_UNSPEC,
203 OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */
204 OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */
205 OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */
206 OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
207 OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
208 OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */
209 __OVS_VPORT_ATTR_MAX
210};
211
212#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
213
214/* Flows. */
215
216#define OVS_FLOW_FAMILY "ovs_flow"
217#define OVS_FLOW_MCGROUP "ovs_flow"
218#define OVS_FLOW_VERSION 0x1
219
220enum ovs_flow_cmd {
221 OVS_FLOW_CMD_UNSPEC,
222 OVS_FLOW_CMD_NEW,
223 OVS_FLOW_CMD_DEL,
224 OVS_FLOW_CMD_GET,
225 OVS_FLOW_CMD_SET
226};
227
228struct ovs_flow_stats {
229 __u64 n_packets; /* Number of matched packets. */
230 __u64 n_bytes; /* Number of matched bytes. */
231};
232
233enum ovs_key_attr {
234 OVS_KEY_ATTR_UNSPEC,
235 OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */
236 OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */
237 OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */
238 OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */
239 OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */
240 OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */
241 OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */
242 OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */
243 OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */
244 OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */
245 OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */
246 OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
247 OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
248 OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
249 OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
250 __OVS_KEY_ATTR_MAX
251};
252
253#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
254
255/**
256 * enum ovs_frag_type - IPv4 and IPv6 fragment type
257 * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
258 * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
259 * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
260 *
 261 * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag in &struct
262 * ovs_key_ipv6.
263 */
264enum ovs_frag_type {
265 OVS_FRAG_TYPE_NONE,
266 OVS_FRAG_TYPE_FIRST,
267 OVS_FRAG_TYPE_LATER,
268 __OVS_FRAG_TYPE_MAX
269};
270
271#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
272
273struct ovs_key_ethernet {
274 __u8 eth_src[ETH_ALEN];
275 __u8 eth_dst[ETH_ALEN];
276};
277
278struct ovs_key_ipv4 {
279 __be32 ipv4_src;
280 __be32 ipv4_dst;
281 __u8 ipv4_proto;
282 __u8 ipv4_tos;
283 __u8 ipv4_ttl;
284 __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */
285};
286
287struct ovs_key_ipv6 {
288 __be32 ipv6_src[4];
289 __be32 ipv6_dst[4];
 290 __be32 ipv6_label; /* 20 bits in the least-significant bits. */
291 __u8 ipv6_proto;
292 __u8 ipv6_tclass;
293 __u8 ipv6_hlimit;
294 __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */
295};
296
297struct ovs_key_tcp {
298 __be16 tcp_src;
299 __be16 tcp_dst;
300};
301
302struct ovs_key_udp {
303 __be16 udp_src;
304 __be16 udp_dst;
305};
306
307struct ovs_key_icmp {
308 __u8 icmp_type;
309 __u8 icmp_code;
310};
311
312struct ovs_key_icmpv6 {
313 __u8 icmpv6_type;
314 __u8 icmpv6_code;
315};
316
317struct ovs_key_arp {
318 __be32 arp_sip;
319 __be32 arp_tip;
320 __be16 arp_op;
321 __u8 arp_sha[ETH_ALEN];
322 __u8 arp_tha[ETH_ALEN];
323};
324
325struct ovs_key_nd {
326 __u32 nd_target[4];
327 __u8 nd_sll[ETH_ALEN];
328 __u8 nd_tll[ETH_ALEN];
329};
330
331/**
332 * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
333 * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
334 * key. Always present in notifications. Required for all requests (except
335 * dumps).
336 * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
337 * the actions to take for packets that match the key. Always present in
338 * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for
339 * %OVS_FLOW_CMD_SET requests.
340 * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
341 * flow. Present in notifications if the stats would be nonzero. Ignored in
342 * requests.
343 * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
344 * TCP flags seen on packets in this flow. Only present in notifications for
345 * TCP flows, and only if it would be nonzero. Ignored in requests.
346 * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
347 * the system monotonic clock, at which a packet was last processed for this
348 * flow. Only present in notifications if a packet has been processed for this
349 * flow. Ignored in requests.
350 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
351 * last-used time, accumulated TCP flags, and statistics for this flow.
352 * Otherwise ignored in requests. Never present in notifications.
353 *
354 * These attributes follow the &struct ovs_header within the Generic Netlink
355 * payload for %OVS_FLOW_* commands.
356 */
357enum ovs_flow_attr {
358 OVS_FLOW_ATTR_UNSPEC,
359 OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */
360 OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
361 OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */
362 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
363 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
364 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
365 __OVS_FLOW_ATTR_MAX
366};
367
368#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
369
370/**
371 * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
372 * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
373 * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of
374 * %UINT32_MAX samples all packets and intermediate values sample intermediate
375 * fractions of packets.
376 * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
377 * Actions are passed as nested attributes.
378 *
379 * Executes the specified actions with the given probability on a per-packet
380 * basis.
381 */
382enum ovs_sample_attr {
383 OVS_SAMPLE_ATTR_UNSPEC,
384 OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
385 OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */
386 __OVS_SAMPLE_ATTR_MAX,
387};
388
389#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
390
391/**
392 * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
393 * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
394 * message should be sent. Required.
395 * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
396 * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
397 */
398enum ovs_userspace_attr {
399 OVS_USERSPACE_ATTR_UNSPEC,
400 OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */
401 OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */
402 __OVS_USERSPACE_ATTR_MAX
403};
404
405#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
406
407/**
408 * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
409 * @vlan_tpid: Tag protocol identifier (TPID) to push.
 410 * @vlan_tci: Tag control information (TCI) to push. The CFI bit must be set
411 * (but it will not be set in the 802.1Q header that is pushed).
412 *
413 * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID
414 * values are those that the kernel module also parses as 802.1Q headers, to
415 * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
416 * from having surprising results.
417 */
418struct ovs_action_push_vlan {
419 __be16 vlan_tpid; /* 802.1Q TPID. */
420 __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
421};
422
423/**
424 * enum ovs_action_attr - Action types.
425 *
426 * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
427 * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
428 * %OVS_USERSPACE_ATTR_* attributes.
429 * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
430 * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
431 * value.
432 * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
433 * packet.
434 * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
 435 * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in
436 * the nested %OVS_SAMPLE_ATTR_* attributes.
437 *
438 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
439 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
440 * type may not be changed.
441 */
442
443enum ovs_action_attr {
444 OVS_ACTION_ATTR_UNSPEC,
445 OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */
446 OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */
447 OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */
448 OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */
449 OVS_ACTION_ATTR_POP_VLAN, /* No argument. */
450 OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
451 __OVS_ACTION_ATTR_MAX
452};
453
454#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
455
456#endif /* _LINUX_OPENVSWITCH_H */
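
The header above is exported to userspace, so programs can consume these definitions directly. Below is a minimal userspace sketch (not part of the patch; the helper functions are hypothetical) of how the OVS_ACTION_ATTR_PUSH_VLAN argument and an OVS_SAMPLE_ATTR_PROBABILITY value might be prepared from it:

/* Illustrative only -- not part of this patch. */
#include <arpa/inet.h>          /* htons() */
#include <stdint.h>
#include <linux/if_ether.h>     /* ETH_P_8021Q */
#include <linux/openvswitch.h>

/* Argument for OVS_ACTION_ATTR_PUSH_VLAN.  Bit 0x1000 of the TCI is the
 * CFI bit, which the comment above requires to be set. */
static struct ovs_action_push_vlan make_push_vlan(uint16_t vid, uint8_t pcp)
{
	struct ovs_action_push_vlan vlan = {
		.vlan_tpid = htons(ETH_P_8021Q),
		.vlan_tci  = htons((uint16_t)((pcp << 13) | 0x1000 | (vid & 0x0fff))),
	};
	return vlan;
}

/* Value for OVS_SAMPLE_ATTR_PROBABILITY: 0 samples nothing, UINT32_MAX
 * samples every packet, intermediate values sample a fraction. */
static uint32_t sample_probability(double fraction)
{
	return (uint32_t)(fraction * UINT32_MAX);
}
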
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 8759265a3e46..b7d0b7c3fe2c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
44#include <linux/netfilter_ipv4.h> 44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h> 45#include <linux/inetdevice.h>
46#include <linux/list.h> 46#include <linux/list.h>
47#include <linux/lockdep.h>
47#include <linux/openvswitch.h> 48#include <linux/openvswitch.h>
48#include <linux/rculist.h> 49#include <linux/rculist.h>
49#include <linux/dmi.h> 50#include <linux/dmi.h>
@@ -56,38 +57,59 @@
56#include "flow.h" 57#include "flow.h"
57#include "vport-internal_dev.h" 58#include "vport-internal_dev.h"
58 59
59/**
60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex.
63 */
64struct ovs_net {
65 struct list_head dps;
66};
67
68static int ovs_net_id __read_mostly;
69 60
70#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 61#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71static void rehash_flow_table(struct work_struct *work); 62static void rehash_flow_table(struct work_struct *work);
72static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 63static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73 64
65int ovs_net_id __read_mostly;
66
67static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
68 struct genl_multicast_group *grp)
69{
70 genl_notify(skb, genl_info_net(info), info->snd_portid,
71 grp->id, info->nlhdr, GFP_KERNEL);
72}
73
74/** 74/**
75 * DOC: Locking: 75 * DOC: Locking:
76 * 76 *
77 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * All writes e.g. Writes to device state (add/remove datapath, port, set
78 * etc.) are protected by RTNL. 78 * operations on vports, etc.), Writes to other state (flow table
79 * 79 * modifications, set miscellaneous datapath parameters, etc.) are protected
80 * Writes to other state (flow table modifications, set miscellaneous datapath 80 * by ovs_lock.
81 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
82 * genl_mutex.
83 * 81 *
84 * Reads are protected by RCU. 82 * Reads are protected by RCU.
85 * 83 *
86 * There are a few special cases (mostly stats) that have their own 84 * There are a few special cases (mostly stats) that have their own
87 * synchronization but they nest under all of above and don't interact with 85 * synchronization but they nest under all of above and don't interact with
88 * each other. 86 * each other.
87 *
88 * The RTNL lock nests inside ovs_mutex.
89 */ 89 */
90 90
91static DEFINE_MUTEX(ovs_mutex);
92
93void ovs_lock(void)
94{
95 mutex_lock(&ovs_mutex);
96}
97
98void ovs_unlock(void)
99{
100 mutex_unlock(&ovs_mutex);
101}
102
103#ifdef CONFIG_LOCKDEP
104int lockdep_ovsl_is_held(void)
105{
106 if (debug_locks)
107 return lockdep_is_held(&ovs_mutex);
108 else
109 return 1;
110}
111#endif
112
91static struct vport *new_vport(const struct vport_parms *); 113static struct vport *new_vport(const struct vport_parms *);
92static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 114static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
93 const struct dp_upcall_info *); 115 const struct dp_upcall_info *);
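
The remainder of this patch uses ovsl_dereference() and ASSERT_OVSL(), whose definitions live in net/openvswitch/datapath.h (also changed by this patch, but its hunks are not shown in this excerpt). A plausible sketch of how they build on lockdep_ovsl_is_held() above, for reference:

/* Sketch only -- the real definitions belong in net/openvswitch/datapath.h. */
#define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held()))
#define ovsl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_ovsl_is_held())
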
@@ -95,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
95 struct sk_buff *, 117 struct sk_buff *,
96 const struct dp_upcall_info *); 118 const struct dp_upcall_info *);
97 119
98/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 120/* Must be called with rcu_read_lock or ovs_mutex. */
99static struct datapath *get_dp(struct net *net, int dp_ifindex) 121static struct datapath *get_dp(struct net *net, int dp_ifindex)
100{ 122{
101 struct datapath *dp = NULL; 123 struct datapath *dp = NULL;
@@ -113,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
113 return dp; 135 return dp;
114} 136}
115 137
116/* Must be called with rcu_read_lock or RTNL lock. */ 138/* Must be called with rcu_read_lock or ovs_mutex. */
117const char *ovs_dp_name(const struct datapath *dp) 139const char *ovs_dp_name(const struct datapath *dp)
118{ 140{
119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); 141 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
120 return vport->ops->get_name(vport); 142 return vport->ops->get_name(vport);
121} 143}
122 144
@@ -168,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
168 return NULL; 190 return NULL;
169} 191}
170 192
171/* Called with RTNL lock and genl_lock. */ 193/* Called with ovs_mutex. */
172static struct vport *new_vport(const struct vport_parms *parms) 194static struct vport *new_vport(const struct vport_parms *parms)
173{ 195{
174 struct vport *vport; 196 struct vport *vport;
@@ -180,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
180 202
181 hlist_add_head_rcu(&vport->dp_hash_node, head); 203 hlist_add_head_rcu(&vport->dp_hash_node, head);
182 } 204 }
183
184 return vport; 205 return vport;
185} 206}
186 207
187/* Called with RTNL lock. */
188void ovs_dp_detach_port(struct vport *p) 208void ovs_dp_detach_port(struct vport *p)
189{ 209{
190 ASSERT_RTNL(); 210 ASSERT_OVSL();
191 211
192 /* First drop references to device. */ 212 /* First drop references to device. */
193 hlist_del_rcu(&p->dp_hash_node); 213 hlist_del_rcu(&p->dp_hash_node);
@@ -337,6 +357,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
337 return err; 357 return err;
338} 358}
339 359
360static size_t key_attr_size(void)
361{
362 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
363 + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
364 + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
365 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
366 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
367 + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
368 + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
369 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
370 + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
371 + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
372 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
373}
374
375static size_t upcall_msg_size(const struct sk_buff *skb,
376 const struct nlattr *userdata)
377{
378 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
379 + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
380 + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
381
382 /* OVS_PACKET_ATTR_USERDATA */
383 if (userdata)
384 size += NLA_ALIGN(userdata->nla_len);
385
386 return size;
387}
388
340static int queue_userspace_packet(struct net *net, int dp_ifindex, 389static int queue_userspace_packet(struct net *net, int dp_ifindex,
341 struct sk_buff *skb, 390 struct sk_buff *skb,
342 const struct dp_upcall_info *upcall_info) 391 const struct dp_upcall_info *upcall_info)
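
key_attr_size() and upcall_msg_size() above rely on nla_total_size() from include/net/netlink.h, which charges each attribute its 4-byte header plus padding to a 4-byte boundary. A standalone sanity check of that arithmetic (illustrative only; the macros are re-derived here rather than taken from the kernel header):

#include <stdio.h>

#define NLA_HDRLEN          4                   /* attribute header, already aligned */
#define NLA_ALIGN(len)      (((len) + 3) & ~3)  /* round up to a 4-byte boundary */
#define nla_total_size(len) NLA_ALIGN(NLA_HDRLEN + (len))

int main(void)
{
	/* 12-byte struct ovs_key_ethernet costs 16 bytes on the wire. */
	printf("OVS_KEY_ATTR_ETHERNET: %d\n", nla_total_size(12));
	/* 4-byte u32 OVS_KEY_ATTR_PRIORITY costs 8 bytes. */
	printf("OVS_KEY_ATTR_PRIORITY: %d\n", nla_total_size(4));
	return 0;
}
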
@@ -345,7 +394,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
345 struct sk_buff *nskb = NULL; 394 struct sk_buff *nskb = NULL;
346 struct sk_buff *user_skb; /* to be queued to userspace */ 395 struct sk_buff *user_skb; /* to be queued to userspace */
347 struct nlattr *nla; 396 struct nlattr *nla;
348 unsigned int len;
349 int err; 397 int err;
350 398
351 if (vlan_tx_tag_present(skb)) { 399 if (vlan_tx_tag_present(skb)) {
@@ -366,13 +414,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
366 goto out; 414 goto out;
367 } 415 }
368 416
369 len = sizeof(struct ovs_header); 417 user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
370 len += nla_total_size(skb->len);
371 len += nla_total_size(FLOW_BUFSIZE);
372 if (upcall_info->userdata)
373 len += NLA_ALIGN(upcall_info->userdata->nla_len);
374
375 user_skb = genlmsg_new(len, GFP_ATOMIC);
376 if (!user_skb) { 418 if (!user_skb) {
377 err = -ENOMEM; 419 err = -ENOMEM;
378 goto out; 420 goto out;
@@ -403,13 +445,13 @@ out:
403 return err; 445 return err;
404} 446}
405 447
406/* Called with genl_mutex. */ 448/* Called with ovs_mutex. */
407static int flush_flows(struct datapath *dp) 449static int flush_flows(struct datapath *dp)
408{ 450{
409 struct flow_table *old_table; 451 struct flow_table *old_table;
410 struct flow_table *new_table; 452 struct flow_table *new_table;
411 453
412 old_table = genl_dereference(dp->table); 454 old_table = ovsl_dereference(dp->table);
413 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 455 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
414 if (!new_table) 456 if (!new_table)
415 return -ENOMEM; 457 return -ENOMEM;
@@ -662,8 +704,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
662 704
663 err = -EINVAL; 705 err = -EINVAL;
664 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 706 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
665 !a[OVS_PACKET_ATTR_ACTIONS] || 707 !a[OVS_PACKET_ATTR_ACTIONS])
666 nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
667 goto err; 708 goto err;
668 709
669 len = nla_len(a[OVS_PACKET_ATTR_PACKET]); 710 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -673,7 +714,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
673 goto err; 714 goto err;
674 skb_reserve(packet, NET_IP_ALIGN); 715 skb_reserve(packet, NET_IP_ALIGN);
675 716
676 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); 717 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
677 718
678 skb_reset_mac_header(packet); 719 skb_reset_mac_header(packet);
679 eth = eth_hdr(packet); 720 eth = eth_hdr(packet);
@@ -744,7 +785,7 @@ err:
744} 785}
745 786
746static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 787static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
747 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, 788 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
748 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 789 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
749 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 790 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
750}; 791};
@@ -760,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
760static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 801static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
761{ 802{
762 int i; 803 int i;
763 struct flow_table *table = genl_dereference(dp->table); 804 struct flow_table *table = ovsl_dereference(dp->table);
764 805
765 stats->n_flows = ovs_flow_tbl_count(table); 806 stats->n_flows = ovs_flow_tbl_count(table);
766 807
@@ -802,7 +843,17 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
802 .name = OVS_FLOW_MCGROUP 843 .name = OVS_FLOW_MCGROUP
803}; 844};
804 845
805/* Called with genl_lock. */ 846static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
847{
848 return NLMSG_ALIGN(sizeof(struct ovs_header))
849 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
850 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
851 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
852 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
853 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
854}
855
856/* Called with ovs_mutex. */
806static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 857static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
807 struct sk_buff *skb, u32 portid, 858 struct sk_buff *skb, u32 portid,
808 u32 seq, u32 flags, u8 cmd) 859 u32 seq, u32 flags, u8 cmd)
@@ -816,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
816 u8 tcp_flags; 867 u8 tcp_flags;
817 int err; 868 int err;
818 869
819 sf_acts = rcu_dereference_protected(flow->sf_acts, 870 sf_acts = ovsl_dereference(flow->sf_acts);
820 lockdep_genl_is_held());
821 871
822 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 872 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
823 if (!ovs_header) 873 if (!ovs_header)
@@ -880,25 +930,10 @@ error:
880static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 930static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
881{ 931{
882 const struct sw_flow_actions *sf_acts; 932 const struct sw_flow_actions *sf_acts;
883 int len;
884
885 sf_acts = rcu_dereference_protected(flow->sf_acts,
886 lockdep_genl_is_held());
887
888 /* OVS_FLOW_ATTR_KEY */
889 len = nla_total_size(FLOW_BUFSIZE);
890 /* OVS_FLOW_ATTR_ACTIONS */
891 len += nla_total_size(sf_acts->actions_len);
892 /* OVS_FLOW_ATTR_STATS */
893 len += nla_total_size(sizeof(struct ovs_flow_stats));
894 /* OVS_FLOW_ATTR_TCP_FLAGS */
895 len += nla_total_size(1);
896 /* OVS_FLOW_ATTR_USED */
897 len += nla_total_size(8);
898 933
899 len += NLMSG_ALIGN(sizeof(struct ovs_header)); 934 sf_acts = ovsl_dereference(flow->sf_acts);
900 935
901 return genlmsg_new(len, GFP_KERNEL); 936 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
902} 937}
903 938
904static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 939static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -947,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
947 goto error; 982 goto error;
948 } 983 }
949 984
985 ovs_lock();
950 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 986 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
951 error = -ENODEV; 987 error = -ENODEV;
952 if (!dp) 988 if (!dp)
953 goto error; 989 goto err_unlock_ovs;
954 990
955 table = genl_dereference(dp->table); 991 table = ovsl_dereference(dp->table);
956 flow = ovs_flow_tbl_lookup(table, &key, key_len); 992 flow = ovs_flow_tbl_lookup(table, &key, key_len);
957 if (!flow) { 993 if (!flow) {
958 struct sw_flow_actions *acts; 994 struct sw_flow_actions *acts;
@@ -960,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
960 /* Bail out if we're not allowed to create a new flow. */ 996 /* Bail out if we're not allowed to create a new flow. */
961 error = -ENOENT; 997 error = -ENOENT;
962 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 998 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
963 goto error; 999 goto err_unlock_ovs;
964 1000
965 /* Expand table, if necessary, to make room. */ 1001 /* Expand table, if necessary, to make room. */
966 if (ovs_flow_tbl_need_to_expand(table)) { 1002 if (ovs_flow_tbl_need_to_expand(table)) {
@@ -970,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
970 if (!IS_ERR(new_table)) { 1006 if (!IS_ERR(new_table)) {
971 rcu_assign_pointer(dp->table, new_table); 1007 rcu_assign_pointer(dp->table, new_table);
972 ovs_flow_tbl_deferred_destroy(table); 1008 ovs_flow_tbl_deferred_destroy(table);
973 table = genl_dereference(dp->table); 1009 table = ovsl_dereference(dp->table);
974 } 1010 }
975 } 1011 }
976 1012
@@ -978,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
978 flow = ovs_flow_alloc(); 1014 flow = ovs_flow_alloc();
979 if (IS_ERR(flow)) { 1015 if (IS_ERR(flow)) {
980 error = PTR_ERR(flow); 1016 error = PTR_ERR(flow);
981 goto error; 1017 goto err_unlock_ovs;
982 } 1018 }
983 flow->key = key; 1019 flow->key = key;
984 clear_stats(flow); 1020 clear_stats(flow);
@@ -1011,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1011 error = -EEXIST; 1047 error = -EEXIST;
1012 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1048 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1013 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1049 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1014 goto error; 1050 goto err_unlock_ovs;
1015 1051
1016 /* Update actions. */ 1052 /* Update actions. */
1017 old_acts = rcu_dereference_protected(flow->sf_acts, 1053 old_acts = ovsl_dereference(flow->sf_acts);
1018 lockdep_genl_is_held());
1019 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1054 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1020 if (acts_attrs && 1055 if (acts_attrs &&
1021 (old_acts->actions_len != nla_len(acts_attrs) || 1056 (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1026,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1026 new_acts = ovs_flow_actions_alloc(acts_attrs); 1061 new_acts = ovs_flow_actions_alloc(acts_attrs);
1027 error = PTR_ERR(new_acts); 1062 error = PTR_ERR(new_acts);
1028 if (IS_ERR(new_acts)) 1063 if (IS_ERR(new_acts))
1029 goto error; 1064 goto err_unlock_ovs;
1030 1065
1031 rcu_assign_pointer(flow->sf_acts, new_acts); 1066 rcu_assign_pointer(flow->sf_acts, new_acts);
1032 ovs_flow_deferred_free_acts(old_acts); 1067 ovs_flow_deferred_free_acts(old_acts);
@@ -1042,11 +1077,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1042 spin_unlock_bh(&flow->lock); 1077 spin_unlock_bh(&flow->lock);
1043 } 1078 }
1044 } 1079 }
1080 ovs_unlock();
1045 1081
1046 if (!IS_ERR(reply)) 1082 if (!IS_ERR(reply))
1047 genl_notify(reply, genl_info_net(info), info->snd_portid, 1083 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1048 ovs_dp_flow_multicast_group.id, info->nlhdr,
1049 GFP_KERNEL);
1050 else 1084 else
1051 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1085 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1052 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1086 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1054,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1054 1088
1055error_free_flow: 1089error_free_flow:
1056 ovs_flow_free(flow); 1090 ovs_flow_free(flow);
1091err_unlock_ovs:
1092 ovs_unlock();
1057error: 1093error:
1058 return error; 1094 return error;
1059} 1095}
@@ -1076,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1076 if (err) 1112 if (err)
1077 return err; 1113 return err;
1078 1114
1115 ovs_lock();
1079 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1116 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1080 if (!dp) 1117 if (!dp) {
1081 return -ENODEV; 1118 err = -ENODEV;
1119 goto unlock;
1120 }
1082 1121
1083 table = genl_dereference(dp->table); 1122 table = ovsl_dereference(dp->table);
1084 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1123 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1085 if (!flow) 1124 if (!flow) {
1086 return -ENOENT; 1125 err = -ENOENT;
1126 goto unlock;
1127 }
1087 1128
1088 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1129 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1089 info->snd_seq, OVS_FLOW_CMD_NEW); 1130 info->snd_seq, OVS_FLOW_CMD_NEW);
1090 if (IS_ERR(reply)) 1131 if (IS_ERR(reply)) {
1091 return PTR_ERR(reply); 1132 err = PTR_ERR(reply);
1133 goto unlock;
1134 }
1092 1135
1136 ovs_unlock();
1093 return genlmsg_reply(reply, info); 1137 return genlmsg_reply(reply, info);
1138unlock:
1139 ovs_unlock();
1140 return err;
1094} 1141}
1095 1142
1096static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1143static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1105,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1105 int err; 1152 int err;
1106 int key_len; 1153 int key_len;
1107 1154
1155 ovs_lock();
1108 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1156 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1109 if (!dp) 1157 if (!dp) {
1110 return -ENODEV; 1158 err = -ENODEV;
1111 1159 goto unlock;
1112 if (!a[OVS_FLOW_ATTR_KEY]) 1160 }
1113 return flush_flows(dp);
1114 1161
1162 if (!a[OVS_FLOW_ATTR_KEY]) {
1163 err = flush_flows(dp);
1164 goto unlock;
1165 }
1115 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1166 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1116 if (err) 1167 if (err)
1117 return err; 1168 goto unlock;
1118 1169
1119 table = genl_dereference(dp->table); 1170 table = ovsl_dereference(dp->table);
1120 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1171 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1121 if (!flow) 1172 if (!flow) {
1122 return -ENOENT; 1173 err = -ENOENT;
1174 goto unlock;
1175 }
1123 1176
1124 reply = ovs_flow_cmd_alloc_info(flow); 1177 reply = ovs_flow_cmd_alloc_info(flow);
1125 if (!reply) 1178 if (!reply) {
1126 return -ENOMEM; 1179 err = -ENOMEM;
1180 goto unlock;
1181 }
1127 1182
1128 ovs_flow_tbl_remove(table, flow); 1183 ovs_flow_tbl_remove(table, flow);
1129 1184
@@ -1132,10 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1132 BUG_ON(err < 0); 1187 BUG_ON(err < 0);
1133 1188
1134 ovs_flow_deferred_free(flow); 1189 ovs_flow_deferred_free(flow);
1190 ovs_unlock();
1135 1191
1136 genl_notify(reply, genl_info_net(info), info->snd_portid, 1192 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1137 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1138 return 0; 1193 return 0;
1194unlock:
1195 ovs_unlock();
1196 return err;
1139} 1197}
1140 1198
1141static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1199static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1144,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1144 struct datapath *dp; 1202 struct datapath *dp;
1145 struct flow_table *table; 1203 struct flow_table *table;
1146 1204
1205 ovs_lock();
1147 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1206 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1148 if (!dp) 1207 if (!dp) {
1208 ovs_unlock();
1149 return -ENODEV; 1209 return -ENODEV;
1210 }
1150 1211
1151 table = genl_dereference(dp->table); 1212 table = ovsl_dereference(dp->table);
1152 1213
1153 for (;;) { 1214 for (;;) {
1154 struct sw_flow *flow; 1215 struct sw_flow *flow;
@@ -1169,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1169 cb->args[0] = bucket; 1230 cb->args[0] = bucket;
1170 cb->args[1] = obj; 1231 cb->args[1] = obj;
1171 } 1232 }
1233 ovs_unlock();
1172 return skb->len; 1234 return skb->len;
1173} 1235}
1174 1236
@@ -1214,6 +1276,16 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1214 .name = OVS_DATAPATH_MCGROUP 1276 .name = OVS_DATAPATH_MCGROUP
1215}; 1277};
1216 1278
1279static size_t ovs_dp_cmd_msg_size(void)
1280{
1281 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1282
1283 msgsize += nla_total_size(IFNAMSIZ);
1284 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1285
1286 return msgsize;
1287}
1288
1217static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1289static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1218 u32 portid, u32 seq, u32 flags, u8 cmd) 1290 u32 portid, u32 seq, u32 flags, u8 cmd)
1219{ 1291{
@@ -1252,7 +1324,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1252 struct sk_buff *skb; 1324 struct sk_buff *skb;
1253 int retval; 1325 int retval;
1254 1326
1255 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1327 skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1256 if (!skb) 1328 if (!skb)
1257 return ERR_PTR(-ENOMEM); 1329 return ERR_PTR(-ENOMEM);
1258 1330
@@ -1264,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1264 return skb; 1336 return skb;
1265} 1337}
1266 1338
1267/* Called with genl_mutex and optionally with RTNL lock also. */ 1339/* Called with ovs_mutex. */
1268static struct datapath *lookup_datapath(struct net *net, 1340static struct datapath *lookup_datapath(struct net *net,
1269 struct ovs_header *ovs_header, 1341 struct ovs_header *ovs_header,
1270 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1342 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1298,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1298 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1370 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1299 goto err; 1371 goto err;
1300 1372
1301 rtnl_lock(); 1373 ovs_lock();
1302 1374
1303 err = -ENOMEM; 1375 err = -ENOMEM;
1304 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1376 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1305 if (dp == NULL) 1377 if (dp == NULL)
1306 goto err_unlock_rtnl; 1378 goto err_unlock_ovs;
1307 1379
1308 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1380 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1309 1381
@@ -1354,37 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1354 1426
1355 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1427 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1356 list_add_tail(&dp->list_node, &ovs_net->dps); 1428 list_add_tail(&dp->list_node, &ovs_net->dps);
1357 rtnl_unlock();
1358 1429
1359 genl_notify(reply, genl_info_net(info), info->snd_portid, 1430 ovs_unlock();
1360 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1431
1361 GFP_KERNEL); 1432 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1362 return 0; 1433 return 0;
1363 1434
1364err_destroy_local_port: 1435err_destroy_local_port:
1365 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1436 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1366err_destroy_ports_array: 1437err_destroy_ports_array:
1367 kfree(dp->ports); 1438 kfree(dp->ports);
1368err_destroy_percpu: 1439err_destroy_percpu:
1369 free_percpu(dp->stats_percpu); 1440 free_percpu(dp->stats_percpu);
1370err_destroy_table: 1441err_destroy_table:
1371 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1442 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1372err_free_dp: 1443err_free_dp:
1373 release_net(ovs_dp_get_net(dp)); 1444 release_net(ovs_dp_get_net(dp));
1374 kfree(dp); 1445 kfree(dp);
1375err_unlock_rtnl: 1446err_unlock_ovs:
1376 rtnl_unlock(); 1447 ovs_unlock();
1377err: 1448err:
1378 return err; 1449 return err;
1379} 1450}
1380 1451
1381/* Called with genl_mutex. */ 1452/* Called with ovs_mutex. */
1382static void __dp_destroy(struct datapath *dp) 1453static void __dp_destroy(struct datapath *dp)
1383{ 1454{
1384 int i; 1455 int i;
1385 1456
1386 rtnl_lock();
1387
1388 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1457 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1389 struct vport *vport; 1458 struct vport *vport;
1390 struct hlist_node *n; 1459 struct hlist_node *n;
@@ -1395,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp)
1395 } 1464 }
1396 1465
1397 list_del(&dp->list_node); 1466 list_del(&dp->list_node);
1398 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1399 1467
1400 /* rtnl_unlock() will wait until all the references to devices that 1468 /* OVSP_LOCAL is the datapath's internal port. We need to make sure that
1401 * are pending unregistration have been dropped. We do it here to 1469 * all ports in the datapath are destroyed before the datapath is freed.
1402 * ensure that any internal devices (which contain DP pointers) are
1403 * fully destroyed before freeing the datapath.
1404 */ 1470 */
1405 rtnl_unlock(); 1471 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1406 1472
1407 call_rcu(&dp->rcu, destroy_dp_rcu); 1473 call_rcu(&dp->rcu, destroy_dp_rcu);
1408} 1474}
@@ -1413,24 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1413 struct datapath *dp; 1479 struct datapath *dp;
1414 int err; 1480 int err;
1415 1481
1482 ovs_lock();
1416 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1483 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1417 err = PTR_ERR(dp); 1484 err = PTR_ERR(dp);
1418 if (IS_ERR(dp)) 1485 if (IS_ERR(dp))
1419 return err; 1486 goto unlock;
1420 1487
1421 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1488 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1422 info->snd_seq, OVS_DP_CMD_DEL); 1489 info->snd_seq, OVS_DP_CMD_DEL);
1423 err = PTR_ERR(reply); 1490 err = PTR_ERR(reply);
1424 if (IS_ERR(reply)) 1491 if (IS_ERR(reply))
1425 return err; 1492 goto unlock;
1426 1493
1427 __dp_destroy(dp); 1494 __dp_destroy(dp);
1495 ovs_unlock();
1428 1496
1429 genl_notify(reply, genl_info_net(info), info->snd_portid, 1497 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1430 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1431 GFP_KERNEL);
1432 1498
1433 return 0; 1499 return 0;
1500unlock:
1501 ovs_unlock();
1502 return err;
1434} 1503}
1435 1504
1436static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1505static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1439,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1439 struct datapath *dp; 1508 struct datapath *dp;
1440 int err; 1509 int err;
1441 1510
1511 ovs_lock();
1442 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1512 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1513 err = PTR_ERR(dp);
1443 if (IS_ERR(dp)) 1514 if (IS_ERR(dp))
1444 return PTR_ERR(dp); 1515 goto unlock;
1445 1516
1446 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1517 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1447 info->snd_seq, OVS_DP_CMD_NEW); 1518 info->snd_seq, OVS_DP_CMD_NEW);
@@ -1449,31 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1449 err = PTR_ERR(reply); 1520 err = PTR_ERR(reply);
1450 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1521 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1451 ovs_dp_datapath_multicast_group.id, err); 1522 ovs_dp_datapath_multicast_group.id, err);
1452 return 0; 1523 err = 0;
1524 goto unlock;
1453 } 1525 }
1454 1526
1455 genl_notify(reply, genl_info_net(info), info->snd_portid, 1527 ovs_unlock();
1456 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1528 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1457 GFP_KERNEL);
1458 1529
1459 return 0; 1530 return 0;
1531unlock:
1532 ovs_unlock();
1533 return err;
1460} 1534}
1461 1535
1462static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1536static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1463{ 1537{
1464 struct sk_buff *reply; 1538 struct sk_buff *reply;
1465 struct datapath *dp; 1539 struct datapath *dp;
1540 int err;
1466 1541
1542 ovs_lock();
1467 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1543 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1468 if (IS_ERR(dp)) 1544 if (IS_ERR(dp)) {
1469 return PTR_ERR(dp); 1545 err = PTR_ERR(dp);
1546 goto unlock;
1547 }
1470 1548
1471 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1549 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1472 info->snd_seq, OVS_DP_CMD_NEW); 1550 info->snd_seq, OVS_DP_CMD_NEW);
1473 if (IS_ERR(reply)) 1551 if (IS_ERR(reply)) {
1474 return PTR_ERR(reply); 1552 err = PTR_ERR(reply);
1553 goto unlock;
1554 }
1475 1555
1556 ovs_unlock();
1476 return genlmsg_reply(reply, info); 1557 return genlmsg_reply(reply, info);
1558
1559unlock:
1560 ovs_unlock();
1561 return err;
1477} 1562}
1478 1563
1479static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1564static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1483,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1483 int skip = cb->args[0]; 1568 int skip = cb->args[0];
1484 int i = 0; 1569 int i = 0;
1485 1570
1571 ovs_lock();
1486 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1572 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1487 if (i >= skip && 1573 if (i >= skip &&
1488 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1574 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1491,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1491 break; 1577 break;
1492 i++; 1578 i++;
1493 } 1579 }
1580 ovs_unlock();
1494 1581
1495 cb->args[0] = i; 1582 cb->args[0] = i;
1496 1583
@@ -1543,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
1543 .name = OVS_VPORT_MCGROUP 1630 .name = OVS_VPORT_MCGROUP
1544}; 1631};
1545 1632
1546/* Called with RTNL lock or RCU read lock. */ 1633/* Called with ovs_mutex or RCU read lock. */
1547static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1634static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1548 u32 portid, u32 seq, u32 flags, u8 cmd) 1635 u32 portid, u32 seq, u32 flags, u8 cmd)
1549{ 1636{
@@ -1582,7 +1669,7 @@ error:
1582 return err; 1669 return err;
1583} 1670}
1584 1671
1585/* Called with RTNL lock or RCU read lock. */ 1672/* Called with ovs_mutex or RCU read lock. */
1586struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1673struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1587 u32 seq, u8 cmd) 1674 u32 seq, u8 cmd)
1588{ 1675{
@@ -1601,7 +1688,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1601 return skb; 1688 return skb;
1602} 1689}
1603 1690
1604/* Called with RTNL lock or RCU read lock. */ 1691/* Called with ovs_mutex or RCU read lock. */
1605static struct vport *lookup_vport(struct net *net, 1692static struct vport *lookup_vport(struct net *net,
1606 struct ovs_header *ovs_header, 1693 struct ovs_header *ovs_header,
1607 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1694 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1627,7 +1714,7 @@ static struct vport *lookup_vport(struct net *net,
1627 if (!dp) 1714 if (!dp)
1628 return ERR_PTR(-ENODEV); 1715 return ERR_PTR(-ENODEV);
1629 1716
1630 vport = ovs_vport_rtnl_rcu(dp, port_no); 1717 vport = ovs_vport_ovsl_rcu(dp, port_no);
1631 if (!vport) 1718 if (!vport)
1632 return ERR_PTR(-ENODEV); 1719 return ERR_PTR(-ENODEV);
1633 return vport; 1720 return vport;
@@ -1651,7 +1738,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1651 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1738 !a[OVS_VPORT_ATTR_UPCALL_PID])
1652 goto exit; 1739 goto exit;
1653 1740
1654 rtnl_lock(); 1741 ovs_lock();
1655 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1742 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1656 err = -ENODEV; 1743 err = -ENODEV;
1657 if (!dp) 1744 if (!dp)
@@ -1664,7 +1751,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1664 if (port_no >= DP_MAX_PORTS) 1751 if (port_no >= DP_MAX_PORTS)
1665 goto exit_unlock; 1752 goto exit_unlock;
1666 1753
1667 vport = ovs_vport_rtnl_rcu(dp, port_no); 1754 vport = ovs_vport_ovsl(dp, port_no);
1668 err = -EBUSY; 1755 err = -EBUSY;
1669 if (vport) 1756 if (vport)
1670 goto exit_unlock; 1757 goto exit_unlock;
@@ -1674,7 +1761,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1674 err = -EFBIG; 1761 err = -EFBIG;
1675 goto exit_unlock; 1762 goto exit_unlock;
1676 } 1763 }
1677 vport = ovs_vport_rtnl(dp, port_no); 1764 vport = ovs_vport_ovsl(dp, port_no);
1678 if (!vport) 1765 if (!vport)
1679 break; 1766 break;
1680 } 1767 }
@@ -1700,11 +1787,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1700 ovs_dp_detach_port(vport); 1787 ovs_dp_detach_port(vport);
1701 goto exit_unlock; 1788 goto exit_unlock;
1702 } 1789 }
1703 genl_notify(reply, genl_info_net(info), info->snd_portid, 1790
1704 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1791 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1705 1792
1706exit_unlock: 1793exit_unlock:
1707 rtnl_unlock(); 1794 ovs_unlock();
1708exit: 1795exit:
1709 return err; 1796 return err;
1710} 1797}
@@ -1716,7 +1803,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1716 struct vport *vport; 1803 struct vport *vport;
1717 int err; 1804 int err;
1718 1805
1719 rtnl_lock(); 1806 ovs_lock();
1720 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1807 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1721 err = PTR_ERR(vport); 1808 err = PTR_ERR(vport);
1722 if (IS_ERR(vport)) 1809 if (IS_ERR(vport))
@@ -1742,11 +1829,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1742 goto exit_unlock; 1829 goto exit_unlock;
1743 } 1830 }
1744 1831
1745 genl_notify(reply, genl_info_net(info), info->snd_portid, 1832 ovs_unlock();
1746 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1833 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1834 return 0;
1747 1835
1748exit_unlock: 1836exit_unlock:
1749 rtnl_unlock(); 1837 ovs_unlock();
1750 return err; 1838 return err;
1751} 1839}
1752 1840
@@ -1757,7 +1845,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1757 struct vport *vport; 1845 struct vport *vport;
1758 int err; 1846 int err;
1759 1847
1760 rtnl_lock(); 1848 ovs_lock();
1761 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1849 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1762 err = PTR_ERR(vport); 1850 err = PTR_ERR(vport);
1763 if (IS_ERR(vport)) 1851 if (IS_ERR(vport))
@@ -1777,11 +1865,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1777 err = 0; 1865 err = 0;
1778 ovs_dp_detach_port(vport); 1866 ovs_dp_detach_port(vport);
1779 1867
1780 genl_notify(reply, genl_info_net(info), info->snd_portid, 1868 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1781 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1782 1869
1783exit_unlock: 1870exit_unlock:
1784 rtnl_unlock(); 1871 ovs_unlock();
1785 return err; 1872 return err;
1786} 1873}
1787 1874
@@ -1941,13 +2028,13 @@ static void rehash_flow_table(struct work_struct *work)
1941 struct datapath *dp; 2028 struct datapath *dp;
1942 struct net *net; 2029 struct net *net;
1943 2030
1944 genl_lock(); 2031 ovs_lock();
1945 rtnl_lock(); 2032 rtnl_lock();
1946 for_each_net(net) { 2033 for_each_net(net) {
1947 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2034 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1948 2035
1949 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2036 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1950 struct flow_table *old_table = genl_dereference(dp->table); 2037 struct flow_table *old_table = ovsl_dereference(dp->table);
1951 struct flow_table *new_table; 2038 struct flow_table *new_table;
1952 2039
1953 new_table = ovs_flow_tbl_rehash(old_table); 2040 new_table = ovs_flow_tbl_rehash(old_table);
@@ -1958,8 +2045,7 @@ static void rehash_flow_table(struct work_struct *work)
1958 } 2045 }
1959 } 2046 }
1960 rtnl_unlock(); 2047 rtnl_unlock();
1961 genl_unlock(); 2048 ovs_unlock();
1962
1963 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2049 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1964} 2050}
1965 2051
@@ -1968,18 +2054,21 @@ static int __net_init ovs_init_net(struct net *net)
1968 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2054 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1969 2055
1970 INIT_LIST_HEAD(&ovs_net->dps); 2056 INIT_LIST_HEAD(&ovs_net->dps);
2057 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
1971 return 0; 2058 return 0;
1972} 2059}
1973 2060
1974static void __net_exit ovs_exit_net(struct net *net) 2061static void __net_exit ovs_exit_net(struct net *net)
1975{ 2062{
1976 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1977 struct datapath *dp, *dp_next; 2063 struct datapath *dp, *dp_next;
2064 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1978 2065
1979 genl_lock(); 2066 ovs_lock();
1980 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2067 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1981 __dp_destroy(dp); 2068 __dp_destroy(dp);
1982 genl_unlock(); 2069 ovs_unlock();
2070
2071 cancel_work_sync(&ovs_net->dp_notify_work);
1983} 2072}
1984 2073
1985static struct pernet_operations ovs_net_ops = { 2074static struct pernet_operations ovs_net_ops = {
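
Taken together, the datapath.c hunks converge on a single handler shape: take ovs_mutex up front, funnel every failure through one unlock label, and drop the mutex before replying or notifying. Reassembled from the hunks above purely as an illustration of that pattern (incidental details may differ from the committed file), ovs_dp_cmd_get() ends up as:

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	ovs_lock();			/* serializes datapath/vport/flow updates */
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto unlock;
	}

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	/* Only the lookup and message construction need the mutex. */
	ovs_unlock();
	return genlmsg_reply(reply, info);

unlock:
	ovs_unlock();
	return err;
}

A related detail in the same file: the reply buffer is now sized precisely with genlmsg_new(ovs_dp_cmd_msg_size(), ...) instead of nlmsg_new(NLMSG_DEFAULT_SIZE, ...), so the allocation matches the attributes the message will actually carry.
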
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9125ad5c5aeb..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
57 * struct datapath - datapath for flow-based packet switching 57 * struct datapath - datapath for flow-based packet switching
58 * @rcu: RCU callback head for deferred destruction. 58 * @rcu: RCU callback head for deferred destruction.
59 * @list_node: Element in global 'dps' list. 59 * @list_node: Element in global 'dps' list.
60 * @n_flows: Number of flows currently in flow table. 60 * @table: Current flow table. Protected by ovs_mutex and RCU.
61 * @table: Current flow table. Protected by genl_lock and RCU.
62 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 61 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
63 * RTNL and RCU. 62 * ovs_mutex and RCU.
64 * @stats_percpu: Per-CPU datapath statistics. 63 * @stats_percpu: Per-CPU datapath statistics.
65 * @net: Reference to net namespace. 64 * @net: Reference to net namespace.
66 * 65 *
@@ -86,26 +85,6 @@ struct datapath {
86#endif 85#endif
87}; 86};
88 87
89struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
90
91static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
92{
93 WARN_ON_ONCE(!rcu_read_lock_held());
94 return ovs_lookup_vport(dp, port_no);
95}
96
97static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
98{
99 WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
100 return ovs_lookup_vport(dp, port_no);
101}
102
103static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
104{
105 ASSERT_RTNL();
106 return ovs_lookup_vport(dp, port_no);
107}
108
109/** 88/**
110 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
111 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -132,6 +111,30 @@ struct dp_upcall_info {
132 u32 portid; 111 u32 portid;
133}; 112};
134 113
114/**
115 * struct ovs_net - Per net-namespace data for ovs.
116 * @dps: List of datapaths to enable dumping them all out.
 117 * Protected by ovs_mutex.
118 */
119struct ovs_net {
120 struct list_head dps;
121 struct work_struct dp_notify_work;
122};
123
124extern int ovs_net_id;
125void ovs_lock(void);
126void ovs_unlock(void);
127
128#ifdef CONFIG_LOCKDEP
129int lockdep_ovsl_is_held(void);
130#else
131#define lockdep_ovsl_is_held() 1
132#endif
133
134#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
135#define ovsl_dereference(p) \
136 rcu_dereference_protected(p, lockdep_ovsl_is_held())
137
135static inline struct net *ovs_dp_get_net(struct datapath *dp) 138static inline struct net *ovs_dp_get_net(struct datapath *dp)
136{ 139{
137 return read_pnet(&dp->net); 140 return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
142 write_pnet(&dp->net, net); 145 write_pnet(&dp->net, net);
143} 146}
144 147
148struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
149
150static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
151{
152 WARN_ON_ONCE(!rcu_read_lock_held());
153 return ovs_lookup_vport(dp, port_no);
154}
155
156static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
157{
158 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
159 return ovs_lookup_vport(dp, port_no);
160}
161
162static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
163{
164 ASSERT_OVSL();
165 return ovs_lookup_vport(dp, port_no);
166}
167
145extern struct notifier_block ovs_dp_device_notifier; 168extern struct notifier_block ovs_dp_device_notifier;
146extern struct genl_multicast_group ovs_dp_vport_multicast_group; 169extern struct genl_multicast_group ovs_dp_vport_multicast_group;
147 170
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
155 u8 cmd); 178 u8 cmd);
156 179
157int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 180int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
181void ovs_dp_notify_wq(struct work_struct *work);
158#endif /* datapath.h */ 182#endif /* datapath.h */
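
The datapath.h hunk replaces the RTNL-based vport accessors with ovs_mutex-based ones and adds ovsl_dereference()/ASSERT_OVSL() for writer-side RCU pointers. A minimal sketch of how the writer side is meant to use them, with a hypothetical helper name (example_install_table) and following the flush/rehash paths in datapath.c:

/* Hypothetical writer-side helper: caller must hold ovs_mutex. */
static void example_install_table(struct datapath *dp,
				  struct flow_table *new_table)
{
	struct flow_table *old_table;

	ASSERT_OVSL();				 /* lockdep-checked in debug builds */
	old_table = ovsl_dereference(dp->table); /* legal because ovs_mutex is held */
	rcu_assign_pointer(dp->table, new_table);
	/* old_table may only be freed after an RCU grace period. */
}

Readers keep using ovs_vport_rcu()/rcu_dereference() under rcu_read_lock(); only the ovs_vport_ovsl*() variants assume ovs_mutex.
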
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
18 18
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <net/genetlink.h> 20#include <net/genetlink.h>
21#include <net/netns/generic.h>
21 22
22#include "datapath.h" 23#include "datapath.h"
23#include "vport-internal_dev.h" 24#include "vport-internal_dev.h"
24#include "vport-netdev.h" 25#include "vport-netdev.h"
25 26
27static void dp_detach_port_notify(struct vport *vport)
28{
29 struct sk_buff *notify;
30 struct datapath *dp;
31
32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0,
34 OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) {
37 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
38 ovs_dp_vport_multicast_group.id,
39 PTR_ERR(notify));
40 return;
41 }
42
43 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
44 ovs_dp_vport_multicast_group.id,
45 GFP_KERNEL);
46}
47
48void ovs_dp_notify_wq(struct work_struct *work)
49{
50 struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
51 struct datapath *dp;
52
53 ovs_lock();
54 list_for_each_entry(dp, &ovs_net->dps, list_node) {
55 int i;
56
57 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
58 struct vport *vport;
59 struct hlist_node *n;
60
61 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
62 struct netdev_vport *netdev_vport;
63
64 if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
65 continue;
66
67 netdev_vport = netdev_vport_priv(vport);
68 if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
69 netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
70 dp_detach_port_notify(vport);
71 }
72 }
73 }
74 ovs_unlock();
75}
76
26static int dp_device_event(struct notifier_block *unused, unsigned long event, 77static int dp_device_event(struct notifier_block *unused, unsigned long event,
27 void *ptr) 78 void *ptr)
28{ 79{
80 struct ovs_net *ovs_net;
29 struct net_device *dev = ptr; 81 struct net_device *dev = ptr;
30 struct vport *vport; 82 struct vport *vport = NULL;
31 83
32 if (ovs_is_internal_dev(dev)) 84 if (!ovs_is_internal_dev(dev))
33 vport = ovs_internal_dev_get_vport(dev);
34 else
35 vport = ovs_netdev_get_vport(dev); 85 vport = ovs_netdev_get_vport(dev);
36 86
37 if (!vport) 87 if (!vport)
38 return NOTIFY_DONE; 88 return NOTIFY_DONE;
39 89
40 switch (event) { 90 if (event == NETDEV_UNREGISTER) {
41 case NETDEV_UNREGISTER: 91 ovs_net = net_generic(dev_net(dev), ovs_net_id);
42 if (!ovs_is_internal_dev(dev)) { 92 queue_work(system_wq, &ovs_net->dp_notify_work);
43 struct sk_buff *notify;
44 struct datapath *dp = vport->dp;
45
46 notify = ovs_vport_cmd_build_info(vport, 0, 0,
47 OVS_VPORT_CMD_DEL);
48 ovs_dp_detach_port(vport);
49 if (IS_ERR(notify)) {
50 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
51 ovs_dp_vport_multicast_group.id,
52 PTR_ERR(notify));
53 break;
54 }
55
56 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
57 ovs_dp_vport_multicast_group.id,
58 GFP_KERNEL);
59 }
60 break;
61 } 93 }
62 94
63 return NOTIFY_DONE; 95 return NOTIFY_DONE;
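
The netdev notifier runs with RTNL held, and the vport operations now take RTNL inside ovs_mutex, so detaching ports directly from the notifier would invert that lock ordering. Instead the notifier only schedules per-namespace work, and ovs_dp_notify_wq() takes ovs_mutex in process context. Consolidated from the hunk above, the notifier reduces to:

static int dp_device_event(struct notifier_block *unused,
			   unsigned long event, void *ptr)
{
	struct ovs_net *ovs_net;
	struct net_device *dev = ptr;
	struct vport *vport = NULL;

	/* Internal devices are torn down by OVS itself, not by the stack. */
	if (!ovs_is_internal_dev(dev))
		vport = ovs_netdev_get_vport(dev);

	if (!vport)
		return NOTIFY_DONE;

	if (event == NETDEV_UNREGISTER) {
		/* Defer to process context; ovs_dp_notify_wq() takes ovs_mutex. */
		ovs_net = net_generic(dev_net(dev), ovs_net_id);
		queue_work(system_wq, &ovs_net->dp_notify_work);
	}

	return NOTIFY_DONE;
}
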
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 332486839347..cf9328be75e9 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
211 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
212 212
213 sfa->actions_len = actions_len; 213 sfa->actions_len = actions_len;
214 memcpy(sfa->actions, nla_data(actions), actions_len); 214 nla_memcpy(sfa->actions, actions, actions_len);
215 return sfa; 215 return sfa;
216} 216}
217 217
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5b..0875fde65b9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
138void ovs_flow_used(struct sw_flow *, struct sk_buff *); 138void ovs_flow_used(struct sw_flow *, struct sk_buff *);
139u64 ovs_flow_used_time(unsigned long flow_jiffies); 139u64 ovs_flow_used_time(unsigned long flow_jiffies);
140 140
141/* Upper bound on the length of a nlattr-formatted flow key. The longest
142 * nlattr-formatted flow key would be:
143 *
144 * struct pad nl hdr total
145 * ------ --- ------ -----
146 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
147 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
148 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
149 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
150 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
151 * OVS_KEY_ATTR_8021Q 4 -- 4 8
152 * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
153 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
154 * OVS_KEY_ATTR_IPV6 40 -- 4 44
155 * OVS_KEY_ATTR_ICMPV6 2 2 4 8
156 * OVS_KEY_ATTR_ND 28 -- 4 32
157 * -------------------------------------------------
158 * total 152
159 */
160#define FLOW_BUFSIZE 152
161
162int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
163int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
164 const struct nlattr *); 143 const struct nlattr *);
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 40f8a2489c90..9604760494b1 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -173,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
173 if (vport->port_no == OVSP_LOCAL) 173 if (vport->port_no == OVSP_LOCAL)
174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; 174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
175 175
176 rtnl_lock();
176 err = register_netdevice(netdev_vport->dev); 177 err = register_netdevice(netdev_vport->dev);
177 if (err) 178 if (err)
178 goto error_free_netdev; 179 goto error_free_netdev;
179 180
180 dev_set_promiscuity(netdev_vport->dev, 1); 181 dev_set_promiscuity(netdev_vport->dev, 1);
182 rtnl_unlock();
181 netif_start_queue(netdev_vport->dev); 183 netif_start_queue(netdev_vport->dev);
182 184
183 return vport; 185 return vport;
184 186
185error_free_netdev: 187error_free_netdev:
188 rtnl_unlock();
186 free_netdev(netdev_vport->dev); 189 free_netdev(netdev_vport->dev);
187error_free_vport: 190error_free_vport:
188 ovs_vport_free(vport); 191 ovs_vport_free(vport);
@@ -195,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
195 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 198 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
196 199
197 netif_stop_queue(netdev_vport->dev); 200 netif_stop_queue(netdev_vport->dev);
201 rtnl_lock();
198 dev_set_promiscuity(netdev_vport->dev, -1); 202 dev_set_promiscuity(netdev_vport->dev, -1);
199 203
200 /* unregister_netdevice() waits for an RCU grace period. */ 204 /* unregister_netdevice() waits for an RCU grace period. */
201 unregister_netdevice(netdev_vport->dev); 205 unregister_netdevice(netdev_vport->dev);
206
207 rtnl_unlock();
202} 208}
203 209
204static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) 210static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
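
Since callers now hold ovs_mutex rather than RTNL, the internal-device vport takes RTNL itself, scoped to the netdevice calls that require it (register_netdevice(), dev_set_promiscuity(), unregister_netdevice()); note that the error path in internal_dev_create() unlocks RTNL before free_netdev(). Reassembled from the hunk above, the destroy path becomes:

static void internal_dev_destroy(struct vport *vport)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

	netif_stop_queue(netdev_vport->dev);
	rtnl_lock();				/* RTNL only around the netdev calls */
	dev_set_promiscuity(netdev_vport->dev, -1);

	/* unregister_netdevice() waits for an RCU grace period. */
	unregister_netdevice(netdev_vport->dev);

	rtnl_unlock();
}
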
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..40a89ae8e19f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
100 goto error_put; 100 goto error_put;
101 } 101 }
102 102
103 rtnl_lock();
103 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 104 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
104 vport); 105 vport);
105 if (err) 106 if (err)
106 goto error_put; 107 goto error_unlock;
107 108
108 dev_set_promiscuity(netdev_vport->dev, 1); 109 dev_set_promiscuity(netdev_vport->dev, 1);
109 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 110 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
111 rtnl_unlock();
110 112
111 return vport; 113 return vport;
112 114
115error_unlock:
116 rtnl_unlock();
113error_put: 117error_put:
114 dev_put(netdev_vport->dev); 118 dev_put(netdev_vport->dev);
115error_free_vport: 119error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
131{ 135{
132 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 136 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
133 137
138 rtnl_lock();
134 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 139 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
135 netdev_rx_handler_unregister(netdev_vport->dev); 140 netdev_rx_handler_unregister(netdev_vport->dev);
136 dev_set_promiscuity(netdev_vport->dev, -1); 141 dev_set_promiscuity(netdev_vport->dev, -1);
142 rtnl_unlock();
137 143
138 call_rcu(&netdev_vport->rcu, free_port_rcu); 144 call_rcu(&netdev_vport->rcu, free_port_rcu);
139} 145}
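
vport-netdev.c follows the same pattern: RTNL is taken only around the rx-handler registration and unregistration, the promiscuity change, and the IFF_OVS_DATAPATH flag update. Reassembled from the hunk above:

static void netdev_destroy(struct vport *vport)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

	rtnl_lock();			/* RTNL only for the netdev operations themselves */
	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
	netdev_rx_handler_unregister(netdev_vport->dev);
	dev_set_promiscuity(netdev_vport->dev, -1);
	rtnl_unlock();

	call_rcu(&netdev_vport->rcu, free_port_rcu);
}
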
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4cb..720623190eaa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41}; 41};
42 42
43/* Protected by RCU read lock for reading, RTNL lock for writing. */ 43/* Protected by RCU read lock for reading, ovs_mutex for writing. */
44static struct hlist_head *dev_table; 44static struct hlist_head *dev_table;
45#define VPORT_HASH_BUCKETS 1024 45#define VPORT_HASH_BUCKETS 1024
46 46
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
80 * 80 *
81 * @name: name of port to find 81 * @name: name of port to find
82 * 82 *
83 * Must be called with RTNL or RCU read lock. 83 * Must be called with ovs_mutex or RCU read lock.
84 */ 84 */
85struct vport *ovs_vport_locate(struct net *net, const char *name) 85struct vport *ovs_vport_locate(struct net *net, const char *name)
86{ 86{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
128 vport->ops = ops; 128 vport->ops = ops;
129 INIT_HLIST_NODE(&vport->dp_hash_node); 129 INIT_HLIST_NODE(&vport->dp_hash_node);
130 130
131 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); 131 vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
132 if (!vport->percpu_stats) { 132 if (!vport->percpu_stats) {
133 kfree(vport); 133 kfree(vport);
134 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
161 * @parms: Information about new vport. 161 * @parms: Information about new vport.
162 * 162 *
163 * Creates a new vport with the specified configuration (which is dependent on 163 * Creates a new vport with the specified configuration (which is dependent on
164 * device type). RTNL lock must be held. 164 * device type). ovs_mutex must be held.
165 */ 165 */
166struct vport *ovs_vport_add(const struct vport_parms *parms) 166struct vport *ovs_vport_add(const struct vport_parms *parms)
167{ 167{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
169 int err = 0; 169 int err = 0;
170 int i; 170 int i;
171 171
172 ASSERT_RTNL();
173
174 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 172 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
175 if (vport_ops_list[i]->type == parms->type) { 173 if (vport_ops_list[i]->type == parms->type) {
176 struct hlist_head *bucket; 174 struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
201 * @port: New configuration. 199 * @port: New configuration.
202 * 200 *
203 * Modifies an existing device with the specified configuration (which is 201 * Modifies an existing device with the specified configuration (which is
204 * dependent on device type). RTNL lock must be held. 202 * dependent on device type). ovs_mutex must be held.
205 */ 203 */
206int ovs_vport_set_options(struct vport *vport, struct nlattr *options) 204int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
207{ 205{
208 ASSERT_RTNL();
209
210 if (!vport->ops->set_options) 206 if (!vport->ops->set_options)
211 return -EOPNOTSUPP; 207 return -EOPNOTSUPP;
212 return vport->ops->set_options(vport, options); 208 return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
218 * @vport: vport to delete. 214 * @vport: vport to delete.
219 * 215 *
220 * Detaches @vport from its datapath and destroys it. It is possible to fail 216 * Detaches @vport from its datapath and destroys it. It is possible to fail
221 * for reasons such as lack of memory. RTNL lock must be held. 217 * for reasons such as lack of memory. ovs_mutex must be held.
222 */ 218 */
223void ovs_vport_del(struct vport *vport) 219void ovs_vport_del(struct vport *vport)
224{ 220{
225 ASSERT_RTNL(); 221 ASSERT_OVSL();
226 222
227 hlist_del_rcu(&vport->hash_node); 223 hlist_del_rcu(&vport->hash_node);
228 224
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
237 * 233 *
238 * Retrieves transmit, receive, and error stats for the given device. 234 * Retrieves transmit, receive, and error stats for the given device.
239 * 235 *
240 * Must be called with RTNL lock or rcu_read_lock. 236 * Must be called with ovs_mutex or rcu_read_lock.
241 */ 237 */
242void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) 238void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
243{ 239{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
264 spin_unlock_bh(&vport->stats_lock); 260 spin_unlock_bh(&vport->stats_lock);
265 261
266 for_each_possible_cpu(i) { 262 for_each_possible_cpu(i) {
267 const struct vport_percpu_stats *percpu_stats; 263 const struct pcpu_tstats *percpu_stats;
268 struct vport_percpu_stats local_stats; 264 struct pcpu_tstats local_stats;
269 unsigned int start; 265 unsigned int start;
270 266
271 percpu_stats = per_cpu_ptr(vport->percpu_stats, i); 267 percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
272 268
273 do { 269 do {
274 start = u64_stats_fetch_begin_bh(&percpu_stats->sync); 270 start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
275 local_stats = *percpu_stats; 271 local_stats = *percpu_stats;
276 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); 272 } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
277 273
278 stats->rx_bytes += local_stats.rx_bytes; 274 stats->rx_bytes += local_stats.rx_bytes;
279 stats->rx_packets += local_stats.rx_packets; 275 stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
296 * negative error code if a real error occurred. If an error occurs, @skb is 292 * negative error code if a real error occurred. If an error occurs, @skb is
297 * left unmodified. 293 * left unmodified.
298 * 294 *
299 * Must be called with RTNL lock or rcu_read_lock. 295 * Must be called with ovs_mutex or rcu_read_lock.
300 */ 296 */
301int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) 297int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
302{ 298{
303 struct nlattr *nla; 299 struct nlattr *nla;
300 int err;
301
302 if (!vport->ops->get_options)
303 return 0;
304 304
305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); 305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
306 if (!nla) 306 if (!nla)
307 return -EMSGSIZE; 307 return -EMSGSIZE;
308 308
309 if (vport->ops->get_options) { 309 err = vport->ops->get_options(vport, skb);
310 int err = vport->ops->get_options(vport, skb); 310 if (err) {
311 if (err) { 311 nla_nest_cancel(skb, nla);
312 nla_nest_cancel(skb, nla); 312 return err;
313 return err;
314 }
315 } 313 }
316 314
317 nla_nest_end(skb, nla); 315 nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
329 */ 327 */
330void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
331{ 329{
332 struct vport_percpu_stats *stats; 330 struct pcpu_tstats *stats;
333 331
334 stats = this_cpu_ptr(vport->percpu_stats); 332 stats = this_cpu_ptr(vport->percpu_stats);
335 u64_stats_update_begin(&stats->sync); 333 u64_stats_update_begin(&stats->syncp);
336 stats->rx_packets++; 334 stats->rx_packets++;
337 stats->rx_bytes += skb->len; 335 stats->rx_bytes += skb->len;
338 u64_stats_update_end(&stats->sync); 336 u64_stats_update_end(&stats->syncp);
339 337
340 ovs_dp_process_received_packet(vport, skb); 338 ovs_dp_process_received_packet(vport, skb);
341} 339}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
346 * @vport: vport on which to send the packet 344 * @vport: vport on which to send the packet
347 * @skb: skb to send 345 * @skb: skb to send
348 * 346 *
349 * Sends the given packet and returns the length of data sent. Either RTNL 347 * Sends the given packet and returns the length of data sent. Either ovs
350 * lock or rcu_read_lock must be held. 348 * lock or rcu_read_lock must be held.
351 */ 349 */
352int ovs_vport_send(struct vport *vport, struct sk_buff *skb) 350int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
354 int sent = vport->ops->send(vport, skb); 352 int sent = vport->ops->send(vport, skb);
355 353
356 if (likely(sent)) { 354 if (likely(sent)) {
357 struct vport_percpu_stats *stats; 355 struct pcpu_tstats *stats;
358 356
359 stats = this_cpu_ptr(vport->percpu_stats); 357 stats = this_cpu_ptr(vport->percpu_stats);
360 358
361 u64_stats_update_begin(&stats->sync); 359 u64_stats_update_begin(&stats->syncp);
362 stats->tx_packets++; 360 stats->tx_packets++;
363 stats->tx_bytes += sent; 361 stats->tx_bytes += sent;
364 u64_stats_update_end(&stats->sync); 362 u64_stats_update_end(&stats->syncp);
365 } 363 }
366 return sent; 364 return sent;
367} 365}
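
vport.c also drops the private vport_percpu_stats in favour of the generic struct pcpu_tstats (hence the <linux/if_tunnel.h> include added in vport.h), whose counters are published under a u64_stats syncp. Consolidated from the ovs_vport_get_stats() hunk above, the reader side looks like this; the wrapper name example_sum_tstats() is added only to make the fragment self-contained:

/* Illustration: summing per-CPU pcpu_tstats into struct ovs_vport_stats. */
static void example_sum_tstats(const struct vport *vport,
			       struct ovs_vport_stats *stats)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *percpu_stats;
		struct pcpu_tstats local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);

		do {
			/* Retry if a writer updated the counters meanwhile. */
			start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));

		stats->rx_bytes   += local_stats.rx_bytes;
		stats->rx_packets += local_stats.rx_packets;
		stats->tx_bytes   += local_stats.tx_bytes;
		stats->tx_packets += local_stats.tx_packets;
	}
}
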
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aee7d43114c9..7ba08c30b853 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
19#ifndef VPORT_H 19#ifndef VPORT_H
20#define VPORT_H 1 20#define VPORT_H 1
21 21
22#include <linux/if_tunnel.h>
22#include <linux/list.h> 23#include <linux/list.h>
23#include <linux/netlink.h> 24#include <linux/netlink.h>
24#include <linux/openvswitch.h> 25#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
50 51
51/* The following definitions are for implementers of vport devices: */ 52/* The following definitions are for implementers of vport devices: */
52 53
53struct vport_percpu_stats {
54 u64 rx_bytes;
55 u64 rx_packets;
56 u64 tx_bytes;
57 u64 tx_packets;
58 struct u64_stats_sync sync;
59};
60
61struct vport_err_stats { 54struct vport_err_stats {
62 u64 rx_dropped; 55 u64 rx_dropped;
63 u64 rx_errors; 56 u64 rx_errors;
@@ -89,7 +82,7 @@ struct vport {
89 struct hlist_node dp_hash_node; 82 struct hlist_node dp_hash_node;
90 const struct vport_ops *ops; 83 const struct vport_ops *ops;
91 84
92 struct vport_percpu_stats __percpu *percpu_stats; 85 struct pcpu_tstats __percpu *percpu_stats;
93 86
94 spinlock_t stats_lock; 87 spinlock_t stats_lock;
95 struct vport_err_stats err_stats; 88 struct vport_err_stats err_stats;
@@ -138,14 +131,14 @@ struct vport_parms {
138struct vport_ops { 131struct vport_ops {
139 enum ovs_vport_type type; 132 enum ovs_vport_type type;
140 133
141 /* Called with RTNL lock. */ 134 /* Called with ovs_mutex. */
142 struct vport *(*create)(const struct vport_parms *); 135 struct vport *(*create)(const struct vport_parms *);
143 void (*destroy)(struct vport *); 136 void (*destroy)(struct vport *);
144 137
145 int (*set_options)(struct vport *, struct nlattr *); 138 int (*set_options)(struct vport *, struct nlattr *);
146 int (*get_options)(const struct vport *, struct sk_buff *); 139 int (*get_options)(const struct vport *, struct sk_buff *);
147 140
148 /* Called with rcu_read_lock or RTNL lock. */ 141 /* Called with rcu_read_lock or ovs_mutex. */
149 const char *(*get_name)(const struct vport *); 142 const char *(*get_name)(const struct vport *);
150 void (*get_config)(const struct vport *, void *); 143 void (*get_config)(const struct vport *, void *);
151 int (*get_ifindex)(const struct vport *); 144 int (*get_ifindex)(const struct vport *);