aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-09-04 15:22:28 -0400
committerDavid S. Miller <davem@davemloft.net>2012-09-04 15:22:28 -0400
commitcefd81cfeca14ec4c63cc748441634f1d4c0eb3f (patch)
treeb4541c563d40c4600e79319c2e98919caa19cede /net/openvswitch
parent3731a334c012ed825a87e1f152bbf980f79dabb2 (diff)
parent15eac2a74277bc7de68a7c2a64a7c91b4b6f5961 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c2
-rw-r--r--net/openvswitch/datapath.c375
-rw-r--r--net/openvswitch/datapath.h50
-rw-r--r--net/openvswitch/dp_notify.c8
-rw-r--r--net/openvswitch/flow.c11
-rw-r--r--net/openvswitch/flow.h3
-rw-r--r--net/openvswitch/vport-internal_dev.c7
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport.c23
-rw-r--r--net/openvswitch/vport.h7
10 files changed, 317 insertions, 171 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index f3f96badf5aa..0da687769f56 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
266 if (unlikely(!skb)) 266 if (unlikely(!skb))
267 return -ENOMEM; 267 return -ENOMEM;
268 268
269 vport = rcu_dereference(dp->ports[out_port]); 269 vport = ovs_vport_rcu(dp, out_port);
270 if (unlikely(!vport)) { 270 if (unlikely(!vport)) {
271 kfree_skb(skb); 271 kfree_skb(skb);
272 return -ENODEV; 272 return -ENODEV;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8277d29e710..105a0b5adc51 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -49,12 +49,29 @@
49#include <linux/dmi.h> 49#include <linux/dmi.h>
50#include <linux/workqueue.h> 50#include <linux/workqueue.h>
51#include <net/genetlink.h> 51#include <net/genetlink.h>
52#include <net/net_namespace.h>
53#include <net/netns/generic.h>
52 54
53#include "datapath.h" 55#include "datapath.h"
54#include "flow.h" 56#include "flow.h"
55#include "vport-internal_dev.h" 57#include "vport-internal_dev.h"
56 58
57/** 59/**
60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex.
63 */
64struct ovs_net {
65 struct list_head dps;
66};
67
68static int ovs_net_id __read_mostly;
69
70#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71static void rehash_flow_table(struct work_struct *work);
72static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73
74/**
58 * DOC: Locking: 75 * DOC: Locking:
59 * 76 *
60 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * Writes to device state (add/remove datapath, port, set operations on vports,
@@ -71,29 +88,21 @@
71 * each other. 88 * each other.
72 */ 89 */
73 90
74/* Global list of datapaths to enable dumping them all out.
75 * Protected by genl_mutex.
76 */
77static LIST_HEAD(dps);
78
79#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
80static void rehash_flow_table(struct work_struct *work);
81static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
82
83static struct vport *new_vport(const struct vport_parms *); 91static struct vport *new_vport(const struct vport_parms *);
84static int queue_gso_packets(int dp_ifindex, struct sk_buff *, 92static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
85 const struct dp_upcall_info *); 93 const struct dp_upcall_info *);
86static int queue_userspace_packet(int dp_ifindex, struct sk_buff *, 94static int queue_userspace_packet(struct net *, int dp_ifindex,
95 struct sk_buff *,
87 const struct dp_upcall_info *); 96 const struct dp_upcall_info *);
88 97
89/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 98/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
90static struct datapath *get_dp(int dp_ifindex) 99static struct datapath *get_dp(struct net *net, int dp_ifindex)
91{ 100{
92 struct datapath *dp = NULL; 101 struct datapath *dp = NULL;
93 struct net_device *dev; 102 struct net_device *dev;
94 103
95 rcu_read_lock(); 104 rcu_read_lock();
96 dev = dev_get_by_index_rcu(&init_net, dp_ifindex); 105 dev = dev_get_by_index_rcu(net, dp_ifindex);
97 if (dev) { 106 if (dev) {
98 struct vport *vport = ovs_internal_dev_get_vport(dev); 107 struct vport *vport = ovs_internal_dev_get_vport(dev);
99 if (vport) 108 if (vport)
@@ -107,7 +116,7 @@ static struct datapath *get_dp(int dp_ifindex)
107/* Must be called with rcu_read_lock or RTNL lock. */ 116/* Must be called with rcu_read_lock or RTNL lock. */
108const char *ovs_dp_name(const struct datapath *dp) 117const char *ovs_dp_name(const struct datapath *dp)
109{ 118{
110 struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]); 119 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
111 return vport->ops->get_name(vport); 120 return vport->ops->get_name(vport);
112} 121}
113 122
@@ -118,7 +127,7 @@ static int get_dpifindex(struct datapath *dp)
118 127
119 rcu_read_lock(); 128 rcu_read_lock();
120 129
121 local = rcu_dereference(dp->ports[OVSP_LOCAL]); 130 local = ovs_vport_rcu(dp, OVSP_LOCAL);
122 if (local) 131 if (local)
123 ifindex = local->ops->get_ifindex(local); 132 ifindex = local->ops->get_ifindex(local);
124 else 133 else
@@ -135,9 +144,31 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
135 144
136 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); 145 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
137 free_percpu(dp->stats_percpu); 146 free_percpu(dp->stats_percpu);
147 release_net(ovs_dp_get_net(dp));
148 kfree(dp->ports);
138 kfree(dp); 149 kfree(dp);
139} 150}
140 151
152static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
153 u16 port_no)
154{
155 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
156}
157
158struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
159{
160 struct vport *vport;
161 struct hlist_node *n;
162 struct hlist_head *head;
163
164 head = vport_hash_bucket(dp, port_no);
165 hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
166 if (vport->port_no == port_no)
167 return vport;
168 }
169 return NULL;
170}
171
141/* Called with RTNL lock and genl_lock. */ 172/* Called with RTNL lock and genl_lock. */
142static struct vport *new_vport(const struct vport_parms *parms) 173static struct vport *new_vport(const struct vport_parms *parms)
143{ 174{
@@ -146,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms)
146 vport = ovs_vport_add(parms); 177 vport = ovs_vport_add(parms);
147 if (!IS_ERR(vport)) { 178 if (!IS_ERR(vport)) {
148 struct datapath *dp = parms->dp; 179 struct datapath *dp = parms->dp;
180 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
149 181
150 rcu_assign_pointer(dp->ports[parms->port_no], vport); 182 hlist_add_head_rcu(&vport->dp_hash_node, head);
151 list_add(&vport->node, &dp->port_list);
152 } 183 }
153 184
154 return vport; 185 return vport;
@@ -160,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p)
160 ASSERT_RTNL(); 191 ASSERT_RTNL();
161 192
162 /* First drop references to device. */ 193 /* First drop references to device. */
163 list_del(&p->node); 194 hlist_del_rcu(&p->dp_hash_node);
164 rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
165 195
166 /* Then destroy it. */ 196 /* Then destroy it. */
167 ovs_vport_del(p); 197 ovs_vport_del(p);
@@ -220,11 +250,12 @@ static struct genl_family dp_packet_genl_family = {
220 .hdrsize = sizeof(struct ovs_header), 250 .hdrsize = sizeof(struct ovs_header),
221 .name = OVS_PACKET_FAMILY, 251 .name = OVS_PACKET_FAMILY,
222 .version = OVS_PACKET_VERSION, 252 .version = OVS_PACKET_VERSION,
223 .maxattr = OVS_PACKET_ATTR_MAX 253 .maxattr = OVS_PACKET_ATTR_MAX,
254 .netnsok = true
224}; 255};
225 256
226int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, 257int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
227 const struct dp_upcall_info *upcall_info) 258 const struct dp_upcall_info *upcall_info)
228{ 259{
229 struct dp_stats_percpu *stats; 260 struct dp_stats_percpu *stats;
230 int dp_ifindex; 261 int dp_ifindex;
@@ -242,9 +273,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
242 } 273 }
243 274
244 if (!skb_is_gso(skb)) 275 if (!skb_is_gso(skb))
245 err = queue_userspace_packet(dp_ifindex, skb, upcall_info); 276 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
246 else 277 else
247 err = queue_gso_packets(dp_ifindex, skb, upcall_info); 278 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
248 if (err) 279 if (err)
249 goto err; 280 goto err;
250 281
@@ -260,7 +291,8 @@ err:
260 return err; 291 return err;
261} 292}
262 293
263static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb, 294static int queue_gso_packets(struct net *net, int dp_ifindex,
295 struct sk_buff *skb,
264 const struct dp_upcall_info *upcall_info) 296 const struct dp_upcall_info *upcall_info)
265{ 297{
266 unsigned short gso_type = skb_shinfo(skb)->gso_type; 298 unsigned short gso_type = skb_shinfo(skb)->gso_type;
@@ -276,7 +308,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
276 /* Queue all of the segments. */ 308 /* Queue all of the segments. */
277 skb = segs; 309 skb = segs;
278 do { 310 do {
279 err = queue_userspace_packet(dp_ifindex, skb, upcall_info); 311 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
280 if (err) 312 if (err)
281 break; 313 break;
282 314
@@ -306,7 +338,8 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
306 return err; 338 return err;
307} 339}
308 340
309static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb, 341static int queue_userspace_packet(struct net *net, int dp_ifindex,
342 struct sk_buff *skb,
310 const struct dp_upcall_info *upcall_info) 343 const struct dp_upcall_info *upcall_info)
311{ 344{
312 struct ovs_header *upcall; 345 struct ovs_header *upcall;
@@ -362,7 +395,7 @@ static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
362 395
363 skb_copy_and_csum_dev(skb, nla_data(nla)); 396 skb_copy_and_csum_dev(skb, nla_data(nla));
364 397
365 err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid); 398 err = genlmsg_unicast(net, user_skb, upcall_info->pid);
366 399
367out: 400out:
368 kfree_skb(nskb); 401 kfree_skb(nskb);
@@ -370,15 +403,10 @@ out:
370} 403}
371 404
372/* Called with genl_mutex. */ 405/* Called with genl_mutex. */
373static int flush_flows(int dp_ifindex) 406static int flush_flows(struct datapath *dp)
374{ 407{
375 struct flow_table *old_table; 408 struct flow_table *old_table;
376 struct flow_table *new_table; 409 struct flow_table *new_table;
377 struct datapath *dp;
378
379 dp = get_dp(dp_ifindex);
380 if (!dp)
381 return -ENODEV;
382 410
383 old_table = genl_dereference(dp->table); 411 old_table = genl_dereference(dp->table);
384 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 412 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
@@ -668,7 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
668 packet->priority = flow->key.phy.priority; 696 packet->priority = flow->key.phy.priority;
669 697
670 rcu_read_lock(); 698 rcu_read_lock();
671 dp = get_dp(ovs_header->dp_ifindex); 699 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
672 err = -ENODEV; 700 err = -ENODEV;
673 if (!dp) 701 if (!dp)
674 goto err_unlock; 702 goto err_unlock;
@@ -742,7 +770,8 @@ static struct genl_family dp_flow_genl_family = {
742 .hdrsize = sizeof(struct ovs_header), 770 .hdrsize = sizeof(struct ovs_header),
743 .name = OVS_FLOW_FAMILY, 771 .name = OVS_FLOW_FAMILY,
744 .version = OVS_FLOW_VERSION, 772 .version = OVS_FLOW_VERSION,
745 .maxattr = OVS_FLOW_ATTR_MAX 773 .maxattr = OVS_FLOW_ATTR_MAX,
774 .netnsok = true
746}; 775};
747 776
748static struct genl_multicast_group ovs_dp_flow_multicast_group = { 777static struct genl_multicast_group ovs_dp_flow_multicast_group = {
@@ -894,7 +923,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
894 goto error; 923 goto error;
895 } 924 }
896 925
897 dp = get_dp(ovs_header->dp_ifindex); 926 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
898 error = -ENODEV; 927 error = -ENODEV;
899 if (!dp) 928 if (!dp)
900 goto error; 929 goto error;
@@ -995,7 +1024,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
995 ovs_dp_flow_multicast_group.id, info->nlhdr, 1024 ovs_dp_flow_multicast_group.id, info->nlhdr,
996 GFP_KERNEL); 1025 GFP_KERNEL);
997 else 1026 else
998 netlink_set_err(init_net.genl_sock, 0, 1027 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
999 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1028 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1000 return 0; 1029 return 0;
1001 1030
@@ -1023,7 +1052,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1023 if (err) 1052 if (err)
1024 return err; 1053 return err;
1025 1054
1026 dp = get_dp(ovs_header->dp_ifindex); 1055 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1027 if (!dp) 1056 if (!dp)
1028 return -ENODEV; 1057 return -ENODEV;
1029 1058
@@ -1052,16 +1081,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1052 int err; 1081 int err;
1053 int key_len; 1082 int key_len;
1054 1083
1084 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1085 if (!dp)
1086 return -ENODEV;
1087
1055 if (!a[OVS_FLOW_ATTR_KEY]) 1088 if (!a[OVS_FLOW_ATTR_KEY])
1056 return flush_flows(ovs_header->dp_ifindex); 1089 return flush_flows(dp);
1090
1057 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1091 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1058 if (err) 1092 if (err)
1059 return err; 1093 return err;
1060 1094
1061 dp = get_dp(ovs_header->dp_ifindex);
1062 if (!dp)
1063 return -ENODEV;
1064
1065 table = genl_dereference(dp->table); 1095 table = genl_dereference(dp->table);
1066 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1096 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1067 if (!flow) 1097 if (!flow)
@@ -1090,7 +1120,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1090 struct datapath *dp; 1120 struct datapath *dp;
1091 struct flow_table *table; 1121 struct flow_table *table;
1092 1122
1093 dp = get_dp(ovs_header->dp_ifindex); 1123 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1094 if (!dp) 1124 if (!dp)
1095 return -ENODEV; 1125 return -ENODEV;
1096 1126
@@ -1152,7 +1182,8 @@ static struct genl_family dp_datapath_genl_family = {
1152 .hdrsize = sizeof(struct ovs_header), 1182 .hdrsize = sizeof(struct ovs_header),
1153 .name = OVS_DATAPATH_FAMILY, 1183 .name = OVS_DATAPATH_FAMILY,
1154 .version = OVS_DATAPATH_VERSION, 1184 .version = OVS_DATAPATH_VERSION,
1155 .maxattr = OVS_DP_ATTR_MAX 1185 .maxattr = OVS_DP_ATTR_MAX,
1186 .netnsok = true
1156}; 1187};
1157 1188
1158static struct genl_multicast_group ovs_dp_datapath_multicast_group = { 1189static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
@@ -1210,18 +1241,19 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
1210} 1241}
1211 1242
1212/* Called with genl_mutex and optionally with RTNL lock also. */ 1243/* Called with genl_mutex and optionally with RTNL lock also. */
1213static struct datapath *lookup_datapath(struct ovs_header *ovs_header, 1244static struct datapath *lookup_datapath(struct net *net,
1245 struct ovs_header *ovs_header,
1214 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1246 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1215{ 1247{
1216 struct datapath *dp; 1248 struct datapath *dp;
1217 1249
1218 if (!a[OVS_DP_ATTR_NAME]) 1250 if (!a[OVS_DP_ATTR_NAME])
1219 dp = get_dp(ovs_header->dp_ifindex); 1251 dp = get_dp(net, ovs_header->dp_ifindex);
1220 else { 1252 else {
1221 struct vport *vport; 1253 struct vport *vport;
1222 1254
1223 rcu_read_lock(); 1255 rcu_read_lock();
1224 vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME])); 1256 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1225 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; 1257 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1226 rcu_read_unlock(); 1258 rcu_read_unlock();
1227 } 1259 }
@@ -1235,22 +1267,21 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1235 struct sk_buff *reply; 1267 struct sk_buff *reply;
1236 struct datapath *dp; 1268 struct datapath *dp;
1237 struct vport *vport; 1269 struct vport *vport;
1238 int err; 1270 struct ovs_net *ovs_net;
1271 int err, i;
1239 1272
1240 err = -EINVAL; 1273 err = -EINVAL;
1241 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1274 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1242 goto err; 1275 goto err;
1243 1276
1244 rtnl_lock(); 1277 rtnl_lock();
1245 err = -ENODEV;
1246 if (!try_module_get(THIS_MODULE))
1247 goto err_unlock_rtnl;
1248 1278
1249 err = -ENOMEM; 1279 err = -ENOMEM;
1250 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1280 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1251 if (dp == NULL) 1281 if (dp == NULL)
1252 goto err_put_module; 1282 goto err_unlock_rtnl;
1253 INIT_LIST_HEAD(&dp->port_list); 1283
1284 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1254 1285
1255 /* Allocate table. */ 1286 /* Allocate table. */
1256 err = -ENOMEM; 1287 err = -ENOMEM;
@@ -1264,6 +1295,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1264 goto err_destroy_table; 1295 goto err_destroy_table;
1265 } 1296 }
1266 1297
1298 dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1299 GFP_KERNEL);
1300 if (!dp->ports) {
1301 err = -ENOMEM;
1302 goto err_destroy_percpu;
1303 }
1304
1305 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1306 INIT_HLIST_HEAD(&dp->ports[i]);
1307
1267 /* Set up our datapath device. */ 1308 /* Set up our datapath device. */
1268 parms.name = nla_data(a[OVS_DP_ATTR_NAME]); 1309 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1269 parms.type = OVS_VPORT_TYPE_INTERNAL; 1310 parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1278,7 +1319,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1278 if (err == -EBUSY) 1319 if (err == -EBUSY)
1279 err = -EEXIST; 1320 err = -EEXIST;
1280 1321
1281 goto err_destroy_percpu; 1322 goto err_destroy_ports_array;
1282 } 1323 }
1283 1324
1284 reply = ovs_dp_cmd_build_info(dp, info->snd_pid, 1325 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
@@ -1287,7 +1328,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1287 if (IS_ERR(reply)) 1328 if (IS_ERR(reply))
1288 goto err_destroy_local_port; 1329 goto err_destroy_local_port;
1289 1330
1290 list_add_tail(&dp->list_node, &dps); 1331 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1332 list_add_tail(&dp->list_node, &ovs_net->dps);
1291 rtnl_unlock(); 1333 rtnl_unlock();
1292 1334
1293 genl_notify(reply, genl_info_net(info), info->snd_pid, 1335 genl_notify(reply, genl_info_net(info), info->snd_pid,
@@ -1296,46 +1338,40 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1296 return 0; 1338 return 0;
1297 1339
1298err_destroy_local_port: 1340err_destroy_local_port:
1299 ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); 1341 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1342err_destroy_ports_array:
1343 kfree(dp->ports);
1300err_destroy_percpu: 1344err_destroy_percpu:
1301 free_percpu(dp->stats_percpu); 1345 free_percpu(dp->stats_percpu);
1302err_destroy_table: 1346err_destroy_table:
1303 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1347 ovs_flow_tbl_destroy(genl_dereference(dp->table));
1304err_free_dp: 1348err_free_dp:
1349 release_net(ovs_dp_get_net(dp));
1305 kfree(dp); 1350 kfree(dp);
1306err_put_module:
1307 module_put(THIS_MODULE);
1308err_unlock_rtnl: 1351err_unlock_rtnl:
1309 rtnl_unlock(); 1352 rtnl_unlock();
1310err: 1353err:
1311 return err; 1354 return err;
1312} 1355}
1313 1356
1314static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) 1357/* Called with genl_mutex. */
1358static void __dp_destroy(struct datapath *dp)
1315{ 1359{
1316 struct vport *vport, *next_vport; 1360 int i;
1317 struct sk_buff *reply;
1318 struct datapath *dp;
1319 int err;
1320 1361
1321 rtnl_lock(); 1362 rtnl_lock();
1322 dp = lookup_datapath(info->userhdr, info->attrs);
1323 err = PTR_ERR(dp);
1324 if (IS_ERR(dp))
1325 goto exit_unlock;
1326 1363
1327 reply = ovs_dp_cmd_build_info(dp, info->snd_pid, 1364 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1328 info->snd_seq, OVS_DP_CMD_DEL); 1365 struct vport *vport;
1329 err = PTR_ERR(reply); 1366 struct hlist_node *node, *n;
1330 if (IS_ERR(reply))
1331 goto exit_unlock;
1332 1367
1333 list_for_each_entry_safe(vport, next_vport, &dp->port_list, node) 1368 hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
1334 if (vport->port_no != OVSP_LOCAL) 1369 if (vport->port_no != OVSP_LOCAL)
1335 ovs_dp_detach_port(vport); 1370 ovs_dp_detach_port(vport);
1371 }
1336 1372
1337 list_del(&dp->list_node); 1373 list_del(&dp->list_node);
1338 ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL])); 1374 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1339 1375
1340 /* rtnl_unlock() will wait until all the references to devices that 1376 /* rtnl_unlock() will wait until all the references to devices that
1341 * are pending unregistration have been dropped. We do it here to 1377 * are pending unregistration have been dropped. We do it here to
@@ -1345,17 +1381,32 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1345 rtnl_unlock(); 1381 rtnl_unlock();
1346 1382
1347 call_rcu(&dp->rcu, destroy_dp_rcu); 1383 call_rcu(&dp->rcu, destroy_dp_rcu);
1348 module_put(THIS_MODULE); 1384}
1385
1386static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1387{
1388 struct sk_buff *reply;
1389 struct datapath *dp;
1390 int err;
1391
1392 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1393 err = PTR_ERR(dp);
1394 if (IS_ERR(dp))
1395 return err;
1396
1397 reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1398 info->snd_seq, OVS_DP_CMD_DEL);
1399 err = PTR_ERR(reply);
1400 if (IS_ERR(reply))
1401 return err;
1402
1403 __dp_destroy(dp);
1349 1404
1350 genl_notify(reply, genl_info_net(info), info->snd_pid, 1405 genl_notify(reply, genl_info_net(info), info->snd_pid,
1351 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1406 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1352 GFP_KERNEL); 1407 GFP_KERNEL);
1353 1408
1354 return 0; 1409 return 0;
1355
1356exit_unlock:
1357 rtnl_unlock();
1358 return err;
1359} 1410}
1360 1411
1361static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1412static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1364,7 +1415,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1364 struct datapath *dp; 1415 struct datapath *dp;
1365 int err; 1416 int err;
1366 1417
1367 dp = lookup_datapath(info->userhdr, info->attrs); 1418 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1368 if (IS_ERR(dp)) 1419 if (IS_ERR(dp))
1369 return PTR_ERR(dp); 1420 return PTR_ERR(dp);
1370 1421
@@ -1372,7 +1423,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1372 info->snd_seq, OVS_DP_CMD_NEW); 1423 info->snd_seq, OVS_DP_CMD_NEW);
1373 if (IS_ERR(reply)) { 1424 if (IS_ERR(reply)) {
1374 err = PTR_ERR(reply); 1425 err = PTR_ERR(reply);
1375 netlink_set_err(init_net.genl_sock, 0, 1426 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1376 ovs_dp_datapath_multicast_group.id, err); 1427 ovs_dp_datapath_multicast_group.id, err);
1377 return 0; 1428 return 0;
1378 } 1429 }
@@ -1389,7 +1440,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1389 struct sk_buff *reply; 1440 struct sk_buff *reply;
1390 struct datapath *dp; 1441 struct datapath *dp;
1391 1442
1392 dp = lookup_datapath(info->userhdr, info->attrs); 1443 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1393 if (IS_ERR(dp)) 1444 if (IS_ERR(dp))
1394 return PTR_ERR(dp); 1445 return PTR_ERR(dp);
1395 1446
@@ -1403,11 +1454,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1403 1454
1404static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1455static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1405{ 1456{
1457 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1406 struct datapath *dp; 1458 struct datapath *dp;
1407 int skip = cb->args[0]; 1459 int skip = cb->args[0];
1408 int i = 0; 1460 int i = 0;
1409 1461
1410 list_for_each_entry(dp, &dps, list_node) { 1462 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1411 if (i >= skip && 1463 if (i >= skip &&
1412 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, 1464 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1413 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1465 cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1459,7 +1511,8 @@ static struct genl_family dp_vport_genl_family = {
1459 .hdrsize = sizeof(struct ovs_header), 1511 .hdrsize = sizeof(struct ovs_header),
1460 .name = OVS_VPORT_FAMILY, 1512 .name = OVS_VPORT_FAMILY,
1461 .version = OVS_VPORT_VERSION, 1513 .version = OVS_VPORT_VERSION,
1462 .maxattr = OVS_VPORT_ATTR_MAX 1514 .maxattr = OVS_VPORT_ATTR_MAX,
1515 .netnsok = true
1463}; 1516};
1464 1517
1465struct genl_multicast_group ovs_dp_vport_multicast_group = { 1518struct genl_multicast_group ovs_dp_vport_multicast_group = {
@@ -1525,14 +1578,15 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1525} 1578}
1526 1579
1527/* Called with RTNL lock or RCU read lock. */ 1580/* Called with RTNL lock or RCU read lock. */
1528static struct vport *lookup_vport(struct ovs_header *ovs_header, 1581static struct vport *lookup_vport(struct net *net,
1582 struct ovs_header *ovs_header,
1529 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1583 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1530{ 1584{
1531 struct datapath *dp; 1585 struct datapath *dp;
1532 struct vport *vport; 1586 struct vport *vport;
1533 1587
1534 if (a[OVS_VPORT_ATTR_NAME]) { 1588 if (a[OVS_VPORT_ATTR_NAME]) {
1535 vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME])); 1589 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1536 if (!vport) 1590 if (!vport)
1537 return ERR_PTR(-ENODEV); 1591 return ERR_PTR(-ENODEV);
1538 if (ovs_header->dp_ifindex && 1592 if (ovs_header->dp_ifindex &&
@@ -1545,11 +1599,11 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header,
1545 if (port_no >= DP_MAX_PORTS) 1599 if (port_no >= DP_MAX_PORTS)
1546 return ERR_PTR(-EFBIG); 1600 return ERR_PTR(-EFBIG);
1547 1601
1548 dp = get_dp(ovs_header->dp_ifindex); 1602 dp = get_dp(net, ovs_header->dp_ifindex);
1549 if (!dp) 1603 if (!dp)
1550 return ERR_PTR(-ENODEV); 1604 return ERR_PTR(-ENODEV);
1551 1605
1552 vport = rcu_dereference_rtnl(dp->ports[port_no]); 1606 vport = ovs_vport_rtnl_rcu(dp, port_no);
1553 if (!vport) 1607 if (!vport)
1554 return ERR_PTR(-ENOENT); 1608 return ERR_PTR(-ENOENT);
1555 return vport; 1609 return vport;
@@ -1574,7 +1628,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1574 goto exit; 1628 goto exit;
1575 1629
1576 rtnl_lock(); 1630 rtnl_lock();
1577 dp = get_dp(ovs_header->dp_ifindex); 1631 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1578 err = -ENODEV; 1632 err = -ENODEV;
1579 if (!dp) 1633 if (!dp)
1580 goto exit_unlock; 1634 goto exit_unlock;
@@ -1586,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1586 if (port_no >= DP_MAX_PORTS) 1640 if (port_no >= DP_MAX_PORTS)
1587 goto exit_unlock; 1641 goto exit_unlock;
1588 1642
1589 vport = rtnl_dereference(dp->ports[port_no]); 1643 vport = ovs_vport_rtnl_rcu(dp, port_no);
1590 err = -EBUSY; 1644 err = -EBUSY;
1591 if (vport) 1645 if (vport)
1592 goto exit_unlock; 1646 goto exit_unlock;
@@ -1596,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1596 err = -EFBIG; 1650 err = -EFBIG;
1597 goto exit_unlock; 1651 goto exit_unlock;
1598 } 1652 }
1599 vport = rtnl_dereference(dp->ports[port_no]); 1653 vport = ovs_vport_rtnl(dp, port_no);
1600 if (!vport) 1654 if (!vport)
1601 break; 1655 break;
1602 } 1656 }
@@ -1638,7 +1692,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1638 int err; 1692 int err;
1639 1693
1640 rtnl_lock(); 1694 rtnl_lock();
1641 vport = lookup_vport(info->userhdr, a); 1695 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1642 err = PTR_ERR(vport); 1696 err = PTR_ERR(vport);
1643 if (IS_ERR(vport)) 1697 if (IS_ERR(vport))
1644 goto exit_unlock; 1698 goto exit_unlock;
@@ -1658,7 +1712,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1658 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, 1712 reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1659 OVS_VPORT_CMD_NEW); 1713 OVS_VPORT_CMD_NEW);
1660 if (IS_ERR(reply)) { 1714 if (IS_ERR(reply)) {
1661 netlink_set_err(init_net.genl_sock, 0, 1715 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1662 ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); 1716 ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
1663 goto exit_unlock; 1717 goto exit_unlock;
1664 } 1718 }
@@ -1679,7 +1733,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1679 int err; 1733 int err;
1680 1734
1681 rtnl_lock(); 1735 rtnl_lock();
1682 vport = lookup_vport(info->userhdr, a); 1736 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1683 err = PTR_ERR(vport); 1737 err = PTR_ERR(vport);
1684 if (IS_ERR(vport)) 1738 if (IS_ERR(vport))
1685 goto exit_unlock; 1739 goto exit_unlock;
@@ -1714,7 +1768,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1714 int err; 1768 int err;
1715 1769
1716 rcu_read_lock(); 1770 rcu_read_lock();
1717 vport = lookup_vport(ovs_header, a); 1771 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
1718 err = PTR_ERR(vport); 1772 err = PTR_ERR(vport);
1719 if (IS_ERR(vport)) 1773 if (IS_ERR(vport))
1720 goto exit_unlock; 1774 goto exit_unlock;
@@ -1738,54 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1738{ 1792{
1739 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); 1793 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1740 struct datapath *dp; 1794 struct datapath *dp;
1741 u32 port_no; 1795 int bucket = cb->args[0], skip = cb->args[1];
1742 int retval; 1796 int i, j = 0;
1743 1797
1744 dp = get_dp(ovs_header->dp_ifindex); 1798 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1745 if (!dp) 1799 if (!dp)
1746 return -ENODEV; 1800 return -ENODEV;
1747 1801
1748 rcu_read_lock(); 1802 rcu_read_lock();
1749 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) { 1803 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
1750 struct vport *vport; 1804 struct vport *vport;
1751 1805 struct hlist_node *n;
1752 vport = rcu_dereference(dp->ports[port_no]); 1806
1753 if (!vport) 1807 j = 0;
1754 continue; 1808 hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
1755 1809 if (j >= skip &&
1756 if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid, 1810 ovs_vport_cmd_fill_info(vport, skb,
1757 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1811 NETLINK_CB(cb->skb).pid,
1758 OVS_VPORT_CMD_NEW) < 0) 1812 cb->nlh->nlmsg_seq,
1759 break; 1813 NLM_F_MULTI,
1760 } 1814 OVS_VPORT_CMD_NEW) < 0)
1761 rcu_read_unlock(); 1815 goto out;
1762 1816
1763 cb->args[0] = port_no; 1817 j++;
1764 retval = skb->len;
1765
1766 return retval;
1767}
1768
1769static void rehash_flow_table(struct work_struct *work)
1770{
1771 struct datapath *dp;
1772
1773 genl_lock();
1774
1775 list_for_each_entry(dp, &dps, list_node) {
1776 struct flow_table *old_table = genl_dereference(dp->table);
1777 struct flow_table *new_table;
1778
1779 new_table = ovs_flow_tbl_rehash(old_table);
1780 if (!IS_ERR(new_table)) {
1781 rcu_assign_pointer(dp->table, new_table);
1782 ovs_flow_tbl_deferred_destroy(old_table);
1783 } 1818 }
1819 skip = 0;
1784 } 1820 }
1821out:
1822 rcu_read_unlock();
1785 1823
1786 genl_unlock(); 1824 cb->args[0] = i;
1825 cb->args[1] = j;
1787 1826
1788 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 1827 return skb->len;
1789} 1828}
1790 1829
1791static struct genl_ops dp_vport_genl_ops[] = { 1830static struct genl_ops dp_vport_genl_ops[] = {
@@ -1872,6 +1911,59 @@ error:
1872 return err; 1911 return err;
1873} 1912}
1874 1913
1914static void rehash_flow_table(struct work_struct *work)
1915{
1916 struct datapath *dp;
1917 struct net *net;
1918
1919 genl_lock();
1920 rtnl_lock();
1921 for_each_net(net) {
1922 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1923
1924 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1925 struct flow_table *old_table = genl_dereference(dp->table);
1926 struct flow_table *new_table;
1927
1928 new_table = ovs_flow_tbl_rehash(old_table);
1929 if (!IS_ERR(new_table)) {
1930 rcu_assign_pointer(dp->table, new_table);
1931 ovs_flow_tbl_deferred_destroy(old_table);
1932 }
1933 }
1934 }
1935 rtnl_unlock();
1936 genl_unlock();
1937
1938 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1939}
1940
1941static int __net_init ovs_init_net(struct net *net)
1942{
1943 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1944
1945 INIT_LIST_HEAD(&ovs_net->dps);
1946 return 0;
1947}
1948
1949static void __net_exit ovs_exit_net(struct net *net)
1950{
1951 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1952 struct datapath *dp, *dp_next;
1953
1954 genl_lock();
1955 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1956 __dp_destroy(dp);
1957 genl_unlock();
1958}
1959
1960static struct pernet_operations ovs_net_ops = {
1961 .init = ovs_init_net,
1962 .exit = ovs_exit_net,
1963 .id = &ovs_net_id,
1964 .size = sizeof(struct ovs_net),
1965};
1966
1875static int __init dp_init(void) 1967static int __init dp_init(void)
1876{ 1968{
1877 struct sk_buff *dummy_skb; 1969 struct sk_buff *dummy_skb;
@@ -1889,10 +1981,14 @@ static int __init dp_init(void)
1889 if (err) 1981 if (err)
1890 goto error_flow_exit; 1982 goto error_flow_exit;
1891 1983
1892 err = register_netdevice_notifier(&ovs_dp_device_notifier); 1984 err = register_pernet_device(&ovs_net_ops);
1893 if (err) 1985 if (err)
1894 goto error_vport_exit; 1986 goto error_vport_exit;
1895 1987
1988 err = register_netdevice_notifier(&ovs_dp_device_notifier);
1989 if (err)
1990 goto error_netns_exit;
1991
1896 err = dp_register_genl(); 1992 err = dp_register_genl();
1897 if (err < 0) 1993 if (err < 0)
1898 goto error_unreg_notifier; 1994 goto error_unreg_notifier;
@@ -1903,6 +1999,8 @@ static int __init dp_init(void)
1903 1999
1904error_unreg_notifier: 2000error_unreg_notifier:
1905 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2001 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2002error_netns_exit:
2003 unregister_pernet_device(&ovs_net_ops);
1906error_vport_exit: 2004error_vport_exit:
1907 ovs_vport_exit(); 2005 ovs_vport_exit();
1908error_flow_exit: 2006error_flow_exit:
@@ -1914,9 +2012,10 @@ error:
1914static void dp_cleanup(void) 2012static void dp_cleanup(void)
1915{ 2013{
1916 cancel_delayed_work_sync(&rehash_flow_wq); 2014 cancel_delayed_work_sync(&rehash_flow_wq);
1917 rcu_barrier();
1918 dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); 2015 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
1919 unregister_netdevice_notifier(&ovs_dp_device_notifier); 2016 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2017 unregister_pernet_device(&ovs_net_ops);
2018 rcu_barrier();
1920 ovs_vport_exit(); 2019 ovs_vport_exit();
1921 ovs_flow_exit(); 2020 ovs_flow_exit();
1922} 2021}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index c1105c147531..129ec5480758 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,10 +27,11 @@
27#include <linux/u64_stats_sync.h> 27#include <linux/u64_stats_sync.h>
28 28
29#include "flow.h" 29#include "flow.h"
30#include "vport.h"
30 31
31struct vport; 32#define DP_MAX_PORTS USHRT_MAX
33#define DP_VPORT_HASH_BUCKETS 1024
32 34
33#define DP_MAX_PORTS 1024
34#define SAMPLE_ACTION_DEPTH 3 35#define SAMPLE_ACTION_DEPTH 3
35 36
36/** 37/**
@@ -58,11 +59,10 @@ struct dp_stats_percpu {
58 * @list_node: Element in global 'dps' list. 59 * @list_node: Element in global 'dps' list.
59 * @n_flows: Number of flows currently in flow table. 60 * @n_flows: Number of flows currently in flow table.
60 * @table: Current flow table. Protected by genl_lock and RCU. 61 * @table: Current flow table. Protected by genl_lock and RCU.
61 * @ports: Map from port number to &struct vport. %OVSP_LOCAL port 62 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
62 * always exists, other ports may be %NULL. Protected by RTNL and RCU. 63 * RTNL and RCU.
63 * @port_list: List of all ports in @ports in arbitrary order. RTNL required
64 * to iterate or modify.
65 * @stats_percpu: Per-CPU datapath statistics. 64 * @stats_percpu: Per-CPU datapath statistics.
65 * @net: Reference to net namespace.
66 * 66 *
67 * Context: See the comment on locking at the top of datapath.c for additional 67 * Context: See the comment on locking at the top of datapath.c for additional
68 * locking information. 68 * locking information.
@@ -75,13 +75,37 @@ struct datapath {
75 struct flow_table __rcu *table; 75 struct flow_table __rcu *table;
76 76
77 /* Switch ports. */ 77 /* Switch ports. */
78 struct vport __rcu *ports[DP_MAX_PORTS]; 78 struct hlist_head *ports;
79 struct list_head port_list;
80 79
81 /* Stats. */ 80 /* Stats. */
82 struct dp_stats_percpu __percpu *stats_percpu; 81 struct dp_stats_percpu __percpu *stats_percpu;
82
83#ifdef CONFIG_NET_NS
84 /* Network namespace ref. */
85 struct net *net;
86#endif
83}; 87};
84 88
89struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
90
91static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
92{
93 WARN_ON_ONCE(!rcu_read_lock_held());
94 return ovs_lookup_vport(dp, port_no);
95}
96
97static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
98{
99 WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
100 return ovs_lookup_vport(dp, port_no);
101}
102
103static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
104{
105 ASSERT_RTNL();
106 return ovs_lookup_vport(dp, port_no);
107}
108
85/** 109/**
86 * struct ovs_skb_cb - OVS data in skb CB 110 * struct ovs_skb_cb - OVS data in skb CB
87 * @flow: The flow associated with this packet. May be %NULL if no flow. 111 * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -108,6 +132,16 @@ struct dp_upcall_info {
108 u32 pid; 132 u32 pid;
109}; 133};
110 134
135static inline struct net *ovs_dp_get_net(struct datapath *dp)
136{
137 return read_pnet(&dp->net);
138}
139
140static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
141{
142 write_pnet(&dp->net, net);
143}
144
111extern struct notifier_block ovs_dp_device_notifier; 145extern struct notifier_block ovs_dp_device_notifier;
112extern struct genl_multicast_group ovs_dp_vport_multicast_group; 146extern struct genl_multicast_group ovs_dp_vport_multicast_group;
113 147
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 36dcee8fc84a..5558350e0d33 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -41,19 +41,21 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
41 case NETDEV_UNREGISTER: 41 case NETDEV_UNREGISTER:
42 if (!ovs_is_internal_dev(dev)) { 42 if (!ovs_is_internal_dev(dev)) {
43 struct sk_buff *notify; 43 struct sk_buff *notify;
44 struct datapath *dp = vport->dp;
44 45
45 notify = ovs_vport_cmd_build_info(vport, 0, 0, 46 notify = ovs_vport_cmd_build_info(vport, 0, 0,
46 OVS_VPORT_CMD_DEL); 47 OVS_VPORT_CMD_DEL);
47 ovs_dp_detach_port(vport); 48 ovs_dp_detach_port(vport);
48 if (IS_ERR(notify)) { 49 if (IS_ERR(notify)) {
49 netlink_set_err(init_net.genl_sock, 0, 50 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
50 ovs_dp_vport_multicast_group.id, 51 ovs_dp_vport_multicast_group.id,
51 PTR_ERR(notify)); 52 PTR_ERR(notify));
52 break; 53 break;
53 } 54 }
54 55
55 genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id, 56 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
56 GFP_KERNEL); 57 ovs_dp_vport_multicast_group.id,
58 GFP_KERNEL);
57 } 59 }
58 break; 60 break;
59 } 61 }
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c7bf2f26525a..98c70630ad06 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
203 int actions_len = nla_len(actions); 203 int actions_len = nla_len(actions);
204 struct sw_flow_actions *sfa; 204 struct sw_flow_actions *sfa;
205 205
206 /* At least DP_MAX_PORTS actions are required to be able to flood a 206 if (actions_len > MAX_ACTIONS_BUFSIZE)
207 * packet to every port. Factor of 2 allows for setting VLAN tags,
208 * etc. */
209 if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
210 return ERR_PTR(-EINVAL); 207 return ERR_PTR(-EINVAL);
211 208
212 sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); 209 sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
@@ -992,7 +989,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
992 swkey->phy.in_port = in_port; 989 swkey->phy.in_port = in_port;
993 attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 990 attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
994 } else { 991 } else {
995 swkey->phy.in_port = USHRT_MAX; 992 swkey->phy.in_port = DP_MAX_PORTS;
996 } 993 }
997 994
998 /* Data attributes. */ 995 /* Data attributes. */
@@ -1135,7 +1132,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
1135 const struct nlattr *nla; 1132 const struct nlattr *nla;
1136 int rem; 1133 int rem;
1137 1134
1138 *in_port = USHRT_MAX; 1135 *in_port = DP_MAX_PORTS;
1139 *priority = 0; 1136 *priority = 0;
1140 1137
1141 nla_for_each_nested(nla, attr, rem) { 1138 nla_for_each_nested(nla, attr, rem) {
@@ -1172,7 +1169,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1172 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) 1169 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
1173 goto nla_put_failure; 1170 goto nla_put_failure;
1174 1171
1175 if (swkey->phy.in_port != USHRT_MAX && 1172 if (swkey->phy.in_port != DP_MAX_PORTS &&
1176 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1173 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
1177 goto nla_put_failure; 1174 goto nla_put_failure;
1178 1175
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 9b75617ca4e0..d92e22a638cf 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -43,7 +43,7 @@ struct sw_flow_actions {
43struct sw_flow_key { 43struct sw_flow_key {
44 struct { 44 struct {
45 u32 priority; /* Packet QoS priority. */ 45 u32 priority; /* Packet QoS priority. */
46 u16 in_port; /* Input switch port (or USHRT_MAX). */ 46 u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
47 } phy; 47 } phy;
48 struct { 48 struct {
49 u8 src[ETH_ALEN]; /* Ethernet source address. */ 49 u8 src[ETH_ALEN]; /* Ethernet source address. */
@@ -161,6 +161,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
161int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, 161int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
162 const struct nlattr *); 162 const struct nlattr *);
163 163
164#define MAX_ACTIONS_BUFSIZE (16 * 1024)
164#define TBL_MIN_BUCKETS 1024 165#define TBL_MIN_BUCKETS 1024
165 166
166struct flow_table { 167struct flow_table {
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 4061b9ee07f7..5d460c37df07 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -144,7 +144,7 @@ static void do_setup(struct net_device *netdev)
144 netdev->tx_queue_len = 0; 144 netdev->tx_queue_len = 0;
145 145
146 netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | 146 netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
147 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; 147 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
148 148
149 netdev->vlan_features = netdev->features; 149 netdev->vlan_features = netdev->features;
150 netdev->features |= NETIF_F_HW_VLAN_TX; 150 netdev->features |= NETIF_F_HW_VLAN_TX;
@@ -175,9 +175,14 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
175 goto error_free_vport; 175 goto error_free_vport;
176 } 176 }
177 177
178 dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
178 internal_dev = internal_dev_priv(netdev_vport->dev); 179 internal_dev = internal_dev_priv(netdev_vport->dev);
179 internal_dev->vport = vport; 180 internal_dev->vport = vport;
180 181
182 /* Restrict bridge port to current netns. */
183 if (vport->port_no == OVSP_LOCAL)
184 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
185
181 err = register_netdevice(netdev_vport->dev); 186 err = register_netdevice(netdev_vport->dev);
182 if (err) 187 if (err)
183 goto error_free_netdev; 188 goto error_free_netdev;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6ea3551cc78c..3c1e58ba714b 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -83,7 +83,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
83 83
84 netdev_vport = netdev_vport_priv(vport); 84 netdev_vport = netdev_vport_priv(vport);
85 85
86 netdev_vport->dev = dev_get_by_name(&init_net, parms->name); 86 netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
87 if (!netdev_vport->dev) { 87 if (!netdev_vport->dev) {
88 err = -ENODEV; 88 err = -ENODEV;
89 goto error_free_vport; 89 goto error_free_vport;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6140336e79d7..1abd9609ba78 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -16,10 +16,10 @@
16 * 02110-1301, USA 16 * 02110-1301, USA
17 */ 17 */
18 18
19#include <linux/dcache.h>
20#include <linux/etherdevice.h> 19#include <linux/etherdevice.h>
21#include <linux/if.h> 20#include <linux/if.h>
22#include <linux/if_vlan.h> 21#include <linux/if_vlan.h>
22#include <linux/jhash.h>
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/list.h> 24#include <linux/list.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
@@ -27,7 +27,9 @@
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/rtnetlink.h> 28#include <linux/rtnetlink.h>
29#include <linux/compat.h> 29#include <linux/compat.h>
30#include <net/net_namespace.h>
30 31
32#include "datapath.h"
31#include "vport.h" 33#include "vport.h"
32#include "vport-internal_dev.h" 34#include "vport-internal_dev.h"
33 35
@@ -67,9 +69,9 @@ void ovs_vport_exit(void)
67 kfree(dev_table); 69 kfree(dev_table);
68} 70}
69 71
70static struct hlist_head *hash_bucket(const char *name) 72static struct hlist_head *hash_bucket(struct net *net, const char *name)
71{ 73{
72 unsigned int hash = full_name_hash(name, strlen(name)); 74 unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
73 return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; 75 return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
74} 76}
75 77
@@ -80,14 +82,15 @@ static struct hlist_head *hash_bucket(const char *name)
80 * 82 *
81 * Must be called with RTNL or RCU read lock. 83 * Must be called with RTNL or RCU read lock.
82 */ 84 */
83struct vport *ovs_vport_locate(const char *name) 85struct vport *ovs_vport_locate(struct net *net, const char *name)
84{ 86{
85 struct hlist_head *bucket = hash_bucket(name); 87 struct hlist_head *bucket = hash_bucket(net, name);
86 struct vport *vport; 88 struct vport *vport;
87 struct hlist_node *node; 89 struct hlist_node *node;
88 90
89 hlist_for_each_entry_rcu(vport, node, bucket, hash_node) 91 hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
90 if (!strcmp(name, vport->ops->get_name(vport))) 92 if (!strcmp(name, vport->ops->get_name(vport)) &&
93 net_eq(ovs_dp_get_net(vport->dp), net))
91 return vport; 94 return vport;
92 95
93 return NULL; 96 return NULL;
@@ -124,6 +127,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
124 vport->port_no = parms->port_no; 127 vport->port_no = parms->port_no;
125 vport->upcall_pid = parms->upcall_pid; 128 vport->upcall_pid = parms->upcall_pid;
126 vport->ops = ops; 129 vport->ops = ops;
130 INIT_HLIST_NODE(&vport->dp_hash_node);
127 131
128 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); 132 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
129 if (!vport->percpu_stats) { 133 if (!vport->percpu_stats) {
@@ -170,14 +174,17 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
170 174
171 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 175 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
172 if (vport_ops_list[i]->type == parms->type) { 176 if (vport_ops_list[i]->type == parms->type) {
177 struct hlist_head *bucket;
178
173 vport = vport_ops_list[i]->create(parms); 179 vport = vport_ops_list[i]->create(parms);
174 if (IS_ERR(vport)) { 180 if (IS_ERR(vport)) {
175 err = PTR_ERR(vport); 181 err = PTR_ERR(vport);
176 goto out; 182 goto out;
177 } 183 }
178 184
179 hlist_add_head_rcu(&vport->hash_node, 185 bucket = hash_bucket(ovs_dp_get_net(vport->dp),
180 hash_bucket(vport->ops->get_name(vport))); 186 vport->ops->get_name(vport));
187 hlist_add_head_rcu(&vport->hash_node, bucket);
181 return vport; 188 return vport;
182 } 189 }
183 } 190 }
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aac680ca2b06..c56e4836e93b 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -20,6 +20,7 @@
20#define VPORT_H 1 20#define VPORT_H 1
21 21
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/netlink.h>
23#include <linux/openvswitch.h> 24#include <linux/openvswitch.h>
24#include <linux/skbuff.h> 25#include <linux/skbuff.h>
25#include <linux/spinlock.h> 26#include <linux/spinlock.h>
@@ -38,7 +39,7 @@ void ovs_vport_exit(void);
38struct vport *ovs_vport_add(const struct vport_parms *); 39struct vport *ovs_vport_add(const struct vport_parms *);
39void ovs_vport_del(struct vport *); 40void ovs_vport_del(struct vport *);
40 41
41struct vport *ovs_vport_locate(const char *name); 42struct vport *ovs_vport_locate(struct net *net, const char *name);
42 43
43void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); 44void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
44 45
@@ -69,10 +70,10 @@ struct vport_err_stats {
69 * @rcu: RCU callback head for deferred destruction. 70 * @rcu: RCU callback head for deferred destruction.
70 * @port_no: Index into @dp's @ports array. 71 * @port_no: Index into @dp's @ports array.
71 * @dp: Datapath to which this port belongs. 72 * @dp: Datapath to which this port belongs.
72 * @node: Element in @dp's @port_list.
73 * @upcall_pid: The Netlink port to use for packets received on this port that 73 * @upcall_pid: The Netlink port to use for packets received on this port that
74 * miss the flow table. 74 * miss the flow table.
75 * @hash_node: Element in @dev_table hash table in vport.c. 75 * @hash_node: Element in @dev_table hash table in vport.c.
76 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
76 * @ops: Class structure. 77 * @ops: Class structure.
77 * @percpu_stats: Points to per-CPU statistics used and maintained by vport 78 * @percpu_stats: Points to per-CPU statistics used and maintained by vport
78 * @stats_lock: Protects @err_stats; 79 * @stats_lock: Protects @err_stats;
@@ -82,10 +83,10 @@ struct vport {
82 struct rcu_head rcu; 83 struct rcu_head rcu;
83 u16 port_no; 84 u16 port_no;
84 struct datapath *dp; 85 struct datapath *dp;
85 struct list_head node;
86 u32 upcall_pid; 86 u32 upcall_pid;
87 87
88 struct hlist_node hash_node; 88 struct hlist_node hash_node;
89 struct hlist_node dp_hash_node;
89 const struct vport_ops *ops; 90 const struct vport_ops *ops;
90 91
91 struct vport_percpu_stats __percpu *percpu_stats; 92 struct vport_percpu_stats __percpu *percpu_stats;