author    Pravin B Shelar <pshelar@nicira.com>  2013-04-15 16:23:03 -0400
committer Jesse Gross <jesse@nicira.com>        2013-04-15 17:38:40 -0400
commit    8e4e1713e4978447c5f799aa668dcc6d2cb0dee9 (patch)
tree      dc0c2e05b677183d617d74020fa9d1ed28691102
parent    b4f9e8cdc82e4a07c3ca50395af5800a6229363e (diff)
openvswitch: Simplify datapath locking.
Currently OVS uses a combination of the genl and rtnl locks to protect datapath state, an arrangement forced by the networking stack's own locking. But this complicates the locking, and there are a few lock-ordering issues with the new tunneling protocols. The following patch simplifies the locking by introducing a new ovs mutex, which is now used to protect the entire ovs state.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
-rw-r--r--  net/openvswitch/datapath.c           | 274
-rw-r--r--  net/openvswitch/datapath.h           |  69
-rw-r--r--  net/openvswitch/dp_notify.c          |  82
-rw-r--r--  net/openvswitch/vport-internal_dev.c |   6
-rw-r--r--  net/openvswitch/vport-netdev.c       |   8
-rw-r--r--  net/openvswitch/vport.c              |  22
-rw-r--r--  net/openvswitch/vport.h              |   4
7 files changed, 298 insertions(+), 167 deletions(-)
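
For orientation before reading the diff: every Netlink handler below is converted to the same "take ovs_mutex, operate, release on every exit path" shape. A minimal sketch of that shape follows; ovs_lock(), ovs_unlock(), get_dp() and ovsl_dereference() are the real helpers this patch introduces, while example_handler() and its body are illustrative only.

	static int example_handler(struct sk_buff *skb, struct genl_info *info)
	{
		struct ovs_header *ovs_header = info->userhdr;
		struct datapath *dp;
		struct flow_table *table;
		int err = 0;

		ovs_lock();		/* one mutex replaces genl_mutex + RTNL */
		dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
		if (!dp) {
			err = -ENODEV;
			goto unlock;	/* every error path must drop ovs_mutex */
		}

		/* dp->table is written under ovs_mutex and read under RCU, so a
		 * lock-side reader uses ovsl_dereference(), which asserts
		 * lockdep_ovsl_is_held() rather than lockdep_genl_is_held().
		 */
		table = ovsl_dereference(dp->table);
		(void)table;		/* ... modify protected state here ... */

	unlock:
		ovs_unlock();
		return err;
	}
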
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d406503e01b6..b7d0b7c3fe2c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
@@ -56,21 +57,13 @@
 #include "flow.h"
 #include "vport-internal_dev.h"

-/**
- * struct ovs_net - Per net-namespace data for ovs.
- * @dps: List of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-struct ovs_net {
-	struct list_head dps;
-};
-
-static int ovs_net_id __read_mostly;

 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
 static void rehash_flow_table(struct work_struct *work);
 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

+int ovs_net_id __read_mostly;
+
 static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
 		       struct genl_multicast_group *grp)
 {
@@ -81,20 +74,42 @@ static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
 /**
  * DOC: Locking:
  *
- * Writes to device state (add/remove datapath, port, set operations on vports,
- * etc.) are protected by RTNL.
- *
- * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
- * genl_mutex.
+ * All writes e.g. Writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.), Writes to other state (flow table
+ * modifications, set miscellaneous datapath parameters, etc.) are protected
+ * by ovs_lock.
  *
  * Reads are protected by RCU.
  *
  * There are a few special cases (mostly stats) that have their own
  * synchronization but they nest under all of above and don't interact with
  * each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
  */

+static DEFINE_MUTEX(ovs_mutex);
+
+void ovs_lock(void)
+{
+	mutex_lock(&ovs_mutex);
+}
+
+void ovs_unlock(void)
+{
+	mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void)
+{
+	if (debug_locks)
+		return lockdep_is_held(&ovs_mutex);
+	else
+		return 1;
+}
+#endif
+
 static struct vport *new_vport(const struct vport_parms *);
 static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
 			     const struct dp_upcall_info *);
@@ -102,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
 				  struct sk_buff *,
 				  const struct dp_upcall_info *);

-/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 static struct datapath *get_dp(struct net *net, int dp_ifindex)
 {
 	struct datapath *dp = NULL;
@@ -120,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
 	return dp;
 }

-/* Must be called with rcu_read_lock or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
-	struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
+	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 	return vport->ops->get_name(vport);
 }

@@ -175,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 	return NULL;
 }

-/* Called with RTNL lock and genl_lock. */
+/* Called with ovs_mutex. */
 static struct vport *new_vport(const struct vport_parms *parms)
 {
 	struct vport *vport;
@@ -187,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms)

 		hlist_add_head_rcu(&vport->dp_hash_node, head);
 	}
-
 	return vport;
 }

-/* Called with RTNL lock. */
 void ovs_dp_detach_port(struct vport *p)
 {
-	ASSERT_RTNL();
+	ASSERT_OVSL();

 	/* First drop references to device. */
 	hlist_del_rcu(&p->dp_hash_node);
@@ -432,13 +445,13 @@ out:
 	return err;
 }

-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
 static int flush_flows(struct datapath *dp)
 {
 	struct flow_table *old_table;
 	struct flow_table *new_table;

-	old_table = genl_dereference(dp->table);
+	old_table = ovsl_dereference(dp->table);
 	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
 	if (!new_table)
 		return -ENOMEM;
@@ -788,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
 static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 {
 	int i;
-	struct flow_table *table = genl_dereference(dp->table);
+	struct flow_table *table = ovsl_dereference(dp->table);

 	stats->n_flows = ovs_flow_tbl_count(table);

@@ -840,7 +853,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
 }

-/* Called with genl_lock. */
+/* Called with ovs_mutex. */
 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 				  struct sk_buff *skb, u32 portid,
 				  u32 seq, u32 flags, u8 cmd)
@@ -854,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 	u8 tcp_flags;
 	int err;

-	sf_acts = rcu_dereference_protected(flow->sf_acts,
-					    lockdep_genl_is_held());
+	sf_acts = ovsl_dereference(flow->sf_acts);

 	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
 	if (!ovs_header)
@@ -919,8 +931,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
 {
 	const struct sw_flow_actions *sf_acts;

-	sf_acts = rcu_dereference_protected(flow->sf_acts,
-					    lockdep_genl_is_held());
+	sf_acts = ovsl_dereference(flow->sf_acts);

 	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
 }
@@ -971,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		goto error;
 	}

+	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	error = -ENODEV;
 	if (!dp)
-		goto error;
+		goto err_unlock_ovs;

-	table = genl_dereference(dp->table);
+	table = ovsl_dereference(dp->table);
 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
 	if (!flow) {
 		struct sw_flow_actions *acts;
@@ -984,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		/* Bail out if we're not allowed to create a new flow. */
 		error = -ENOENT;
 		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
-			goto error;
+			goto err_unlock_ovs;

 		/* Expand table, if necessary, to make room. */
 		if (ovs_flow_tbl_need_to_expand(table)) {
@@ -994,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			if (!IS_ERR(new_table)) {
 				rcu_assign_pointer(dp->table, new_table);
 				ovs_flow_tbl_deferred_destroy(table);
-				table = genl_dereference(dp->table);
+				table = ovsl_dereference(dp->table);
 			}
 		}

@@ -1002,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		flow = ovs_flow_alloc();
 		if (IS_ERR(flow)) {
 			error = PTR_ERR(flow);
-			goto error;
+			goto err_unlock_ovs;
 		}
 		flow->key = key;
 		clear_stats(flow);
@@ -1035,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		error = -EEXIST;
 		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
 		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
-			goto error;
+			goto err_unlock_ovs;

 		/* Update actions. */
-		old_acts = rcu_dereference_protected(flow->sf_acts,
-						     lockdep_genl_is_held());
+		old_acts = ovsl_dereference(flow->sf_acts);
 		acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
 		if (acts_attrs &&
 		    (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1050,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			new_acts = ovs_flow_actions_alloc(acts_attrs);
 			error = PTR_ERR(new_acts);
 			if (IS_ERR(new_acts))
-				goto error;
+				goto err_unlock_ovs;

 			rcu_assign_pointer(flow->sf_acts, new_acts);
 			ovs_flow_deferred_free_acts(old_acts);
@@ -1066,6 +1077,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			spin_unlock_bh(&flow->lock);
 		}
 	}
+	ovs_unlock();

 	if (!IS_ERR(reply))
 		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1076,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)

 error_free_flow:
 	ovs_flow_free(flow);
+err_unlock_ovs:
+	ovs_unlock();
 error:
 	return error;
 }
@@ -1098,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;

+	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp)
-		return -ENODEV;
+	if (!dp) {
+		err = -ENODEV;
+		goto unlock;
+	}

-	table = genl_dereference(dp->table);
+	table = ovsl_dereference(dp->table);
 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
-	if (!flow)
-		return -ENOENT;
+	if (!flow) {
+		err = -ENOENT;
+		goto unlock;
+	}

 	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 					info->snd_seq, OVS_FLOW_CMD_NEW);
-	if (IS_ERR(reply))
-		return PTR_ERR(reply);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto unlock;
+	}

+	ovs_unlock();
 	return genlmsg_reply(reply, info);
+unlock:
+	ovs_unlock();
+	return err;
 }

 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1127,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	int key_len;

+	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp)
-		return -ENODEV;
-
-	if (!a[OVS_FLOW_ATTR_KEY])
-		return flush_flows(dp);
+	if (!dp) {
+		err = -ENODEV;
+		goto unlock;
+	}

+	if (!a[OVS_FLOW_ATTR_KEY]) {
+		err = flush_flows(dp);
+		goto unlock;
+	}
 	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
 	if (err)
-		return err;
+		goto unlock;

-	table = genl_dereference(dp->table);
+	table = ovsl_dereference(dp->table);
 	flow = ovs_flow_tbl_lookup(table, &key, key_len);
-	if (!flow)
-		return -ENOENT;
+	if (!flow) {
+		err = -ENOENT;
+		goto unlock;
+	}

 	reply = ovs_flow_cmd_alloc_info(flow);
-	if (!reply)
-		return -ENOMEM;
+	if (!reply) {
+		err = -ENOMEM;
+		goto unlock;
+	}

 	ovs_flow_tbl_remove(table, flow);

@@ -1154,9 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	BUG_ON(err < 0);

 	ovs_flow_deferred_free(flow);
+	ovs_unlock();

 	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
 	return 0;
+unlock:
+	ovs_unlock();
+	return err;
 }

 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1165,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct datapath *dp;
 	struct flow_table *table;

+	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp)
+	if (!dp) {
+		ovs_unlock();
 		return -ENODEV;
+	}

-	table = genl_dereference(dp->table);
+	table = ovsl_dereference(dp->table);

 	for (;;) {
 		struct sw_flow *flow;
@@ -1190,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		cb->args[0] = bucket;
 		cb->args[1] = obj;
 	}
+	ovs_unlock();
 	return skb->len;
 }

@@ -1295,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
 	return skb;
 }

-/* Called with genl_mutex and optionally with RTNL lock also. */
+/* Called with ovs_mutex. */
 static struct datapath *lookup_datapath(struct net *net,
 					struct ovs_header *ovs_header,
 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1329,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
 		goto err;

-	rtnl_lock();
+	ovs_lock();

 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_unlock_rtnl;
+		goto err_unlock_ovs;

 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

@@ -1385,35 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)

 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
 	list_add_tail(&dp->list_node, &ovs_net->dps);
-	rtnl_unlock();
+
+	ovs_unlock();

 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
 	return 0;

 err_destroy_local_port:
-	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
+	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 err_destroy_ports_array:
 	kfree(dp->ports);
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
-	ovs_flow_tbl_destroy(genl_dereference(dp->table));
+	ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
 err_free_dp:
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
-err_unlock_rtnl:
-	rtnl_unlock();
+err_unlock_ovs:
+	ovs_unlock();
 err:
 	return err;
 }

-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
 static void __dp_destroy(struct datapath *dp)
 {
 	int i;

-	rtnl_lock();
-
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
 		struct hlist_node *n;
@@ -1424,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp)
 	}

 	list_del(&dp->list_node);
-	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));

-	/* rtnl_unlock() will wait until all the references to devices that
-	 * are pending unregistration have been dropped. We do it here to
-	 * ensure that any internal devices (which contain DP pointers) are
-	 * fully destroyed before freeing the datapath.
+	/* OVSP_LOCAL is datapath internal port. We need to make sure that
+	 * all port in datapath are destroyed first before freeing datapath.
 	 */
-	rtnl_unlock();
+	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
@@ -1442,22 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;

+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		return err;
+		goto unlock;

 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_DEL);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
-		return err;
+		goto unlock;

 	__dp_destroy(dp);
+	ovs_unlock();

 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);

 	return 0;
+unlock:
+	ovs_unlock();
+	return err;
 }

 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1466,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;

+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		return PTR_ERR(dp);
+		goto unlock;

 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
@@ -1476,29 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		err = PTR_ERR(reply);
 		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
 				ovs_dp_datapath_multicast_group.id, err);
-		return 0;
+		err = 0;
+		goto unlock;
 	}

+	ovs_unlock();
 	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);

 	return 0;
+unlock:
+	ovs_unlock();
+	return err;
 }

 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *reply;
 	struct datapath *dp;
+	int err;

+	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
-	if (IS_ERR(dp))
-		return PTR_ERR(dp);
+	if (IS_ERR(dp)) {
+		err = PTR_ERR(dp);
+		goto unlock;
+	}

 	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply))
-		return PTR_ERR(reply);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto unlock;
+	}

+	ovs_unlock();
 	return genlmsg_reply(reply, info);
+
+unlock:
+	ovs_unlock();
+	return err;
 }

 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1508,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	int skip = cb->args[0];
 	int i = 0;

+	ovs_lock();
 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
 		if (i >= skip &&
 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1516,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 		i++;
 	}
+	ovs_unlock();

 	cb->args[0] = i;

@@ -1568,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
 	.name = OVS_VPORT_MCGROUP
 };

-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 				   u32 portid, u32 seq, u32 flags, u8 cmd)
 {
@@ -1607,7 +1669,7 @@ error:
 	return err;
 }

-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 					 u32 seq, u8 cmd)
 {
@@ -1626,7 +1688,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 	return skb;
 }

-/* Called with RTNL lock or RCU read lock. */
+/* Called with ovs_mutex or RCU read lock. */
 static struct vport *lookup_vport(struct net *net,
 				  struct ovs_header *ovs_header,
 				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1652,7 +1714,7 @@ static struct vport *lookup_vport(struct net *net,
 		if (!dp)
 			return ERR_PTR(-ENODEV);

-		vport = ovs_vport_rtnl_rcu(dp, port_no);
+		vport = ovs_vport_ovsl_rcu(dp, port_no);
 		if (!vport)
 			return ERR_PTR(-ENODEV);
 		return vport;
@@ -1676,7 +1738,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
 		goto exit;

-	rtnl_lock();
+	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
@@ -1689,7 +1751,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		if (port_no >= DP_MAX_PORTS)
 			goto exit_unlock;

-		vport = ovs_vport_rtnl_rcu(dp, port_no);
+		vport = ovs_vport_ovsl(dp, port_no);
 		err = -EBUSY;
 		if (vport)
 			goto exit_unlock;
@@ -1699,7 +1761,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 			err = -EFBIG;
 			goto exit_unlock;
 		}
-		vport = ovs_vport_rtnl(dp, port_no);
+		vport = ovs_vport_ovsl(dp, port_no);
 		if (!vport)
 			break;
 	}
@@ -1729,7 +1791,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);

 exit_unlock:
-	rtnl_unlock();
+	ovs_unlock();
 exit:
 	return err;
 }
@@ -1741,7 +1803,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;

-	rtnl_lock();
+	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
@@ -1767,10 +1829,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}

+	ovs_unlock();
 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
+	return 0;

 exit_unlock:
-	rtnl_unlock();
+	ovs_unlock();
 	return err;
 }

@@ -1781,7 +1845,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct vport *vport;
 	int err;

-	rtnl_lock();
+	ovs_lock();
 	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
@@ -1804,7 +1868,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);

 exit_unlock:
-	rtnl_unlock();
+	ovs_unlock();
 	return err;
 }

@@ -1964,13 +2028,13 @@ static void rehash_flow_table(struct work_struct *work)
 	struct datapath *dp;
 	struct net *net;

-	genl_lock();
+	ovs_lock();
 	rtnl_lock();
 	for_each_net(net) {
 		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

 		list_for_each_entry(dp, &ovs_net->dps, list_node) {
-			struct flow_table *old_table = genl_dereference(dp->table);
+			struct flow_table *old_table = ovsl_dereference(dp->table);
 			struct flow_table *new_table;

 			new_table = ovs_flow_tbl_rehash(old_table);
@@ -1981,8 +2045,7 @@ static void rehash_flow_table(struct work_struct *work)
 		}
 	}
 	rtnl_unlock();
-	genl_unlock();
-
+	ovs_unlock();
 	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
 }

@@ -1991,18 +2054,21 @@ static int __net_init ovs_init_net(struct net *net)
 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

 	INIT_LIST_HEAD(&ovs_net->dps);
+	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
 	return 0;
 }

 static void __net_exit ovs_exit_net(struct net *net)
 {
-	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 	struct datapath *dp, *dp_next;
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

-	genl_lock();
+	ovs_lock();
 	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
 		__dp_destroy(dp);
-	genl_unlock();
+	ovs_unlock();
+
+	cancel_work_sync(&ovs_net->dp_notify_work);
 }

 static struct pernet_operations ovs_net_ops = {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 655beb1fe078..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,9 +57,9 @@ struct dp_stats_percpu {
  * struct datapath - datapath for flow-based packet switching
  * @rcu: RCU callback head for deferred destruction.
  * @list_node: Element in global 'dps' list.
- * @table: Current flow table. Protected by genl_lock and RCU.
+ * @table: Current flow table. Protected by ovs_mutex and RCU.
  * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
- * RTNL and RCU.
+ * ovs_mutex and RCU.
  * @stats_percpu: Per-CPU datapath statistics.
  * @net: Reference to net namespace.
  *
@@ -85,26 +85,6 @@ struct datapath {
 #endif
 };

-struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
-
-static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
-	return ovs_lookup_vport(dp, port_no);
-}
-
-static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
-{
-	ASSERT_RTNL();
-	return ovs_lookup_vport(dp, port_no);
-}
-
 /**
  * struct ovs_skb_cb - OVS data in skb CB
  * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -131,6 +111,30 @@ struct dp_upcall_info {
 	u32 portid;
 };

+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+struct ovs_net {
+	struct list_head dps;
+	struct work_struct dp_notify_work;
+};
+
+extern int ovs_net_id;
+void ovs_lock(void);
+void ovs_unlock(void);
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void);
+#else
+#define lockdep_ovsl_is_held()	1
+#endif
+
+#define ASSERT_OVSL()	WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ovsl_dereference(p)	\
+	rcu_dereference_protected(p, lockdep_ovsl_is_held())
+
 static inline struct net *ovs_dp_get_net(struct datapath *dp)
 {
 	return read_pnet(&dp->net);
@@ -141,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
 	write_pnet(&dp->net, net);
 }

+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+	return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
+{
+	ASSERT_OVSL();
+	return ovs_lookup_vport(dp, port_no);
+}
+
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_multicast_group ovs_dp_vport_multicast_group;

@@ -154,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
 			 u8 cmd);

 int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+void ovs_dp_notify_wq(struct work_struct *work);
 #endif /* datapath.h */
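
Worth noting from the datapath.h hunk above: the three lookup helpers encode which lock the caller holds. A fragment sketching the intended call sites (the surrounding variables are assumed to be in scope; the three accessors are the real ones from the patch):

	struct datapath *dp;	/* assumed in scope */
	struct vport *vport;
	u16 port_no;

	/* Packet fast path: caller holds rcu_read_lock() only. */
	vport = ovs_vport_rcu(dp, port_no);

	/* Paths that may run under either RCU or ovs_mutex. */
	vport = ovs_vport_ovsl_rcu(dp, port_no);

	/* Pure write paths: ovs_mutex must be held; ASSERT_OVSL(), i.e.
	 * WARN_ON(unlikely(!lockdep_ovsl_is_held())), catches violations. */
	vport = ovs_vport_ovsl(dp, port_no);
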
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@

 #include <linux/netdevice.h>
 #include <net/genetlink.h>
+#include <net/netns/generic.h>

 #include "datapath.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"

+static void dp_detach_port_notify(struct vport *vport)
+{
+	struct sk_buff *notify;
+	struct datapath *dp;
+
+	dp = vport->dp;
+	notify = ovs_vport_cmd_build_info(vport, 0, 0,
+					  OVS_VPORT_CMD_DEL);
+	ovs_dp_detach_port(vport);
+	if (IS_ERR(notify)) {
+		netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
+				ovs_dp_vport_multicast_group.id,
+				PTR_ERR(notify));
+		return;
+	}
+
+	genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
+				ovs_dp_vport_multicast_group.id,
+				GFP_KERNEL);
+}
+
+void ovs_dp_notify_wq(struct work_struct *work)
+{
+	struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
+	struct datapath *dp;
+
+	ovs_lock();
+	list_for_each_entry(dp, &ovs_net->dps, list_node) {
+		int i;
+
+		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+			struct vport *vport;
+			struct hlist_node *n;
+
+			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
+				struct netdev_vport *netdev_vport;
+
+				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+					continue;
+
+				netdev_vport = netdev_vport_priv(vport);
+				if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
+				    netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
+					dp_detach_port_notify(vport);
+			}
+		}
+	}
+	ovs_unlock();
+}
+
 static int dp_device_event(struct notifier_block *unused, unsigned long event,
 			   void *ptr)
 {
+	struct ovs_net *ovs_net;
 	struct net_device *dev = ptr;
-	struct vport *vport;
+	struct vport *vport = NULL;

-	if (ovs_is_internal_dev(dev))
-		vport = ovs_internal_dev_get_vport(dev);
-	else
+	if (!ovs_is_internal_dev(dev))
 		vport = ovs_netdev_get_vport(dev);

 	if (!vport)
 		return NOTIFY_DONE;

-	switch (event) {
-	case NETDEV_UNREGISTER:
-		if (!ovs_is_internal_dev(dev)) {
-			struct sk_buff *notify;
-			struct datapath *dp = vport->dp;
-
-			notify = ovs_vport_cmd_build_info(vport, 0, 0,
-							  OVS_VPORT_CMD_DEL);
-			ovs_dp_detach_port(vport);
-			if (IS_ERR(notify)) {
-				netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
-						ovs_dp_vport_multicast_group.id,
-						PTR_ERR(notify));
-				break;
-			}
-
-			genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
-						ovs_dp_vport_multicast_group.id,
-						GFP_KERNEL);
-		}
-		break;
+	if (event == NETDEV_UNREGISTER) {
+		ovs_net = net_generic(dev_net(dev), ovs_net_id);
+		queue_work(system_wq, &ovs_net->dp_notify_work);
 	}

 	return NOTIFY_DONE;
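
A note on the dp_notify.c rework above: the netdev notifier chain runs with RTNL held, and under the new ordering RTNL nests inside ovs_mutex (per the DOC: Locking comment in datapath.c), so taking ovs_mutex directly from the notifier would invert the lock order. Deferring the work solves this, as the comment-style sketch below summarizes:

	/* Lock ordering after this patch:
	 *
	 *	ovs_mutex
	 *	    rtnl_lock
	 *
	 * dp_device_event() runs with RTNL already held, so it only queues
	 * ovs_net->dp_notify_work; ovs_dp_notify_wq() then runs in process
	 * context, where it is free to take ovs_mutex first.
	 * ovs_exit_net() pairs this with cancel_work_sync(), as seen in the
	 * datapath.c hunk above.
	 */
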
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 40f8a2489c90..9604760494b1 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -173,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
 	if (vport->port_no == OVSP_LOCAL)
 		netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;

+	rtnl_lock();
 	err = register_netdevice(netdev_vport->dev);
 	if (err)
 		goto error_free_netdev;

 	dev_set_promiscuity(netdev_vport->dev, 1);
+	rtnl_unlock();
 	netif_start_queue(netdev_vport->dev);

 	return vport;

 error_free_netdev:
+	rtnl_unlock();
 	free_netdev(netdev_vport->dev);
 error_free_vport:
 	ovs_vport_free(vport);
@@ -195,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

 	netif_stop_queue(netdev_vport->dev);
+	rtnl_lock();
 	dev_set_promiscuity(netdev_vport->dev, -1);

 	/* unregister_netdevice() waits for an RCU grace period. */
 	unregister_netdevice(netdev_vport->dev);
+
+	rtnl_unlock();
 }

 static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..40a89ae8e19f 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 		goto error_put;
 	}

+	rtnl_lock();
 	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
 					 vport);
 	if (err)
-		goto error_put;
+		goto error_unlock;

 	dev_set_promiscuity(netdev_vport->dev, 1);
 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+	rtnl_unlock();

 	return vport;

+error_unlock:
+	rtnl_unlock();
 error_put:
 	dev_put(netdev_vport->dev);
 error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
 {
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

+	rtnl_lock();
 	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
 	netdev_rx_handler_unregister(netdev_vport->dev);
 	dev_set_promiscuity(netdev_vport->dev, -1);
+	rtnl_unlock();

 	call_rcu(&netdev_vport->rcu, free_port_rcu);
 }
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 71a2de8726cb..c90d856d441c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
 	&ovs_internal_vport_ops,
 };

-/* Protected by RCU read lock for reading, RTNL lock for writing. */
+/* Protected by RCU read lock for reading, ovs_mutex for writing. */
 static struct hlist_head *dev_table;
 #define VPORT_HASH_BUCKETS 1024

@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
  *
  * @name: name of port to find
  *
- * Must be called with RTNL or RCU read lock.
+ * Must be called with ovs or RCU read lock.
  */
 struct vport *ovs_vport_locate(struct net *net, const char *name)
 {
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
  * @parms: Information about new vport.
  *
  * Creates a new vport with the specified configuration (which is dependent on
- * device type). RTNL lock must be held.
+ * device type). ovs_mutex must be held.
  */
 struct vport *ovs_vport_add(const struct vport_parms *parms)
 {
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
 	int err = 0;
 	int i;

-	ASSERT_RTNL();
-
 	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
 		if (vport_ops_list[i]->type == parms->type) {
 			struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
  * @port: New configuration.
  *
  * Modifies an existing device with the specified configuration (which is
- * dependent on device type). RTNL lock must be held.
+ * dependent on device type). ovs_mutex must be held.
  */
 int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
 {
-	ASSERT_RTNL();
-
 	if (!vport->ops->set_options)
 		return -EOPNOTSUPP;
 	return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
  * @vport: vport to delete.
  *
  * Detaches @vport from its datapath and destroys it. It is possible to fail
- * for reasons such as lack of memory. RTNL lock must be held.
+ * for reasons such as lack of memory. ovs_mutex must be held.
  */
 void ovs_vport_del(struct vport *vport)
 {
-	ASSERT_RTNL();
+	ASSERT_OVSL();

 	hlist_del_rcu(&vport->hash_node);

@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
  *
  * Retrieves transmit, receive, and error stats for the given device.
  *
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
  */
 void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
 {
@@ -296,7 +292,7 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
  * negative error code if a real error occurred. If an error occurs, @skb is
  * left unmodified.
  *
- * Must be called with RTNL lock or rcu_read_lock.
+ * Must be called with ovs_mutex or rcu_read_lock.
  */
 int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 {
@@ -348,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
  * @vport: vport on which to send the packet
  * @skb: skb to send
  *
- * Sends the given packet and returns the length of data sent. Either RTNL
+ * Sends the given packet and returns the length of data sent. Either ovs
  * lock or rcu_read_lock must be held.
  */
 int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aee7d43114c9..7282b8436ba7 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -138,14 +138,14 @@ struct vport_parms {
 struct vport_ops {
 	enum ovs_vport_type type;

-	/* Called with RTNL lock. */
+	/* Called with ovs_mutex. */
 	struct vport *(*create)(const struct vport_parms *);
 	void (*destroy)(struct vport *);

 	int (*set_options)(struct vport *, struct nlattr *);
 	int (*get_options)(const struct vport *, struct sk_buff *);

-	/* Called with rcu_read_lock or RTNL lock. */
+	/* Called with rcu_read_lock or ovs_mutex. */
 	const char *(*get_name)(const struct vport *);
 	void (*get_config)(const struct vport *, void *);
 	int (*get_ifindex)(const struct vport *);