aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch/datapath.c
diff options
context:
space:
mode:
authorPravin B Shelar <pshelar@nicira.com>2013-04-15 16:23:03 -0400
committerJesse Gross <jesse@nicira.com>2013-04-15 17:38:40 -0400
commit8e4e1713e4978447c5f799aa668dcc6d2cb0dee9 (patch)
treedc0c2e05b677183d617d74020fa9d1ed28691102 /net/openvswitch/datapath.c
parentb4f9e8cdc82e4a07c3ca50395af5800a6229363e (diff)
openvswitch: Simplify datapath locking.
Currently OVS uses a combination of the genl and rtnl locks to protect datapath state. This was done due to networking stack locking. But this has complicated locking, and there are a few lock-ordering issues with new tunneling protocols. The following patch simplifies locking by introducing a new ovs mutex; this lock is now used to protect the entire ovs state. Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: Jesse Gross <jesse@nicira.com>
Diffstat (limited to 'net/openvswitch/datapath.c')
-rw-r--r--net/openvswitch/datapath.c274
1 file changed, 170 insertions, 104 deletions
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d406503e01b6..b7d0b7c3fe2c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
44#include <linux/netfilter_ipv4.h> 44#include <linux/netfilter_ipv4.h>
45#include <linux/inetdevice.h> 45#include <linux/inetdevice.h>
46#include <linux/list.h> 46#include <linux/list.h>
47#include <linux/lockdep.h>
47#include <linux/openvswitch.h> 48#include <linux/openvswitch.h>
48#include <linux/rculist.h> 49#include <linux/rculist.h>
49#include <linux/dmi.h> 50#include <linux/dmi.h>
@@ -56,21 +57,13 @@
56#include "flow.h" 57#include "flow.h"
57#include "vport-internal_dev.h" 58#include "vport-internal_dev.h"
58 59
59/**
60 * struct ovs_net - Per net-namespace data for ovs.
61 * @dps: List of datapaths to enable dumping them all out.
62 * Protected by genl_mutex.
63 */
64struct ovs_net {
65 struct list_head dps;
66};
67
68static int ovs_net_id __read_mostly;
69 60
70#define REHASH_FLOW_INTERVAL (10 * 60 * HZ) 61#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
71static void rehash_flow_table(struct work_struct *work); 62static void rehash_flow_table(struct work_struct *work);
72static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); 63static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
73 64
65int ovs_net_id __read_mostly;
66
74static void ovs_notify(struct sk_buff *skb, struct genl_info *info, 67static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
75 struct genl_multicast_group *grp) 68 struct genl_multicast_group *grp)
76{ 69{
@@ -81,20 +74,42 @@ static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
81/** 74/**
82 * DOC: Locking: 75 * DOC: Locking:
83 * 76 *
84 * Writes to device state (add/remove datapath, port, set operations on vports, 77 * All writes e.g. Writes to device state (add/remove datapath, port, set
85 * etc.) are protected by RTNL. 78 * operations on vports, etc.), Writes to other state (flow table
86 * 79 * modifications, set miscellaneous datapath parameters, etc.) are protected
87 * Writes to other state (flow table modifications, set miscellaneous datapath 80 * by ovs_lock.
88 * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
89 * genl_mutex.
90 * 81 *
91 * Reads are protected by RCU. 82 * Reads are protected by RCU.
92 * 83 *
93 * There are a few special cases (mostly stats) that have their own 84 * There are a few special cases (mostly stats) that have their own
94 * synchronization but they nest under all of above and don't interact with 85 * synchronization but they nest under all of above and don't interact with
95 * each other. 86 * each other.
87 *
88 * The RTNL lock nests inside ovs_mutex.
96 */ 89 */
97 90
91static DEFINE_MUTEX(ovs_mutex);
92
93void ovs_lock(void)
94{
95 mutex_lock(&ovs_mutex);
96}
97
98void ovs_unlock(void)
99{
100 mutex_unlock(&ovs_mutex);
101}
102
103#ifdef CONFIG_LOCKDEP
104int lockdep_ovsl_is_held(void)
105{
106 if (debug_locks)
107 return lockdep_is_held(&ovs_mutex);
108 else
109 return 1;
110}
111#endif
112
98static struct vport *new_vport(const struct vport_parms *); 113static struct vport *new_vport(const struct vport_parms *);
99static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, 114static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
100 const struct dp_upcall_info *); 115 const struct dp_upcall_info *);
@@ -102,7 +117,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
102 struct sk_buff *, 117 struct sk_buff *,
103 const struct dp_upcall_info *); 118 const struct dp_upcall_info *);
104 119
105/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ 120/* Must be called with rcu_read_lock or ovs_mutex. */
106static struct datapath *get_dp(struct net *net, int dp_ifindex) 121static struct datapath *get_dp(struct net *net, int dp_ifindex)
107{ 122{
108 struct datapath *dp = NULL; 123 struct datapath *dp = NULL;
@@ -120,10 +135,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
120 return dp; 135 return dp;
121} 136}
122 137
123/* Must be called with rcu_read_lock or RTNL lock. */ 138/* Must be called with rcu_read_lock or ovs_mutex. */
124const char *ovs_dp_name(const struct datapath *dp) 139const char *ovs_dp_name(const struct datapath *dp)
125{ 140{
126 struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL); 141 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
127 return vport->ops->get_name(vport); 142 return vport->ops->get_name(vport);
128} 143}
129 144
@@ -175,7 +190,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
175 return NULL; 190 return NULL;
176} 191}
177 192
178/* Called with RTNL lock and genl_lock. */ 193/* Called with ovs_mutex. */
179static struct vport *new_vport(const struct vport_parms *parms) 194static struct vport *new_vport(const struct vport_parms *parms)
180{ 195{
181 struct vport *vport; 196 struct vport *vport;
@@ -187,14 +202,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
187 202
188 hlist_add_head_rcu(&vport->dp_hash_node, head); 203 hlist_add_head_rcu(&vport->dp_hash_node, head);
189 } 204 }
190
191 return vport; 205 return vport;
192} 206}
193 207
194/* Called with RTNL lock. */
195void ovs_dp_detach_port(struct vport *p) 208void ovs_dp_detach_port(struct vport *p)
196{ 209{
197 ASSERT_RTNL(); 210 ASSERT_OVSL();
198 211
199 /* First drop references to device. */ 212 /* First drop references to device. */
200 hlist_del_rcu(&p->dp_hash_node); 213 hlist_del_rcu(&p->dp_hash_node);
@@ -432,13 +445,13 @@ out:
432 return err; 445 return err;
433} 446}
434 447
435/* Called with genl_mutex. */ 448/* Called with ovs_mutex. */
436static int flush_flows(struct datapath *dp) 449static int flush_flows(struct datapath *dp)
437{ 450{
438 struct flow_table *old_table; 451 struct flow_table *old_table;
439 struct flow_table *new_table; 452 struct flow_table *new_table;
440 453
441 old_table = genl_dereference(dp->table); 454 old_table = ovsl_dereference(dp->table);
442 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); 455 new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
443 if (!new_table) 456 if (!new_table)
444 return -ENOMEM; 457 return -ENOMEM;
@@ -788,7 +801,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
788static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 801static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
789{ 802{
790 int i; 803 int i;
791 struct flow_table *table = genl_dereference(dp->table); 804 struct flow_table *table = ovsl_dereference(dp->table);
792 805
793 stats->n_flows = ovs_flow_tbl_count(table); 806 stats->n_flows = ovs_flow_tbl_count(table);
794 807
@@ -840,7 +853,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
840 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ 853 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
841} 854}
842 855
843/* Called with genl_lock. */ 856/* Called with ovs_mutex. */
844static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 857static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
845 struct sk_buff *skb, u32 portid, 858 struct sk_buff *skb, u32 portid,
846 u32 seq, u32 flags, u8 cmd) 859 u32 seq, u32 flags, u8 cmd)
@@ -854,8 +867,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
854 u8 tcp_flags; 867 u8 tcp_flags;
855 int err; 868 int err;
856 869
857 sf_acts = rcu_dereference_protected(flow->sf_acts, 870 sf_acts = ovsl_dereference(flow->sf_acts);
858 lockdep_genl_is_held());
859 871
860 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 872 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
861 if (!ovs_header) 873 if (!ovs_header)
@@ -919,8 +931,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
919{ 931{
920 const struct sw_flow_actions *sf_acts; 932 const struct sw_flow_actions *sf_acts;
921 933
922 sf_acts = rcu_dereference_protected(flow->sf_acts, 934 sf_acts = ovsl_dereference(flow->sf_acts);
923 lockdep_genl_is_held());
924 935
925 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); 936 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
926} 937}
@@ -971,12 +982,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
971 goto error; 982 goto error;
972 } 983 }
973 984
985 ovs_lock();
974 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 986 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
975 error = -ENODEV; 987 error = -ENODEV;
976 if (!dp) 988 if (!dp)
977 goto error; 989 goto err_unlock_ovs;
978 990
979 table = genl_dereference(dp->table); 991 table = ovsl_dereference(dp->table);
980 flow = ovs_flow_tbl_lookup(table, &key, key_len); 992 flow = ovs_flow_tbl_lookup(table, &key, key_len);
981 if (!flow) { 993 if (!flow) {
982 struct sw_flow_actions *acts; 994 struct sw_flow_actions *acts;
@@ -984,7 +996,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
984 /* Bail out if we're not allowed to create a new flow. */ 996 /* Bail out if we're not allowed to create a new flow. */
985 error = -ENOENT; 997 error = -ENOENT;
986 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 998 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
987 goto error; 999 goto err_unlock_ovs;
988 1000
989 /* Expand table, if necessary, to make room. */ 1001 /* Expand table, if necessary, to make room. */
990 if (ovs_flow_tbl_need_to_expand(table)) { 1002 if (ovs_flow_tbl_need_to_expand(table)) {
@@ -994,7 +1006,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
994 if (!IS_ERR(new_table)) { 1006 if (!IS_ERR(new_table)) {
995 rcu_assign_pointer(dp->table, new_table); 1007 rcu_assign_pointer(dp->table, new_table);
996 ovs_flow_tbl_deferred_destroy(table); 1008 ovs_flow_tbl_deferred_destroy(table);
997 table = genl_dereference(dp->table); 1009 table = ovsl_dereference(dp->table);
998 } 1010 }
999 } 1011 }
1000 1012
@@ -1002,7 +1014,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1002 flow = ovs_flow_alloc(); 1014 flow = ovs_flow_alloc();
1003 if (IS_ERR(flow)) { 1015 if (IS_ERR(flow)) {
1004 error = PTR_ERR(flow); 1016 error = PTR_ERR(flow);
1005 goto error; 1017 goto err_unlock_ovs;
1006 } 1018 }
1007 flow->key = key; 1019 flow->key = key;
1008 clear_stats(flow); 1020 clear_stats(flow);
@@ -1035,11 +1047,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1035 error = -EEXIST; 1047 error = -EEXIST;
1036 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1048 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1037 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1049 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1038 goto error; 1050 goto err_unlock_ovs;
1039 1051
1040 /* Update actions. */ 1052 /* Update actions. */
1041 old_acts = rcu_dereference_protected(flow->sf_acts, 1053 old_acts = ovsl_dereference(flow->sf_acts);
1042 lockdep_genl_is_held());
1043 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1054 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1044 if (acts_attrs && 1055 if (acts_attrs &&
1045 (old_acts->actions_len != nla_len(acts_attrs) || 1056 (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1050,7 +1061,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1050 new_acts = ovs_flow_actions_alloc(acts_attrs); 1061 new_acts = ovs_flow_actions_alloc(acts_attrs);
1051 error = PTR_ERR(new_acts); 1062 error = PTR_ERR(new_acts);
1052 if (IS_ERR(new_acts)) 1063 if (IS_ERR(new_acts))
1053 goto error; 1064 goto err_unlock_ovs;
1054 1065
1055 rcu_assign_pointer(flow->sf_acts, new_acts); 1066 rcu_assign_pointer(flow->sf_acts, new_acts);
1056 ovs_flow_deferred_free_acts(old_acts); 1067 ovs_flow_deferred_free_acts(old_acts);
@@ -1066,6 +1077,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1066 spin_unlock_bh(&flow->lock); 1077 spin_unlock_bh(&flow->lock);
1067 } 1078 }
1068 } 1079 }
1080 ovs_unlock();
1069 1081
1070 if (!IS_ERR(reply)) 1082 if (!IS_ERR(reply))
1071 ovs_notify(reply, info, &ovs_dp_flow_multicast_group); 1083 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1076,6 +1088,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1076 1088
1077error_free_flow: 1089error_free_flow:
1078 ovs_flow_free(flow); 1090 ovs_flow_free(flow);
1091err_unlock_ovs:
1092 ovs_unlock();
1079error: 1093error:
1080 return error; 1094 return error;
1081} 1095}
@@ -1098,21 +1112,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1098 if (err) 1112 if (err)
1099 return err; 1113 return err;
1100 1114
1115 ovs_lock();
1101 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1116 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1102 if (!dp) 1117 if (!dp) {
1103 return -ENODEV; 1118 err = -ENODEV;
1119 goto unlock;
1120 }
1104 1121
1105 table = genl_dereference(dp->table); 1122 table = ovsl_dereference(dp->table);
1106 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1123 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1107 if (!flow) 1124 if (!flow) {
1108 return -ENOENT; 1125 err = -ENOENT;
1126 goto unlock;
1127 }
1109 1128
1110 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1129 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1111 info->snd_seq, OVS_FLOW_CMD_NEW); 1130 info->snd_seq, OVS_FLOW_CMD_NEW);
1112 if (IS_ERR(reply)) 1131 if (IS_ERR(reply)) {
1113 return PTR_ERR(reply); 1132 err = PTR_ERR(reply);
1133 goto unlock;
1134 }
1114 1135
1136 ovs_unlock();
1115 return genlmsg_reply(reply, info); 1137 return genlmsg_reply(reply, info);
1138unlock:
1139 ovs_unlock();
1140 return err;
1116} 1141}
1117 1142
1118static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1143static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1127,25 +1152,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1127 int err; 1152 int err;
1128 int key_len; 1153 int key_len;
1129 1154
1155 ovs_lock();
1130 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1156 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1131 if (!dp) 1157 if (!dp) {
1132 return -ENODEV; 1158 err = -ENODEV;
1133 1159 goto unlock;
1134 if (!a[OVS_FLOW_ATTR_KEY]) 1160 }
1135 return flush_flows(dp);
1136 1161
1162 if (!a[OVS_FLOW_ATTR_KEY]) {
1163 err = flush_flows(dp);
1164 goto unlock;
1165 }
1137 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1166 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1138 if (err) 1167 if (err)
1139 return err; 1168 goto unlock;
1140 1169
1141 table = genl_dereference(dp->table); 1170 table = ovsl_dereference(dp->table);
1142 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1171 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1143 if (!flow) 1172 if (!flow) {
1144 return -ENOENT; 1173 err = -ENOENT;
1174 goto unlock;
1175 }
1145 1176
1146 reply = ovs_flow_cmd_alloc_info(flow); 1177 reply = ovs_flow_cmd_alloc_info(flow);
1147 if (!reply) 1178 if (!reply) {
1148 return -ENOMEM; 1179 err = -ENOMEM;
1180 goto unlock;
1181 }
1149 1182
1150 ovs_flow_tbl_remove(table, flow); 1183 ovs_flow_tbl_remove(table, flow);
1151 1184
@@ -1154,9 +1187,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1154 BUG_ON(err < 0); 1187 BUG_ON(err < 0);
1155 1188
1156 ovs_flow_deferred_free(flow); 1189 ovs_flow_deferred_free(flow);
1190 ovs_unlock();
1157 1191
1158 ovs_notify(reply, info, &ovs_dp_flow_multicast_group); 1192 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1159 return 0; 1193 return 0;
1194unlock:
1195 ovs_unlock();
1196 return err;
1160} 1197}
1161 1198
1162static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1199static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1165,11 +1202,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1165 struct datapath *dp; 1202 struct datapath *dp;
1166 struct flow_table *table; 1203 struct flow_table *table;
1167 1204
1205 ovs_lock();
1168 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1206 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1169 if (!dp) 1207 if (!dp) {
1208 ovs_unlock();
1170 return -ENODEV; 1209 return -ENODEV;
1210 }
1171 1211
1172 table = genl_dereference(dp->table); 1212 table = ovsl_dereference(dp->table);
1173 1213
1174 for (;;) { 1214 for (;;) {
1175 struct sw_flow *flow; 1215 struct sw_flow *flow;
@@ -1190,6 +1230,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1190 cb->args[0] = bucket; 1230 cb->args[0] = bucket;
1191 cb->args[1] = obj; 1231 cb->args[1] = obj;
1192 } 1232 }
1233 ovs_unlock();
1193 return skb->len; 1234 return skb->len;
1194} 1235}
1195 1236
@@ -1295,7 +1336,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1295 return skb; 1336 return skb;
1296} 1337}
1297 1338
1298/* Called with genl_mutex and optionally with RTNL lock also. */ 1339/* Called with ovs_mutex. */
1299static struct datapath *lookup_datapath(struct net *net, 1340static struct datapath *lookup_datapath(struct net *net,
1300 struct ovs_header *ovs_header, 1341 struct ovs_header *ovs_header,
1301 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1342 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1329,12 +1370,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1329 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1370 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1330 goto err; 1371 goto err;
1331 1372
1332 rtnl_lock(); 1373 ovs_lock();
1333 1374
1334 err = -ENOMEM; 1375 err = -ENOMEM;
1335 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1376 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1336 if (dp == NULL) 1377 if (dp == NULL)
1337 goto err_unlock_rtnl; 1378 goto err_unlock_ovs;
1338 1379
1339 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1380 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1340 1381
@@ -1385,35 +1426,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1385 1426
1386 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1427 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1387 list_add_tail(&dp->list_node, &ovs_net->dps); 1428 list_add_tail(&dp->list_node, &ovs_net->dps);
1388 rtnl_unlock(); 1429
1430 ovs_unlock();
1389 1431
1390 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); 1432 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1391 return 0; 1433 return 0;
1392 1434
1393err_destroy_local_port: 1435err_destroy_local_port:
1394 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1436 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1395err_destroy_ports_array: 1437err_destroy_ports_array:
1396 kfree(dp->ports); 1438 kfree(dp->ports);
1397err_destroy_percpu: 1439err_destroy_percpu:
1398 free_percpu(dp->stats_percpu); 1440 free_percpu(dp->stats_percpu);
1399err_destroy_table: 1441err_destroy_table:
1400 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1442 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1401err_free_dp: 1443err_free_dp:
1402 release_net(ovs_dp_get_net(dp)); 1444 release_net(ovs_dp_get_net(dp));
1403 kfree(dp); 1445 kfree(dp);
1404err_unlock_rtnl: 1446err_unlock_ovs:
1405 rtnl_unlock(); 1447 ovs_unlock();
1406err: 1448err:
1407 return err; 1449 return err;
1408} 1450}
1409 1451
1410/* Called with genl_mutex. */ 1452/* Called with ovs_mutex. */
1411static void __dp_destroy(struct datapath *dp) 1453static void __dp_destroy(struct datapath *dp)
1412{ 1454{
1413 int i; 1455 int i;
1414 1456
1415 rtnl_lock();
1416
1417 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1457 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1418 struct vport *vport; 1458 struct vport *vport;
1419 struct hlist_node *n; 1459 struct hlist_node *n;
@@ -1424,14 +1464,11 @@ static void __dp_destroy(struct datapath *dp)
1424 } 1464 }
1425 1465
1426 list_del(&dp->list_node); 1466 list_del(&dp->list_node);
1427 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1428 1467
1429 /* rtnl_unlock() will wait until all the references to devices that 1468 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1430 * are pending unregistration have been dropped. We do it here to 1469 * all port in datapath are destroyed first before freeing datapath.
1431 * ensure that any internal devices (which contain DP pointers) are
1432 * fully destroyed before freeing the datapath.
1433 */ 1470 */
1434 rtnl_unlock(); 1471 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1435 1472
1436 call_rcu(&dp->rcu, destroy_dp_rcu); 1473 call_rcu(&dp->rcu, destroy_dp_rcu);
1437} 1474}
@@ -1442,22 +1479,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1442 struct datapath *dp; 1479 struct datapath *dp;
1443 int err; 1480 int err;
1444 1481
1482 ovs_lock();
1445 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1483 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1446 err = PTR_ERR(dp); 1484 err = PTR_ERR(dp);
1447 if (IS_ERR(dp)) 1485 if (IS_ERR(dp))
1448 return err; 1486 goto unlock;
1449 1487
1450 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1488 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1451 info->snd_seq, OVS_DP_CMD_DEL); 1489 info->snd_seq, OVS_DP_CMD_DEL);
1452 err = PTR_ERR(reply); 1490 err = PTR_ERR(reply);
1453 if (IS_ERR(reply)) 1491 if (IS_ERR(reply))
1454 return err; 1492 goto unlock;
1455 1493
1456 __dp_destroy(dp); 1494 __dp_destroy(dp);
1495 ovs_unlock();
1457 1496
1458 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); 1497 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1459 1498
1460 return 0; 1499 return 0;
1500unlock:
1501 ovs_unlock();
1502 return err;
1461} 1503}
1462 1504
1463static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1505static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1466,9 +1508,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1466 struct datapath *dp; 1508 struct datapath *dp;
1467 int err; 1509 int err;
1468 1510
1511 ovs_lock();
1469 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1512 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1513 err = PTR_ERR(dp);
1470 if (IS_ERR(dp)) 1514 if (IS_ERR(dp))
1471 return PTR_ERR(dp); 1515 goto unlock;
1472 1516
1473 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1517 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1474 info->snd_seq, OVS_DP_CMD_NEW); 1518 info->snd_seq, OVS_DP_CMD_NEW);
@@ -1476,29 +1520,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1476 err = PTR_ERR(reply); 1520 err = PTR_ERR(reply);
1477 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1521 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1478 ovs_dp_datapath_multicast_group.id, err); 1522 ovs_dp_datapath_multicast_group.id, err);
1479 return 0; 1523 err = 0;
1524 goto unlock;
1480 } 1525 }
1481 1526
1527 ovs_unlock();
1482 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group); 1528 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1483 1529
1484 return 0; 1530 return 0;
1531unlock:
1532 ovs_unlock();
1533 return err;
1485} 1534}
1486 1535
1487static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1536static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1488{ 1537{
1489 struct sk_buff *reply; 1538 struct sk_buff *reply;
1490 struct datapath *dp; 1539 struct datapath *dp;
1540 int err;
1491 1541
1542 ovs_lock();
1492 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1543 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1493 if (IS_ERR(dp)) 1544 if (IS_ERR(dp)) {
1494 return PTR_ERR(dp); 1545 err = PTR_ERR(dp);
1546 goto unlock;
1547 }
1495 1548
1496 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1549 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1497 info->snd_seq, OVS_DP_CMD_NEW); 1550 info->snd_seq, OVS_DP_CMD_NEW);
1498 if (IS_ERR(reply)) 1551 if (IS_ERR(reply)) {
1499 return PTR_ERR(reply); 1552 err = PTR_ERR(reply);
1553 goto unlock;
1554 }
1500 1555
1556 ovs_unlock();
1501 return genlmsg_reply(reply, info); 1557 return genlmsg_reply(reply, info);
1558
1559unlock:
1560 ovs_unlock();
1561 return err;
1502} 1562}
1503 1563
1504static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1564static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1508,6 +1568,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1508 int skip = cb->args[0]; 1568 int skip = cb->args[0];
1509 int i = 0; 1569 int i = 0;
1510 1570
1571 ovs_lock();
1511 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1572 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1512 if (i >= skip && 1573 if (i >= skip &&
1513 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1574 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1516,6 +1577,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1516 break; 1577 break;
1517 i++; 1578 i++;
1518 } 1579 }
1580 ovs_unlock();
1519 1581
1520 cb->args[0] = i; 1582 cb->args[0] = i;
1521 1583
@@ -1568,7 +1630,7 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
1568 .name = OVS_VPORT_MCGROUP 1630 .name = OVS_VPORT_MCGROUP
1569}; 1631};
1570 1632
1571/* Called with RTNL lock or RCU read lock. */ 1633/* Called with ovs_mutex or RCU read lock. */
1572static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1634static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1573 u32 portid, u32 seq, u32 flags, u8 cmd) 1635 u32 portid, u32 seq, u32 flags, u8 cmd)
1574{ 1636{
@@ -1607,7 +1669,7 @@ error:
1607 return err; 1669 return err;
1608} 1670}
1609 1671
1610/* Called with RTNL lock or RCU read lock. */ 1672/* Called with ovs_mutex or RCU read lock. */
1611struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1673struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1612 u32 seq, u8 cmd) 1674 u32 seq, u8 cmd)
1613{ 1675{
@@ -1626,7 +1688,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1626 return skb; 1688 return skb;
1627} 1689}
1628 1690
1629/* Called with RTNL lock or RCU read lock. */ 1691/* Called with ovs_mutex or RCU read lock. */
1630static struct vport *lookup_vport(struct net *net, 1692static struct vport *lookup_vport(struct net *net,
1631 struct ovs_header *ovs_header, 1693 struct ovs_header *ovs_header,
1632 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1694 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1652,7 +1714,7 @@ static struct vport *lookup_vport(struct net *net,
1652 if (!dp) 1714 if (!dp)
1653 return ERR_PTR(-ENODEV); 1715 return ERR_PTR(-ENODEV);
1654 1716
1655 vport = ovs_vport_rtnl_rcu(dp, port_no); 1717 vport = ovs_vport_ovsl_rcu(dp, port_no);
1656 if (!vport) 1718 if (!vport)
1657 return ERR_PTR(-ENODEV); 1719 return ERR_PTR(-ENODEV);
1658 return vport; 1720 return vport;
@@ -1676,7 +1738,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1676 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1738 !a[OVS_VPORT_ATTR_UPCALL_PID])
1677 goto exit; 1739 goto exit;
1678 1740
1679 rtnl_lock(); 1741 ovs_lock();
1680 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1742 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1681 err = -ENODEV; 1743 err = -ENODEV;
1682 if (!dp) 1744 if (!dp)
@@ -1689,7 +1751,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1689 if (port_no >= DP_MAX_PORTS) 1751 if (port_no >= DP_MAX_PORTS)
1690 goto exit_unlock; 1752 goto exit_unlock;
1691 1753
1692 vport = ovs_vport_rtnl_rcu(dp, port_no); 1754 vport = ovs_vport_ovsl(dp, port_no);
1693 err = -EBUSY; 1755 err = -EBUSY;
1694 if (vport) 1756 if (vport)
1695 goto exit_unlock; 1757 goto exit_unlock;
@@ -1699,7 +1761,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1699 err = -EFBIG; 1761 err = -EFBIG;
1700 goto exit_unlock; 1762 goto exit_unlock;
1701 } 1763 }
1702 vport = ovs_vport_rtnl(dp, port_no); 1764 vport = ovs_vport_ovsl(dp, port_no);
1703 if (!vport) 1765 if (!vport)
1704 break; 1766 break;
1705 } 1767 }
@@ -1729,7 +1791,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1729 ovs_notify(reply, info, &ovs_dp_vport_multicast_group); 1791 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1730 1792
1731exit_unlock: 1793exit_unlock:
1732 rtnl_unlock(); 1794 ovs_unlock();
1733exit: 1795exit:
1734 return err; 1796 return err;
1735} 1797}
@@ -1741,7 +1803,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1741 struct vport *vport; 1803 struct vport *vport;
1742 int err; 1804 int err;
1743 1805
1744 rtnl_lock(); 1806 ovs_lock();
1745 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1807 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1746 err = PTR_ERR(vport); 1808 err = PTR_ERR(vport);
1747 if (IS_ERR(vport)) 1809 if (IS_ERR(vport))
@@ -1767,10 +1829,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1767 goto exit_unlock; 1829 goto exit_unlock;
1768 } 1830 }
1769 1831
1832 ovs_unlock();
1770 ovs_notify(reply, info, &ovs_dp_vport_multicast_group); 1833 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1834 return 0;
1771 1835
1772exit_unlock: 1836exit_unlock:
1773 rtnl_unlock(); 1837 ovs_unlock();
1774 return err; 1838 return err;
1775} 1839}
1776 1840
@@ -1781,7 +1845,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1781 struct vport *vport; 1845 struct vport *vport;
1782 int err; 1846 int err;
1783 1847
1784 rtnl_lock(); 1848 ovs_lock();
1785 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1849 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1786 err = PTR_ERR(vport); 1850 err = PTR_ERR(vport);
1787 if (IS_ERR(vport)) 1851 if (IS_ERR(vport))
@@ -1804,7 +1868,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1804 ovs_notify(reply, info, &ovs_dp_vport_multicast_group); 1868 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1805 1869
1806exit_unlock: 1870exit_unlock:
1807 rtnl_unlock(); 1871 ovs_unlock();
1808 return err; 1872 return err;
1809} 1873}
1810 1874
@@ -1964,13 +2028,13 @@ static void rehash_flow_table(struct work_struct *work)
1964 struct datapath *dp; 2028 struct datapath *dp;
1965 struct net *net; 2029 struct net *net;
1966 2030
1967 genl_lock(); 2031 ovs_lock();
1968 rtnl_lock(); 2032 rtnl_lock();
1969 for_each_net(net) { 2033 for_each_net(net) {
1970 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2034 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1971 2035
1972 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2036 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1973 struct flow_table *old_table = genl_dereference(dp->table); 2037 struct flow_table *old_table = ovsl_dereference(dp->table);
1974 struct flow_table *new_table; 2038 struct flow_table *new_table;
1975 2039
1976 new_table = ovs_flow_tbl_rehash(old_table); 2040 new_table = ovs_flow_tbl_rehash(old_table);
@@ -1981,8 +2045,7 @@ static void rehash_flow_table(struct work_struct *work)
1981 } 2045 }
1982 } 2046 }
1983 rtnl_unlock(); 2047 rtnl_unlock();
1984 genl_unlock(); 2048 ovs_unlock();
1985
1986 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2049 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1987} 2050}
1988 2051
@@ -1991,18 +2054,21 @@ static int __net_init ovs_init_net(struct net *net)
1991 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2054 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1992 2055
1993 INIT_LIST_HEAD(&ovs_net->dps); 2056 INIT_LIST_HEAD(&ovs_net->dps);
2057 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
1994 return 0; 2058 return 0;
1995} 2059}
1996 2060
1997static void __net_exit ovs_exit_net(struct net *net) 2061static void __net_exit ovs_exit_net(struct net *net)
1998{ 2062{
1999 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2000 struct datapath *dp, *dp_next; 2063 struct datapath *dp, *dp_next;
2064 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2001 2065
2002 genl_lock(); 2066 ovs_lock();
2003 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2067 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2004 __dp_destroy(dp); 2068 __dp_destroy(dp);
2005 genl_unlock(); 2069 ovs_unlock();
2070
2071 cancel_work_sync(&ovs_net->dp_notify_work);
2006} 2072}
2007 2073
2008static struct pernet_operations ovs_net_ops = { 2074static struct pernet_operations ovs_net_ops = {