aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Zhou <azhou@nicira.com>2013-08-07 23:01:00 -0400
committerJesse Gross <jesse@nicira.com>2013-08-23 19:43:07 -0400
commit03f0d916aa0317592dda11bd17c7357858719b6c (patch)
tree436f94d9c4846cadfa73ee0822f44a6383f3a2f3
parent3fa34de67861abfc4846ccec886ca549d46ae56c (diff)
openvswitch: Mega flow implementation
Add wildcarded flow support in kernel datapath. Wildcarded flows can improve OVS flow set up performance by avoiding sending matching new flows to the user space program. The exact performance boost will largely depend on the wildcarded flow hit rate. In case all new flows hit wildcarded flows, the flow set up rate is within 5% of that of the Linux bridge module. Pravin has made significant contributions to this patch, including API clean ups and bug fixes. Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: Andy Zhou <azhou@nicira.com> Signed-off-by: Jesse Gross <jesse@nicira.com>
-rw-r--r--Documentation/networking/openvswitch.txt40
-rw-r--r--include/uapi/linux/openvswitch.h9
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/datapath.c140
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c1387
-rw-r--r--net/openvswitch/flow.h96
7 files changed, 1171 insertions, 513 deletions
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
index 8fa2dd1e792e..37c20ee2455e 100644
--- a/Documentation/networking/openvswitch.txt
+++ b/Documentation/networking/openvswitch.txt
@@ -91,6 +91,46 @@ Often we ellipsize arguments not important to the discussion, e.g.:
91 in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...) 91 in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
92 92
93 93
94Wildcarded flow key format
95--------------------------
96
97A wildcarded flow is described with two sequences of Netlink attributes
98passed over the Netlink socket. A flow key, exactly as described above, and an
99optional corresponding flow mask.
100
101A wildcarded flow can represent a group of exact match flows. Each '1' bit
102in the mask specifies an exact match with the corresponding bit in the flow key.
103A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
104of an incoming packet. Using wildcarded flows can improve the flow set up rate
105by reducing the number of new flows that need to be processed by the user space program.
106
107Support for the mask Netlink attribute is optional for both the kernel and user
108space program. The kernel can ignore the mask attribute, installing an exact
109match flow, or reduce the number of don't care bits in the kernel to less than
110what was specified by the user space program. In this case, variations in bits
111that the kernel does not implement will simply result in additional flow setups.
112The kernel module will also work with user space programs that neither support
113nor supply flow mask attributes.
114
115Since the kernel may ignore or modify wildcard bits, it can be difficult for
116the userspace program to know exactly what matches are installed. There are
117two possible approaches: reactively install flows as they miss the kernel
118flow table (and therefore not attempt to determine wildcard changes at all)
119or use the kernel's response messages to determine the installed wildcards.
120
121When interacting with userspace, the kernel should maintain the match portion
122of the key exactly as originally installed. This provides a handle to
123identify the flow for all future operations. However, when reporting the
124mask of an installed flow, the mask should include any restrictions imposed
125by the kernel.
126
127The behavior when using overlapping wildcarded flows is undefined. It is the
128responsibility of the user space program to ensure that any incoming packet
129can match at most one flow, wildcarded or not. The current implementation
130performs best-effort detection of overlapping wildcarded flows and may reject
131some but not all of them. However, this behavior may change in future versions.
132
133
94Basic rule for evolving flow keys 134Basic rule for evolving flow keys
95--------------------------------- 135---------------------------------
96 136
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 52490b0e62b5..de1fa5d3780f 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -1,6 +1,6 @@
1 1
2/* 2/*
3 * Copyright (c) 2007-2011 Nicira Networks. 3 * Copyright (c) 2007-2013 Nicira, Inc.
4 * 4 *
5 * This program is free software; you can redistribute it and/or 5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public 6 * modify it under the terms of version 2 of the GNU General Public
@@ -379,6 +379,12 @@ struct ovs_key_nd {
379 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the 379 * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
380 * last-used time, accumulated TCP flags, and statistics for this flow. 380 * last-used time, accumulated TCP flags, and statistics for this flow.
381 * Otherwise ignored in requests. Never present in notifications. 381 * Otherwise ignored in requests. Never present in notifications.
382 * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
383 * mask bits for wildcarded flow match. Mask bit value '1' specifies exact
384 * match with corresponding flow key bit, while mask bit value '0' specifies
385 * a wildcarded match. Omitting a nested attribute is treated as wildcarding
386 * all corresponding fields. Optional for all requests. If the mask itself
387 * is not present, all flow key bits are exact match bits.
382 * 388 *
383 * These attributes follow the &struct ovs_header within the Generic Netlink 389 * These attributes follow the &struct ovs_header within the Generic Netlink
384 * payload for %OVS_FLOW_* commands. 390 * payload for %OVS_FLOW_* commands.
@@ -391,6 +397,7 @@ enum ovs_flow_attr {
391 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ 397 OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
392 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ 398 OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
393 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ 399 OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
400 OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
394 __OVS_FLOW_ATTR_MAX 401 __OVS_FLOW_ATTR_MAX
395}; 402};
396 403
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ab101f715447..1f680222f4f7 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -376,8 +376,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
376 const struct nlattr *a; 376 const struct nlattr *a;
377 int rem; 377 int rem;
378 378
379 BUG_ON(!OVS_CB(skb)->pkt_key);
380
379 upcall.cmd = OVS_PACKET_CMD_ACTION; 381 upcall.cmd = OVS_PACKET_CMD_ACTION;
380 upcall.key = &OVS_CB(skb)->flow->key; 382 upcall.key = OVS_CB(skb)->pkt_key;
381 upcall.userdata = NULL; 383 upcall.userdata = NULL;
382 upcall.portid = 0; 384 upcall.portid = 0;
383 385
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d97ef3c9830..d29cd9aa4a67 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2012 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
165{ 165{
166 struct datapath *dp = container_of(rcu, struct datapath, rcu); 166 struct datapath *dp = container_of(rcu, struct datapath, rcu);
167 167
168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table); 168 ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
169 free_percpu(dp->stats_percpu); 169 free_percpu(dp->stats_percpu);
170 release_net(ovs_dp_get_net(dp)); 170 release_net(ovs_dp_get_net(dp));
171 kfree(dp->ports); 171 kfree(dp->ports);
@@ -226,19 +226,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
226 struct sw_flow_key key; 226 struct sw_flow_key key;
227 u64 *stats_counter; 227 u64 *stats_counter;
228 int error; 228 int error;
229 int key_len;
230 229
231 stats = this_cpu_ptr(dp->stats_percpu); 230 stats = this_cpu_ptr(dp->stats_percpu);
232 231
233 /* Extract flow from 'skb' into 'key'. */ 232 /* Extract flow from 'skb' into 'key'. */
234 error = ovs_flow_extract(skb, p->port_no, &key, &key_len); 233 error = ovs_flow_extract(skb, p->port_no, &key);
235 if (unlikely(error)) { 234 if (unlikely(error)) {
236 kfree_skb(skb); 235 kfree_skb(skb);
237 return; 236 return;
238 } 237 }
239 238
240 /* Look up flow. */ 239 /* Look up flow. */
241 flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len); 240 flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
242 if (unlikely(!flow)) { 241 if (unlikely(!flow)) {
243 struct dp_upcall_info upcall; 242 struct dp_upcall_info upcall;
244 243
@@ -253,6 +252,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
253 } 252 }
254 253
255 OVS_CB(skb)->flow = flow; 254 OVS_CB(skb)->flow = flow;
255 OVS_CB(skb)->pkt_key = &key;
256 256
257 stats_counter = &stats->n_hit; 257 stats_counter = &stats->n_hit;
258 ovs_flow_used(OVS_CB(skb)->flow, skb); 258 ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -435,7 +435,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
435 upcall->dp_ifindex = dp_ifindex; 435 upcall->dp_ifindex = dp_ifindex;
436 436
437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); 437 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
438 ovs_flow_to_nlattrs(upcall_info->key, user_skb); 438 ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
439 nla_nest_end(user_skb, nla); 439 nla_nest_end(user_skb, nla);
440 440
441 if (upcall_info->userdata) 441 if (upcall_info->userdata)
@@ -468,7 +468,7 @@ static int flush_flows(struct datapath *dp)
468 468
469 rcu_assign_pointer(dp->table, new_table); 469 rcu_assign_pointer(dp->table, new_table);
470 470
471 ovs_flow_tbl_deferred_destroy(old_table); 471 ovs_flow_tbl_destroy(old_table, true);
472 return 0; 472 return 0;
473} 473}
474 474
@@ -611,10 +611,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
611static int validate_and_copy_set_tun(const struct nlattr *attr, 611static int validate_and_copy_set_tun(const struct nlattr *attr,
612 struct sw_flow_actions **sfa) 612 struct sw_flow_actions **sfa)
613{ 613{
614 struct ovs_key_ipv4_tunnel tun_key; 614 struct sw_flow_match match;
615 struct sw_flow_key key;
615 int err, start; 616 int err, start;
616 617
617 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); 618 ovs_match_init(&match, &key, NULL);
619 err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
618 if (err) 620 if (err)
619 return err; 621 return err;
620 622
@@ -622,7 +624,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
622 if (start < 0) 624 if (start < 0)
623 return start; 625 return start;
624 626
625 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); 627 err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
628 sizeof(match.key->tun_key));
626 add_nested_action_end(*sfa, start); 629 add_nested_action_end(*sfa, start);
627 630
628 return err; 631 return err;
@@ -857,7 +860,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
857 struct ethhdr *eth; 860 struct ethhdr *eth;
858 int len; 861 int len;
859 int err; 862 int err;
860 int key_len;
861 863
862 err = -EINVAL; 864 err = -EINVAL;
863 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || 865 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -890,11 +892,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
890 if (IS_ERR(flow)) 892 if (IS_ERR(flow))
891 goto err_kfree_skb; 893 goto err_kfree_skb;
892 894
893 err = ovs_flow_extract(packet, -1, &flow->key, &key_len); 895 err = ovs_flow_extract(packet, -1, &flow->key);
894 if (err) 896 if (err)
895 goto err_flow_free; 897 goto err_flow_free;
896 898
897 err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); 899 err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
898 if (err) 900 if (err)
899 goto err_flow_free; 901 goto err_flow_free;
900 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); 902 acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -908,6 +910,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
908 goto err_flow_free; 910 goto err_flow_free;
909 911
910 OVS_CB(packet)->flow = flow; 912 OVS_CB(packet)->flow = flow;
913 OVS_CB(packet)->pkt_key = &flow->key;
911 packet->priority = flow->key.phy.priority; 914 packet->priority = flow->key.phy.priority;
912 packet->mark = flow->key.phy.skb_mark; 915 packet->mark = flow->key.phy.skb_mark;
913 916
@@ -922,13 +925,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
922 local_bh_enable(); 925 local_bh_enable();
923 rcu_read_unlock(); 926 rcu_read_unlock();
924 927
925 ovs_flow_free(flow); 928 ovs_flow_free(flow, false);
926 return err; 929 return err;
927 930
928err_unlock: 931err_unlock:
929 rcu_read_unlock(); 932 rcu_read_unlock();
930err_flow_free: 933err_flow_free:
931 ovs_flow_free(flow); 934 ovs_flow_free(flow, false);
932err_kfree_skb: 935err_kfree_skb:
933 kfree_skb(packet); 936 kfree_skb(packet);
934err: 937err:
@@ -1045,7 +1048,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
1045 if (!start) 1048 if (!start)
1046 return -EMSGSIZE; 1049 return -EMSGSIZE;
1047 1050
1048 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); 1051 err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
1052 nla_data(ovs_key));
1049 if (err) 1053 if (err)
1050 return err; 1054 return err;
1051 nla_nest_end(skb, start); 1055 nla_nest_end(skb, start);
@@ -1093,6 +1097,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
1093{ 1097{
1094 return NLMSG_ALIGN(sizeof(struct ovs_header)) 1098 return NLMSG_ALIGN(sizeof(struct ovs_header))
1095 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ 1099 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
1100 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
1096 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ 1101 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
1097 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ 1102 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
1098 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ 1103 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -1119,12 +1124,25 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
1119 1124
1120 ovs_header->dp_ifindex = get_dpifindex(dp); 1125 ovs_header->dp_ifindex = get_dpifindex(dp);
1121 1126
1127 /* Fill flow key. */
1122 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); 1128 nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
1123 if (!nla) 1129 if (!nla)
1124 goto nla_put_failure; 1130 goto nla_put_failure;
1125 err = ovs_flow_to_nlattrs(&flow->key, skb); 1131
1132 err = ovs_flow_to_nlattrs(&flow->unmasked_key,
1133 &flow->unmasked_key, skb);
1134 if (err)
1135 goto error;
1136 nla_nest_end(skb, nla);
1137
1138 nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
1139 if (!nla)
1140 goto nla_put_failure;
1141
1142 err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
1126 if (err) 1143 if (err)
1127 goto error; 1144 goto error;
1145
1128 nla_nest_end(skb, nla); 1146 nla_nest_end(skb, nla);
1129 1147
1130 spin_lock_bh(&flow->lock); 1148 spin_lock_bh(&flow->lock);
@@ -1214,20 +1232,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1214{ 1232{
1215 struct nlattr **a = info->attrs; 1233 struct nlattr **a = info->attrs;
1216 struct ovs_header *ovs_header = info->userhdr; 1234 struct ovs_header *ovs_header = info->userhdr;
1217 struct sw_flow_key key; 1235 struct sw_flow_key key, masked_key;
1218 struct sw_flow *flow; 1236 struct sw_flow *flow = NULL;
1237 struct sw_flow_mask mask;
1219 struct sk_buff *reply; 1238 struct sk_buff *reply;
1220 struct datapath *dp; 1239 struct datapath *dp;
1221 struct flow_table *table; 1240 struct flow_table *table;
1222 struct sw_flow_actions *acts = NULL; 1241 struct sw_flow_actions *acts = NULL;
1242 struct sw_flow_match match;
1223 int error; 1243 int error;
1224 int key_len;
1225 1244
1226 /* Extract key. */ 1245 /* Extract key. */
1227 error = -EINVAL; 1246 error = -EINVAL;
1228 if (!a[OVS_FLOW_ATTR_KEY]) 1247 if (!a[OVS_FLOW_ATTR_KEY])
1229 goto error; 1248 goto error;
1230 error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1249
1250 ovs_match_init(&match, &key, &mask);
1251 error = ovs_match_from_nlattrs(&match,
1252 a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
1231 if (error) 1253 if (error)
1232 goto error; 1254 goto error;
1233 1255
@@ -1238,9 +1260,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1238 if (IS_ERR(acts)) 1260 if (IS_ERR(acts))
1239 goto error; 1261 goto error;
1240 1262
1241 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); 1263 ovs_flow_key_mask(&masked_key, &key, &mask);
1242 if (error) 1264 error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
1265 &masked_key, 0, &acts);
1266 if (error) {
1267 OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
1243 goto err_kfree; 1268 goto err_kfree;
1269 }
1244 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { 1270 } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
1245 error = -EINVAL; 1271 error = -EINVAL;
1246 goto error; 1272 goto error;
@@ -1253,8 +1279,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1253 goto err_unlock_ovs; 1279 goto err_unlock_ovs;
1254 1280
1255 table = ovsl_dereference(dp->table); 1281 table = ovsl_dereference(dp->table);
1256 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1282
1283 /* Check if this is a duplicate flow */
1284 flow = ovs_flow_lookup(table, &key);
1257 if (!flow) { 1285 if (!flow) {
1286 struct sw_flow_mask *mask_p;
1258 /* Bail out if we're not allowed to create a new flow. */ 1287 /* Bail out if we're not allowed to create a new flow. */
1259 error = -ENOENT; 1288 error = -ENOENT;
1260 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 1289 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1267,7 +1296,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1267 new_table = ovs_flow_tbl_expand(table); 1296 new_table = ovs_flow_tbl_expand(table);
1268 if (!IS_ERR(new_table)) { 1297 if (!IS_ERR(new_table)) {
1269 rcu_assign_pointer(dp->table, new_table); 1298 rcu_assign_pointer(dp->table, new_table);
1270 ovs_flow_tbl_deferred_destroy(table); 1299 ovs_flow_tbl_destroy(table, true);
1271 table = ovsl_dereference(dp->table); 1300 table = ovsl_dereference(dp->table);
1272 } 1301 }
1273 } 1302 }
@@ -1280,14 +1309,30 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1280 } 1309 }
1281 clear_stats(flow); 1310 clear_stats(flow);
1282 1311
1312 flow->key = masked_key;
1313 flow->unmasked_key = key;
1314
1315 /* Make sure mask is unique in the system */
1316 mask_p = ovs_sw_flow_mask_find(table, &mask);
1317 if (!mask_p) {
1318 /* Allocate a new mask if none exists. */
1319 mask_p = ovs_sw_flow_mask_alloc();
1320 if (!mask_p)
1321 goto err_flow_free;
1322 mask_p->key = mask.key;
1323 mask_p->range = mask.range;
1324 ovs_sw_flow_mask_insert(table, mask_p);
1325 }
1326
1327 ovs_sw_flow_mask_add_ref(mask_p);
1328 flow->mask = mask_p;
1283 rcu_assign_pointer(flow->sf_acts, acts); 1329 rcu_assign_pointer(flow->sf_acts, acts);
1284 1330
1285 /* Put flow in bucket. */ 1331 /* Put flow in bucket. */
1286 ovs_flow_tbl_insert(table, flow, &key, key_len); 1332 ovs_flow_insert(table, flow);
1287 1333
1288 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1334 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1289 info->snd_seq, 1335 info->snd_seq, OVS_FLOW_CMD_NEW);
1290 OVS_FLOW_CMD_NEW);
1291 } else { 1336 } else {
1292 /* We found a matching flow. */ 1337 /* We found a matching flow. */
1293 struct sw_flow_actions *old_acts; 1338 struct sw_flow_actions *old_acts;
@@ -1303,6 +1348,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1303 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1348 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1304 goto err_unlock_ovs; 1349 goto err_unlock_ovs;
1305 1350
1351 /* The unmasked key has to be the same for flow updates. */
1352 error = -EINVAL;
1353 if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
1354 OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
1355 goto err_unlock_ovs;
1356 }
1357
1306 /* Update actions. */ 1358 /* Update actions. */
1307 old_acts = ovsl_dereference(flow->sf_acts); 1359 old_acts = ovsl_dereference(flow->sf_acts);
1308 rcu_assign_pointer(flow->sf_acts, acts); 1360 rcu_assign_pointer(flow->sf_acts, acts);
@@ -1327,6 +1379,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1327 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1379 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
1328 return 0; 1380 return 0;
1329 1381
1382err_flow_free:
1383 ovs_flow_free(flow, false);
1330err_unlock_ovs: 1384err_unlock_ovs:
1331 ovs_unlock(); 1385 ovs_unlock();
1332err_kfree: 1386err_kfree:
@@ -1344,12 +1398,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1344 struct sw_flow *flow; 1398 struct sw_flow *flow;
1345 struct datapath *dp; 1399 struct datapath *dp;
1346 struct flow_table *table; 1400 struct flow_table *table;
1401 struct sw_flow_match match;
1347 int err; 1402 int err;
1348 int key_len;
1349 1403
1350 if (!a[OVS_FLOW_ATTR_KEY]) 1404 if (!a[OVS_FLOW_ATTR_KEY]) {
1405 OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
1351 return -EINVAL; 1406 return -EINVAL;
1352 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1407 }
1408
1409 ovs_match_init(&match, &key, NULL);
1410 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1353 if (err) 1411 if (err)
1354 return err; 1412 return err;
1355 1413
@@ -1361,7 +1419,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1361 } 1419 }
1362 1420
1363 table = ovsl_dereference(dp->table); 1421 table = ovsl_dereference(dp->table);
1364 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1422 flow = ovs_flow_lookup_unmasked_key(table, &match);
1365 if (!flow) { 1423 if (!flow) {
1366 err = -ENOENT; 1424 err = -ENOENT;
1367 goto unlock; 1425 goto unlock;
@@ -1390,8 +1448,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1390 struct sw_flow *flow; 1448 struct sw_flow *flow;
1391 struct datapath *dp; 1449 struct datapath *dp;
1392 struct flow_table *table; 1450 struct flow_table *table;
1451 struct sw_flow_match match;
1393 int err; 1452 int err;
1394 int key_len;
1395 1453
1396 ovs_lock(); 1454 ovs_lock();
1397 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1455 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1404,12 +1462,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1404 err = flush_flows(dp); 1462 err = flush_flows(dp);
1405 goto unlock; 1463 goto unlock;
1406 } 1464 }
1407 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1465
1466 ovs_match_init(&match, &key, NULL);
1467 err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
1408 if (err) 1468 if (err)
1409 goto unlock; 1469 goto unlock;
1410 1470
1411 table = ovsl_dereference(dp->table); 1471 table = ovsl_dereference(dp->table);
1412 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1472 flow = ovs_flow_lookup_unmasked_key(table, &match);
1413 if (!flow) { 1473 if (!flow) {
1414 err = -ENOENT; 1474 err = -ENOENT;
1415 goto unlock; 1475 goto unlock;
@@ -1421,13 +1481,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1421 goto unlock; 1481 goto unlock;
1422 } 1482 }
1423 1483
1424 ovs_flow_tbl_remove(table, flow); 1484 ovs_flow_remove(table, flow);
1425 1485
1426 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, 1486 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
1427 info->snd_seq, 0, OVS_FLOW_CMD_DEL); 1487 info->snd_seq, 0, OVS_FLOW_CMD_DEL);
1428 BUG_ON(err < 0); 1488 BUG_ON(err < 0);
1429 1489
1430 ovs_flow_deferred_free(flow); 1490 ovs_flow_free(flow, true);
1431 ovs_unlock(); 1491 ovs_unlock();
1432 1492
1433 ovs_notify(reply, info, &ovs_dp_flow_multicast_group); 1493 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1457,7 +1517,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1457 1517
1458 bucket = cb->args[0]; 1518 bucket = cb->args[0];
1459 obj = cb->args[1]; 1519 obj = cb->args[1];
1460 flow = ovs_flow_tbl_next(table, &bucket, &obj); 1520 flow = ovs_flow_dump_next(table, &bucket, &obj);
1461 if (!flow) 1521 if (!flow)
1462 break; 1522 break;
1463 1523
@@ -1680,7 +1740,7 @@ err_destroy_ports_array:
1680err_destroy_percpu: 1740err_destroy_percpu:
1681 free_percpu(dp->stats_percpu); 1741 free_percpu(dp->stats_percpu);
1682err_destroy_table: 1742err_destroy_table:
1683 ovs_flow_tbl_destroy(ovsl_dereference(dp->table)); 1743 ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
1684err_free_dp: 1744err_free_dp:
1685 release_net(ovs_dp_get_net(dp)); 1745 release_net(ovs_dp_get_net(dp));
1686 kfree(dp); 1746 kfree(dp);
@@ -2287,7 +2347,7 @@ static void rehash_flow_table(struct work_struct *work)
2287 new_table = ovs_flow_tbl_rehash(old_table); 2347 new_table = ovs_flow_tbl_rehash(old_table);
2288 if (!IS_ERR(new_table)) { 2348 if (!IS_ERR(new_table)) {
2289 rcu_assign_pointer(dp->table, new_table); 2349 rcu_assign_pointer(dp->table, new_table);
2290 ovs_flow_tbl_deferred_destroy(old_table); 2350 ovs_flow_tbl_destroy(old_table, true);
2291 } 2351 }
2292 } 2352 }
2293 } 2353 }
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a91486484916..4d109c176ef3 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,11 +88,13 @@ struct datapath {
88/** 88/**
89 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
90 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
91 * @pkt_key: The flow information extracted from the packet. Must be nonnull.
91 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the 92 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
92 * packet is not being tunneled. 93 * packet is not being tunneled.
93 */ 94 */
94struct ovs_skb_cb { 95struct ovs_skb_cb {
95 struct sw_flow *flow; 96 struct sw_flow *flow;
97 struct sw_flow_key *pkt_key;
96 struct ovs_key_ipv4_tunnel *tun_key; 98 struct ovs_key_ipv4_tunnel *tun_key;
97}; 99};
98#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) 100#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -183,4 +185,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
183 185
184int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 186int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
185void ovs_dp_notify_wq(struct work_struct *work); 187void ovs_dp_notify_wq(struct work_struct *work);
188
189#define OVS_NLERR(fmt, ...) \
190 pr_info_once("netlink: " fmt, ##__VA_ARGS__)
191
186#endif /* datapath.h */ 192#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fca282520cee..1fceb9653598 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -46,6 +46,184 @@
46 46
47static struct kmem_cache *flow_cache; 47static struct kmem_cache *flow_cache;
48 48
49static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
50 struct sw_flow_key_range *range, u8 val);
51
52static void update_range__(struct sw_flow_match *match,
53 size_t offset, size_t size, bool is_mask)
54{
55 struct sw_flow_key_range *range = NULL;
56 size_t start = offset;
57 size_t end = offset + size;
58
59 if (!is_mask)
60 range = &match->range;
61 else if (match->mask)
62 range = &match->mask->range;
63
64 if (!range)
65 return;
66
67 if (range->start == range->end) {
68 range->start = start;
69 range->end = end;
70 return;
71 }
72
73 if (range->start > start)
74 range->start = start;
75
76 if (range->end < end)
77 range->end = end;
78}
79
80#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
81 do { \
82 update_range__(match, offsetof(struct sw_flow_key, field), \
83 sizeof((match)->key->field), is_mask); \
84 if (is_mask) { \
85 if ((match)->mask) \
86 (match)->mask->key.field = value; \
87 } else { \
88 (match)->key->field = value; \
89 } \
90 } while (0)
91
92#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
93 do { \
94 update_range__(match, offsetof(struct sw_flow_key, field), \
95 len, is_mask); \
96 if (is_mask) { \
97 if ((match)->mask) \
98 memcpy(&(match)->mask->key.field, value_p, len);\
99 } else { \
100 memcpy(&(match)->key->field, value_p, len); \
101 } \
102 } while (0)
103
104void ovs_match_init(struct sw_flow_match *match,
105 struct sw_flow_key *key,
106 struct sw_flow_mask *mask)
107{
108 memset(match, 0, sizeof(*match));
109 match->key = key;
110 match->mask = mask;
111
112 memset(key, 0, sizeof(*key));
113
114 if (mask) {
115 memset(&mask->key, 0, sizeof(mask->key));
116 mask->range.start = mask->range.end = 0;
117 }
118}
119
120static bool ovs_match_validate(const struct sw_flow_match *match,
121 u64 key_attrs, u64 mask_attrs)
122{
123 u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
124 u64 mask_allowed = key_attrs; /* At most allow all key attributes */
125
126 /* The following mask attributes are allowed only if they
127 * pass the validation tests. */
128 mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
129 | (1 << OVS_KEY_ATTR_IPV6)
130 | (1 << OVS_KEY_ATTR_TCP)
131 | (1 << OVS_KEY_ATTR_UDP)
132 | (1 << OVS_KEY_ATTR_ICMP)
133 | (1 << OVS_KEY_ATTR_ICMPV6)
134 | (1 << OVS_KEY_ATTR_ARP)
135 | (1 << OVS_KEY_ATTR_ND));
136
137 /* Always allowed mask fields. */
138 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
139 | (1 << OVS_KEY_ATTR_IN_PORT)
140 | (1 << OVS_KEY_ATTR_ETHERTYPE));
141
142 /* Check key attributes. */
143 if (match->key->eth.type == htons(ETH_P_ARP)
144 || match->key->eth.type == htons(ETH_P_RARP)) {
145 key_expected |= 1 << OVS_KEY_ATTR_ARP;
146 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
147 mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
148 }
149
150 if (match->key->eth.type == htons(ETH_P_IP)) {
151 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
152 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
153 mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
154
155 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
156 if (match->key->ip.proto == IPPROTO_UDP) {
157 key_expected |= 1 << OVS_KEY_ATTR_UDP;
158 if (match->mask && (match->mask->key.ip.proto == 0xff))
159 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
160 }
161
162 if (match->key->ip.proto == IPPROTO_TCP) {
163 key_expected |= 1 << OVS_KEY_ATTR_TCP;
164 if (match->mask && (match->mask->key.ip.proto == 0xff))
165 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
166 }
167
168 if (match->key->ip.proto == IPPROTO_ICMP) {
169 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
170 if (match->mask && (match->mask->key.ip.proto == 0xff))
171 mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
172 }
173 }
174 }
175
176 if (match->key->eth.type == htons(ETH_P_IPV6)) {
177 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
178 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
179 mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
180
181 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
182 if (match->key->ip.proto == IPPROTO_UDP) {
183 key_expected |= 1 << OVS_KEY_ATTR_UDP;
184 if (match->mask && (match->mask->key.ip.proto == 0xff))
185 mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
186 }
187
188 if (match->key->ip.proto == IPPROTO_TCP) {
189 key_expected |= 1 << OVS_KEY_ATTR_TCP;
190 if (match->mask && (match->mask->key.ip.proto == 0xff))
191 mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
192 }
193
194 if (match->key->ip.proto == IPPROTO_ICMPV6) {
195 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
196 if (match->mask && (match->mask->key.ip.proto == 0xff))
197 mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
198
199 if (match->key->ipv6.tp.src ==
200 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
201 match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
202 key_expected |= 1 << OVS_KEY_ATTR_ND;
203 if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
204 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
205 }
206 }
207 }
208 }
209
210 if ((key_attrs & key_expected) != key_expected) {
211 /* Key attributes check failed. */
212 OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
213 key_attrs, key_expected);
214 return false;
215 }
216
217 if ((mask_attrs & mask_allowed) != mask_attrs) {
218 /* Mask attributes check failed. */
219 OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
220 mask_attrs, mask_allowed);
221 return false;
222 }
223
224 return true;
225}
226
49static int check_header(struct sk_buff *skb, int len) 227static int check_header(struct sk_buff *skb, int len)
50{ 228{
51 if (unlikely(skb->len < len)) 229 if (unlikely(skb->len < len))
@@ -121,12 +299,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
121 return cur_ms - idle_ms; 299 return cur_ms - idle_ms;
122} 300}
123 301
124#define SW_FLOW_KEY_OFFSET(field) \ 302static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
125 (offsetof(struct sw_flow_key, field) + \
126 FIELD_SIZEOF(struct sw_flow_key, field))
127
128static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
129 int *key_lenp)
130{ 303{
131 unsigned int nh_ofs = skb_network_offset(skb); 304 unsigned int nh_ofs = skb_network_offset(skb);
132 unsigned int nh_len; 305 unsigned int nh_len;
@@ -136,8 +309,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
136 __be16 frag_off; 309 __be16 frag_off;
137 int err; 310 int err;
138 311
139 *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
140
141 err = check_header(skb, nh_ofs + sizeof(*nh)); 312 err = check_header(skb, nh_ofs + sizeof(*nh));
142 if (unlikely(err)) 313 if (unlikely(err))
143 return err; 314 return err;
@@ -176,6 +347,21 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
176 sizeof(struct icmp6hdr)); 347 sizeof(struct icmp6hdr));
177} 348}
178 349
350void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
351 const struct sw_flow_mask *mask)
352{
353 u8 *m = (u8 *)&mask->key + mask->range.start;
354 u8 *s = (u8 *)src + mask->range.start;
355 u8 *d = (u8 *)dst + mask->range.start;
356 int i;
357
358 memset(dst, 0, sizeof(*dst));
359 for (i = 0; i < ovs_sw_flow_mask_size_roundup(mask); i++) {
360 *d = *s & *m;
361 d++, s++, m++;
362 }
363}
364
179#define TCP_FLAGS_OFFSET 13 365#define TCP_FLAGS_OFFSET 13
180#define TCP_FLAG_MASK 0x3f 366#define TCP_FLAG_MASK 0x3f
181 367
@@ -224,6 +410,7 @@ struct sw_flow *ovs_flow_alloc(void)
224 410
225 spin_lock_init(&flow->lock); 411 spin_lock_init(&flow->lock);
226 flow->sf_acts = NULL; 412 flow->sf_acts = NULL;
413 flow->mask = NULL;
227 414
228 return flow; 415 return flow;
229} 416}
@@ -263,7 +450,7 @@ static void free_buckets(struct flex_array *buckets)
263 flex_array_free(buckets); 450 flex_array_free(buckets);
264} 451}
265 452
266struct flow_table *ovs_flow_tbl_alloc(int new_size) 453static struct flow_table *__flow_tbl_alloc(int new_size)
267{ 454{
268 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL); 455 struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
269 456
@@ -281,17 +468,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
281 table->node_ver = 0; 468 table->node_ver = 0;
282 table->keep_flows = false; 469 table->keep_flows = false;
283 get_random_bytes(&table->hash_seed, sizeof(u32)); 470 get_random_bytes(&table->hash_seed, sizeof(u32));
471 table->mask_list = NULL;
284 472
285 return table; 473 return table;
286} 474}
287 475
288void ovs_flow_tbl_destroy(struct flow_table *table) 476static void __flow_tbl_destroy(struct flow_table *table)
289{ 477{
290 int i; 478 int i;
291 479
292 if (!table)
293 return;
294
295 if (table->keep_flows) 480 if (table->keep_flows)
296 goto skip_flows; 481 goto skip_flows;
297 482
@@ -303,31 +488,55 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
303 488
304 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { 489 hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
305 hlist_del(&flow->hash_node[ver]); 490 hlist_del(&flow->hash_node[ver]);
306 ovs_flow_free(flow); 491 ovs_flow_free(flow, false);
307 } 492 }
308 } 493 }
309 494
495 BUG_ON(!list_empty(table->mask_list));
496 kfree(table->mask_list);
497
310skip_flows: 498skip_flows:
311 free_buckets(table->buckets); 499 free_buckets(table->buckets);
312 kfree(table); 500 kfree(table);
313} 501}
314 502
503struct flow_table *ovs_flow_tbl_alloc(int new_size)
504{
505 struct flow_table *table = __flow_tbl_alloc(new_size);
506
507 if (!table)
508 return NULL;
509
510 table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
511 if (!table->mask_list) {
512 table->keep_flows = true;
513 __flow_tbl_destroy(table);
514 return NULL;
515 }
516 INIT_LIST_HEAD(table->mask_list);
517
518 return table;
519}
520
315static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) 521static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
316{ 522{
317 struct flow_table *table = container_of(rcu, struct flow_table, rcu); 523 struct flow_table *table = container_of(rcu, struct flow_table, rcu);
318 524
319 ovs_flow_tbl_destroy(table); 525 __flow_tbl_destroy(table);
320} 526}
321 527
322void ovs_flow_tbl_deferred_destroy(struct flow_table *table) 528void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
323{ 529{
324 if (!table) 530 if (!table)
325 return; 531 return;
326 532
327 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); 533 if (deferred)
534 call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
535 else
536 __flow_tbl_destroy(table);
328} 537}
329 538
330struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last) 539struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
331{ 540{
332 struct sw_flow *flow; 541 struct sw_flow *flow;
333 struct hlist_head *head; 542 struct hlist_head *head;
@@ -353,11 +562,13 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
353 return NULL; 562 return NULL;
354} 563}
355 564
356static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) 565static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
357{ 566{
358 struct hlist_head *head; 567 struct hlist_head *head;
568
359 head = find_bucket(table, flow->hash); 569 head = find_bucket(table, flow->hash);
360 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); 570 hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
571
361 table->count++; 572 table->count++;
362} 573}
363 574
@@ -377,8 +588,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
377 head = flex_array_get(old->buckets, i); 588 head = flex_array_get(old->buckets, i);
378 589
379 hlist_for_each_entry(flow, head, hash_node[old_ver]) 590 hlist_for_each_entry(flow, head, hash_node[old_ver])
380 __flow_tbl_insert(new, flow); 591 __tbl_insert(new, flow);
381 } 592 }
593
594 new->mask_list = old->mask_list;
382 old->keep_flows = true; 595 old->keep_flows = true;
383} 596}
384 597
@@ -386,7 +599,7 @@ static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buck
386{ 599{
387 struct flow_table *new_table; 600 struct flow_table *new_table;
388 601
389 new_table = ovs_flow_tbl_alloc(n_buckets); 602 new_table = __flow_tbl_alloc(n_buckets);
390 if (!new_table) 603 if (!new_table)
391 return ERR_PTR(-ENOMEM); 604 return ERR_PTR(-ENOMEM);
392 605
@@ -405,28 +618,30 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
405 return __flow_tbl_rehash(table, table->n_buckets * 2); 618 return __flow_tbl_rehash(table, table->n_buckets * 2);
406} 619}
407 620
408void ovs_flow_free(struct sw_flow *flow) 621static void __flow_free(struct sw_flow *flow)
409{ 622{
410 if (unlikely(!flow))
411 return;
412
413 kfree((struct sf_flow_acts __force *)flow->sf_acts); 623 kfree((struct sf_flow_acts __force *)flow->sf_acts);
414 kmem_cache_free(flow_cache, flow); 624 kmem_cache_free(flow_cache, flow);
415} 625}
416 626
417/* RCU callback used by ovs_flow_deferred_free. */
418static void rcu_free_flow_callback(struct rcu_head *rcu) 627static void rcu_free_flow_callback(struct rcu_head *rcu)
419{ 628{
420 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); 629 struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
421 630
422 ovs_flow_free(flow); 631 __flow_free(flow);
423} 632}
424 633
425/* Schedules 'flow' to be freed after the next RCU grace period. 634void ovs_flow_free(struct sw_flow *flow, bool deferred)
426 * The caller must hold rcu_read_lock for this to be sensible. */
427void ovs_flow_deferred_free(struct sw_flow *flow)
428{ 635{
429 call_rcu(&flow->rcu, rcu_free_flow_callback); 636 if (!flow)
637 return;
638
639 ovs_sw_flow_mask_del_ref(flow->mask, deferred);
640
641 if (deferred)
642 call_rcu(&flow->rcu, rcu_free_flow_callback);
643 else
644 __flow_free(flow);
430} 645}
431 646
432/* Schedules 'sf_acts' to be freed after the next RCU grace period. 647/* Schedules 'sf_acts' to be freed after the next RCU grace period.
@@ -497,18 +712,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
497} 712}
498 713
499static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, 714static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
500 int *key_lenp, int nh_len) 715 int nh_len)
501{ 716{
502 struct icmp6hdr *icmp = icmp6_hdr(skb); 717 struct icmp6hdr *icmp = icmp6_hdr(skb);
503 int error = 0;
504 int key_len;
505 718
506 /* The ICMPv6 type and code fields use the 16-bit transport port 719 /* The ICMPv6 type and code fields use the 16-bit transport port
507 * fields, so we need to store them in 16-bit network byte order. 720 * fields, so we need to store them in 16-bit network byte order.
508 */ 721 */
509 key->ipv6.tp.src = htons(icmp->icmp6_type); 722 key->ipv6.tp.src = htons(icmp->icmp6_type);
510 key->ipv6.tp.dst = htons(icmp->icmp6_code); 723 key->ipv6.tp.dst = htons(icmp->icmp6_code);
511 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
512 724
513 if (icmp->icmp6_code == 0 && 725 if (icmp->icmp6_code == 0 &&
514 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || 726 (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +729,17 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
517 struct nd_msg *nd; 729 struct nd_msg *nd;
518 int offset; 730 int offset;
519 731
520 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
521
522 /* In order to process neighbor discovery options, we need the 732 /* In order to process neighbor discovery options, we need the
523 * entire packet. 733 * entire packet.
524 */ 734 */
525 if (unlikely(icmp_len < sizeof(*nd))) 735 if (unlikely(icmp_len < sizeof(*nd)))
526 goto out; 736 return 0;
527 if (unlikely(skb_linearize(skb))) { 737
528 error = -ENOMEM; 738 if (unlikely(skb_linearize(skb)))
529 goto out; 739 return -ENOMEM;
530 }
531 740
532 nd = (struct nd_msg *)skb_transport_header(skb); 741 nd = (struct nd_msg *)skb_transport_header(skb);
533 key->ipv6.nd.target = nd->target; 742 key->ipv6.nd.target = nd->target;
534 key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
535 743
536 icmp_len -= sizeof(*nd); 744 icmp_len -= sizeof(*nd);
537 offset = 0; 745 offset = 0;
@@ -541,7 +749,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
541 int opt_len = nd_opt->nd_opt_len * 8; 749 int opt_len = nd_opt->nd_opt_len * 8;
542 750
543 if (unlikely(!opt_len || opt_len > icmp_len)) 751 if (unlikely(!opt_len || opt_len > icmp_len))
544 goto invalid; 752 return 0;
545 753
546 /* Store the link layer address if the appropriate 754 /* Store the link layer address if the appropriate
547 * option is provided. It is considered an error if 755 * option is provided. It is considered an error if
@@ -566,16 +774,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
566 } 774 }
567 } 775 }
568 776
569 goto out; 777 return 0;
570 778
571invalid: 779invalid:
572 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); 780 memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
573 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); 781 memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
574 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); 782 memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
575 783
576out: 784 return 0;
577 *key_lenp = key_len;
578 return error;
579} 785}
580 786
581/** 787/**
@@ -584,7 +790,6 @@ out:
584 * Ethernet header 790 * Ethernet header
585 * @in_port: port number on which @skb was received. 791 * @in_port: port number on which @skb was received.
586 * @key: output flow key 792 * @key: output flow key
587 * @key_lenp: length of output flow key
588 * 793 *
589 * The caller must ensure that skb->len >= ETH_HLEN. 794 * The caller must ensure that skb->len >= ETH_HLEN.
590 * 795 *
@@ -602,11 +807,9 @@ out:
602 * of a correct length, otherwise the same as skb->network_header. 807 * of a correct length, otherwise the same as skb->network_header.
603 * For other key->eth.type values it is left untouched. 808 * For other key->eth.type values it is left untouched.
604 */ 809 */
605int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, 810int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
606 int *key_lenp)
607{ 811{
608 int error = 0; 812 int error;
609 int key_len = SW_FLOW_KEY_OFFSET(eth);
610 struct ethhdr *eth; 813 struct ethhdr *eth;
611 814
612 memset(key, 0, sizeof(*key)); 815 memset(key, 0, sizeof(*key));
@@ -649,15 +852,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
649 struct iphdr *nh; 852 struct iphdr *nh;
650 __be16 offset; 853 __be16 offset;
651 854
652 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
653
654 error = check_iphdr(skb); 855 error = check_iphdr(skb);
655 if (unlikely(error)) { 856 if (unlikely(error)) {
656 if (error == -EINVAL) { 857 if (error == -EINVAL) {
657 skb->transport_header = skb->network_header; 858 skb->transport_header = skb->network_header;
658 error = 0; 859 error = 0;
659 } 860 }
660 goto out; 861 return error;
661 } 862 }
662 863
663 nh = ip_hdr(skb); 864 nh = ip_hdr(skb);
@@ -671,7 +872,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
671 offset = nh->frag_off & htons(IP_OFFSET); 872 offset = nh->frag_off & htons(IP_OFFSET);
672 if (offset) { 873 if (offset) {
673 key->ip.frag = OVS_FRAG_TYPE_LATER; 874 key->ip.frag = OVS_FRAG_TYPE_LATER;
674 goto out; 875 return 0;
675 } 876 }
676 if (nh->frag_off & htons(IP_MF) || 877 if (nh->frag_off & htons(IP_MF) ||
677 skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 878 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -679,21 +880,18 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
679 880
680 /* Transport layer. */ 881 /* Transport layer. */
681 if (key->ip.proto == IPPROTO_TCP) { 882 if (key->ip.proto == IPPROTO_TCP) {
682 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
683 if (tcphdr_ok(skb)) { 883 if (tcphdr_ok(skb)) {
684 struct tcphdr *tcp = tcp_hdr(skb); 884 struct tcphdr *tcp = tcp_hdr(skb);
685 key->ipv4.tp.src = tcp->source; 885 key->ipv4.tp.src = tcp->source;
686 key->ipv4.tp.dst = tcp->dest; 886 key->ipv4.tp.dst = tcp->dest;
687 } 887 }
688 } else if (key->ip.proto == IPPROTO_UDP) { 888 } else if (key->ip.proto == IPPROTO_UDP) {
689 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
690 if (udphdr_ok(skb)) { 889 if (udphdr_ok(skb)) {
691 struct udphdr *udp = udp_hdr(skb); 890 struct udphdr *udp = udp_hdr(skb);
692 key->ipv4.tp.src = udp->source; 891 key->ipv4.tp.src = udp->source;
693 key->ipv4.tp.dst = udp->dest; 892 key->ipv4.tp.dst = udp->dest;
694 } 893 }
695 } else if (key->ip.proto == IPPROTO_ICMP) { 894 } else if (key->ip.proto == IPPROTO_ICMP) {
696 key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
697 if (icmphdr_ok(skb)) { 895 if (icmphdr_ok(skb)) {
698 struct icmphdr *icmp = icmp_hdr(skb); 896 struct icmphdr *icmp = icmp_hdr(skb);
699 /* The ICMP type and code fields use the 16-bit 897 /* The ICMP type and code fields use the 16-bit
@@ -722,53 +920,49 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
722 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); 920 memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
723 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); 921 memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
724 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); 922 memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
725 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
726 } 923 }
727 } else if (key->eth.type == htons(ETH_P_IPV6)) { 924 } else if (key->eth.type == htons(ETH_P_IPV6)) {
728 int nh_len; /* IPv6 Header + Extensions */ 925 int nh_len; /* IPv6 Header + Extensions */
729 926
730 nh_len = parse_ipv6hdr(skb, key, &key_len); 927 nh_len = parse_ipv6hdr(skb, key);
731 if (unlikely(nh_len < 0)) { 928 if (unlikely(nh_len < 0)) {
732 if (nh_len == -EINVAL) 929 if (nh_len == -EINVAL) {
733 skb->transport_header = skb->network_header; 930 skb->transport_header = skb->network_header;
734 else 931 error = 0;
932 } else {
735 error = nh_len; 933 error = nh_len;
736 goto out; 934 }
935 return error;
737 } 936 }
738 937
739 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 938 if (key->ip.frag == OVS_FRAG_TYPE_LATER)
740 goto out; 939 return 0;
741 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 940 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
742 key->ip.frag = OVS_FRAG_TYPE_FIRST; 941 key->ip.frag = OVS_FRAG_TYPE_FIRST;
743 942
744 /* Transport layer. */ 943 /* Transport layer. */
745 if (key->ip.proto == NEXTHDR_TCP) { 944 if (key->ip.proto == NEXTHDR_TCP) {
746 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
747 if (tcphdr_ok(skb)) { 945 if (tcphdr_ok(skb)) {
748 struct tcphdr *tcp = tcp_hdr(skb); 946 struct tcphdr *tcp = tcp_hdr(skb);
749 key->ipv6.tp.src = tcp->source; 947 key->ipv6.tp.src = tcp->source;
750 key->ipv6.tp.dst = tcp->dest; 948 key->ipv6.tp.dst = tcp->dest;
751 } 949 }
752 } else if (key->ip.proto == NEXTHDR_UDP) { 950 } else if (key->ip.proto == NEXTHDR_UDP) {
753 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
754 if (udphdr_ok(skb)) { 951 if (udphdr_ok(skb)) {
755 struct udphdr *udp = udp_hdr(skb); 952 struct udphdr *udp = udp_hdr(skb);
756 key->ipv6.tp.src = udp->source; 953 key->ipv6.tp.src = udp->source;
757 key->ipv6.tp.dst = udp->dest; 954 key->ipv6.tp.dst = udp->dest;
758 } 955 }
759 } else if (key->ip.proto == NEXTHDR_ICMP) { 956 } else if (key->ip.proto == NEXTHDR_ICMP) {
760 key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
761 if (icmp6hdr_ok(skb)) { 957 if (icmp6hdr_ok(skb)) {
762 error = parse_icmpv6(skb, key, &key_len, nh_len); 958 error = parse_icmpv6(skb, key, nh_len);
763 if (error < 0) 959 if (error)
764 goto out; 960 return error;
765 } 961 }
766 } 962 }
767 } 963 }
768 964
769out: 965 return 0;
770 *key_lenp = key_len;
771 return error;
772} 966}
773 967
774static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) 968static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
@@ -777,7 +971,7 @@ static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_l
777 DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); 971 DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
778} 972}
779 973
780static int flow_key_start(struct sw_flow_key *key) 974static int flow_key_start(const struct sw_flow_key *key)
781{ 975{
782 if (key->tun_key.ipv4_dst) 976 if (key->tun_key.ipv4_dst)
783 return 0; 977 return 0;
@@ -785,39 +979,95 @@ static int flow_key_start(struct sw_flow_key *key)
785 return offsetof(struct sw_flow_key, phy); 979 return offsetof(struct sw_flow_key, phy);
786} 980}
787 981
788struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 982static bool __cmp_key(const struct sw_flow_key *key1,
789 struct sw_flow_key *key, int key_len) 983 const struct sw_flow_key *key2, int key_start, int key_len)
984{
985 return !memcmp((u8 *)key1 + key_start,
986 (u8 *)key2 + key_start, (key_len - key_start));
987}
988
989static bool __flow_cmp_key(const struct sw_flow *flow,
990 const struct sw_flow_key *key, int key_start, int key_len)
991{
992 return __cmp_key(&flow->key, key, key_start, key_len);
993}
994
995static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
996 const struct sw_flow_key *key, int key_start, int key_len)
997{
998 return __cmp_key(&flow->unmasked_key, key, key_start, key_len);
999}
1000
/*
 * Check whether @flow's unmasked key equals @key.
 *
 * The comparison starts at the offset chosen by flow_key_start(key) and
 * ends at byte offset @key_len.
 */
bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
		const struct sw_flow_key *key, int key_len)
{
	return __flow_cmp_unmasked_key(flow, key, flow_key_start(key), key_len);
}
1010
1011struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
1012 struct sw_flow_match *match)
1013{
1014 struct sw_flow_key *unmasked = match->key;
1015 int key_len = match->range.end;
1016 struct sw_flow *flow;
1017
1018 flow = ovs_flow_lookup(table, unmasked);
1019 if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_len)))
1020 flow = NULL;
1021
1022 return flow;
1023}
1024
1025static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
1026 const struct sw_flow_key *flow_key,
1027 struct sw_flow_mask *mask)
790{ 1028{
791 struct sw_flow *flow; 1029 struct sw_flow *flow;
792 struct hlist_head *head; 1030 struct hlist_head *head;
793 u8 *_key; 1031 int key_start = mask->range.start;
794 int key_start; 1032 int key_len = mask->range.end;
795 u32 hash; 1033 u32 hash;
1034 struct sw_flow_key masked_key;
796 1035
797 key_start = flow_key_start(key); 1036 ovs_flow_key_mask(&masked_key, flow_key, mask);
798 hash = ovs_flow_hash(key, key_start, key_len); 1037 hash = ovs_flow_hash(&masked_key, key_start, key_len);
799
800 _key = (u8 *) key + key_start;
801 head = find_bucket(table, hash); 1038 head = find_bucket(table, hash);
802 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { 1039 hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
803 1040 if (flow->mask == mask &&
804 if (flow->hash == hash && 1041 __flow_cmp_key(flow, &masked_key, key_start, key_len))
805 !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
806 return flow; 1042 return flow;
807 }
808 } 1043 }
809 return NULL; 1044 return NULL;
810} 1045}
811 1046
812void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, 1047struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
813 struct sw_flow_key *key, int key_len) 1048 const struct sw_flow_key *key)
814{ 1049{
815 flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); 1050 struct sw_flow *flow = NULL;
816 memcpy(&flow->key, key, sizeof(flow->key)); 1051 struct sw_flow_mask *mask;
817 __flow_tbl_insert(table, flow); 1052
1053 list_for_each_entry_rcu(mask, tbl->mask_list, list) {
1054 flow = ovs_masked_flow_lookup(tbl, key, mask);
1055 if (flow) /* Found */
1056 break;
1057 }
1058
1059 return flow;
818} 1060}
819 1061
820void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 1062
1063void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
1064{
1065 flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
1066 flow->mask->range.end);
1067 __tbl_insert(table, flow);
1068}
1069
1070void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
821{ 1071{
822 BUG_ON(table->count == 0); 1072 BUG_ON(table->count == 0);
823 hlist_del_rcu(&flow->hash_node[table->node_ver]); 1073 hlist_del_rcu(&flow->hash_node[table->node_ver]);
@@ -844,149 +1094,84 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
844 [OVS_KEY_ATTR_TUNNEL] = -1, 1094 [OVS_KEY_ATTR_TUNNEL] = -1,
845}; 1095};
846 1096
847static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, 1097static bool is_all_zero(const u8 *fp, size_t size)
848 const struct nlattr *a[], u32 *attrs)
849{
850 const struct ovs_key_icmp *icmp_key;
851 const struct ovs_key_tcp *tcp_key;
852 const struct ovs_key_udp *udp_key;
853
854 switch (swkey->ip.proto) {
855 case IPPROTO_TCP:
856 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
857 return -EINVAL;
858 *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
859
860 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
861 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
862 swkey->ipv4.tp.src = tcp_key->tcp_src;
863 swkey->ipv4.tp.dst = tcp_key->tcp_dst;
864 break;
865
866 case IPPROTO_UDP:
867 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
868 return -EINVAL;
869 *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
870
871 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
872 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
873 swkey->ipv4.tp.src = udp_key->udp_src;
874 swkey->ipv4.tp.dst = udp_key->udp_dst;
875 break;
876
877 case IPPROTO_ICMP:
878 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
879 return -EINVAL;
880 *attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
881
882 *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
883 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
884 swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
885 swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
886 break;
887 }
888
889 return 0;
890}
891
892static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
893 const struct nlattr *a[], u32 *attrs)
894{ 1098{
895 const struct ovs_key_icmpv6 *icmpv6_key; 1099 int i;
896 const struct ovs_key_tcp *tcp_key;
897 const struct ovs_key_udp *udp_key;
898
899 switch (swkey->ip.proto) {
900 case IPPROTO_TCP:
901 if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
902 return -EINVAL;
903 *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
904
905 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
906 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
907 swkey->ipv6.tp.src = tcp_key->tcp_src;
908 swkey->ipv6.tp.dst = tcp_key->tcp_dst;
909 break;
910
911 case IPPROTO_UDP:
912 if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
913 return -EINVAL;
914 *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
915
916 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
917 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
918 swkey->ipv6.tp.src = udp_key->udp_src;
919 swkey->ipv6.tp.dst = udp_key->udp_dst;
920 break;
921
922 case IPPROTO_ICMPV6:
923 if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
924 return -EINVAL;
925 *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
926
927 *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
928 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
929 swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
930 swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
931 1100
932 if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || 1101 if (!fp)
933 swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { 1102 return false;
934 const struct ovs_key_nd *nd_key;
935 1103
936 if (!(*attrs & (1 << OVS_KEY_ATTR_ND))) 1104 for (i = 0; i < size; i++)
937 return -EINVAL; 1105 if (fp[i])
938 *attrs &= ~(1 << OVS_KEY_ATTR_ND); 1106 return false;
939
940 *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
941 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
942 memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
943 sizeof(swkey->ipv6.nd.target));
944 memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
945 memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
946 }
947 break;
948 }
949 1107
950 return 0; 1108 return true;
951} 1109}
952 1110
953static int parse_flow_nlattrs(const struct nlattr *attr, 1111static int __parse_flow_nlattrs(const struct nlattr *attr,
954 const struct nlattr *a[], u32 *attrsp) 1112 const struct nlattr *a[],
1113 u64 *attrsp, bool nz)
955{ 1114{
956 const struct nlattr *nla; 1115 const struct nlattr *nla;
957 u32 attrs; 1116 u32 attrs;
958 int rem; 1117 int rem;
959 1118
960 attrs = 0; 1119 attrs = *attrsp;
961 nla_for_each_nested(nla, attr, rem) { 1120 nla_for_each_nested(nla, attr, rem) {
962 u16 type = nla_type(nla); 1121 u16 type = nla_type(nla);
963 int expected_len; 1122 int expected_len;
964 1123
965 if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type)) 1124 if (type > OVS_KEY_ATTR_MAX) {
1125 OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
1126 type, OVS_KEY_ATTR_MAX);
1127 }
1128
1129 if (attrs & (1 << type)) {
1130 OVS_NLERR("Duplicate key attribute (type %d).\n", type);
966 return -EINVAL; 1131 return -EINVAL;
1132 }
967 1133
968 expected_len = ovs_key_lens[type]; 1134 expected_len = ovs_key_lens[type];
969 if (nla_len(nla) != expected_len && expected_len != -1) 1135 if (nla_len(nla) != expected_len && expected_len != -1) {
1136 OVS_NLERR("Key attribute has unexpected length (type=%d"
1137 ", length=%d, expected=%d).\n", type,
1138 nla_len(nla), expected_len);
970 return -EINVAL; 1139 return -EINVAL;
1140 }
971 1141
972 attrs |= 1 << type; 1142 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
973 a[type] = nla; 1143 attrs |= 1 << type;
1144 a[type] = nla;
1145 }
974 } 1146 }
975 if (rem) 1147 if (rem) {
1148 OVS_NLERR("Message has %d unknown bytes.\n", rem);
976 return -EINVAL; 1149 return -EINVAL;
1150 }
977 1151
978 *attrsp = attrs; 1152 *attrsp = attrs;
979 return 0; 1153 return 0;
980} 1154}
981 1155
1156static int parse_flow_mask_nlattrs(const struct nlattr *attr,
1157 const struct nlattr *a[], u64 *attrsp)
1158{
1159 return __parse_flow_nlattrs(attr, a, attrsp, true);
1160}
1161
1162static int parse_flow_nlattrs(const struct nlattr *attr,
1163 const struct nlattr *a[], u64 *attrsp)
1164{
1165 return __parse_flow_nlattrs(attr, a, attrsp, false);
1166}
1167
982int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 1168int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
983 struct ovs_key_ipv4_tunnel *tun_key) 1169 struct sw_flow_match *match, bool is_mask)
984{ 1170{
985 struct nlattr *a; 1171 struct nlattr *a;
986 int rem; 1172 int rem;
987 bool ttl = false; 1173 bool ttl = false;
988 1174 __be16 tun_flags = 0;
989 memset(tun_key, 0, sizeof(*tun_key));
990 1175
991 nla_for_each_nested(a, attr, rem) { 1176 nla_for_each_nested(a, attr, rem) {
992 int type = nla_type(a); 1177 int type = nla_type(a);
@@ -1000,53 +1185,78 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
1000 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 1185 [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
1001 }; 1186 };
1002 1187
1003 if (type > OVS_TUNNEL_KEY_ATTR_MAX || 1188 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
1004 ovs_tunnel_key_lens[type] != nla_len(a)) 1189 OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
1190 type, OVS_TUNNEL_KEY_ATTR_MAX);
1005 return -EINVAL; 1191 return -EINVAL;
1192 }
1193
1194 if (ovs_tunnel_key_lens[type] != nla_len(a)) {
1195 OVS_NLERR("IPv4 tunnel attribute type has unexpected "
1196 " length (type=%d, length=%d, expected=%d).\n",
1197 type, nla_len(a), ovs_tunnel_key_lens[type]);
1198 return -EINVAL;
1199 }
1006 1200
1007 switch (type) { 1201 switch (type) {
1008 case OVS_TUNNEL_KEY_ATTR_ID: 1202 case OVS_TUNNEL_KEY_ATTR_ID:
1009 tun_key->tun_id = nla_get_be64(a); 1203 SW_FLOW_KEY_PUT(match, tun_key.tun_id,
1010 tun_key->tun_flags |= TUNNEL_KEY; 1204 nla_get_be64(a), is_mask);
1205 tun_flags |= TUNNEL_KEY;
1011 break; 1206 break;
1012 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: 1207 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1013 tun_key->ipv4_src = nla_get_be32(a); 1208 SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
1209 nla_get_be32(a), is_mask);
1014 break; 1210 break;
1015 case OVS_TUNNEL_KEY_ATTR_IPV4_DST: 1211 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1016 tun_key->ipv4_dst = nla_get_be32(a); 1212 SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
1213 nla_get_be32(a), is_mask);
1017 break; 1214 break;
1018 case OVS_TUNNEL_KEY_ATTR_TOS: 1215 case OVS_TUNNEL_KEY_ATTR_TOS:
1019 tun_key->ipv4_tos = nla_get_u8(a); 1216 SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
1217 nla_get_u8(a), is_mask);
1020 break; 1218 break;
1021 case OVS_TUNNEL_KEY_ATTR_TTL: 1219 case OVS_TUNNEL_KEY_ATTR_TTL:
1022 tun_key->ipv4_ttl = nla_get_u8(a); 1220 SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
1221 nla_get_u8(a), is_mask);
1023 ttl = true; 1222 ttl = true;
1024 break; 1223 break;
1025 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: 1224 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1026 tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; 1225 tun_flags |= TUNNEL_DONT_FRAGMENT;
1027 break; 1226 break;
1028 case OVS_TUNNEL_KEY_ATTR_CSUM: 1227 case OVS_TUNNEL_KEY_ATTR_CSUM:
1029 tun_key->tun_flags |= TUNNEL_CSUM; 1228 tun_flags |= TUNNEL_CSUM;
1030 break; 1229 break;
1031 default: 1230 default:
1032 return -EINVAL; 1231 return -EINVAL;
1033
1034 } 1232 }
1035 } 1233 }
1036 if (rem > 0)
1037 return -EINVAL;
1038 1234
1039 if (!tun_key->ipv4_dst) 1235 SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
1040 return -EINVAL;
1041 1236
1042 if (!ttl) 1237 if (rem > 0) {
1238 OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
1043 return -EINVAL; 1239 return -EINVAL;
1240 }
1241
1242 if (!is_mask) {
1243 if (!match->key->tun_key.ipv4_dst) {
1244 OVS_NLERR("IPv4 tunnel destination address is zero.\n");
1245 return -EINVAL;
1246 }
1247
1248 if (!ttl) {
1249 OVS_NLERR("IPv4 tunnel TTL not specified.\n");
1250 return -EINVAL;
1251 }
1252 }
1044 1253
1045 return 0; 1254 return 0;
1046} 1255}
1047 1256
1048int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 1257int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1049 const struct ovs_key_ipv4_tunnel *tun_key) 1258 const struct ovs_key_ipv4_tunnel *tun_key,
1259 const struct ovs_key_ipv4_tunnel *output)
1050{ 1260{
1051 struct nlattr *nla; 1261 struct nlattr *nla;
1052 1262
@@ -1054,23 +1264,24 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1054 if (!nla) 1264 if (!nla)
1055 return -EMSGSIZE; 1265 return -EMSGSIZE;
1056 1266
1057 if (tun_key->tun_flags & TUNNEL_KEY && 1267 if (output->tun_flags & TUNNEL_KEY &&
1058 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) 1268 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
1059 return -EMSGSIZE; 1269 return -EMSGSIZE;
1060 if (tun_key->ipv4_src && 1270 if (output->ipv4_src &&
1061 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) 1271 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
1062 return -EMSGSIZE; 1272 return -EMSGSIZE;
1063 if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) 1273 if (output->ipv4_dst &&
1274 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
1064 return -EMSGSIZE; 1275 return -EMSGSIZE;
1065 if (tun_key->ipv4_tos && 1276 if (output->ipv4_tos &&
1066 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) 1277 nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
1067 return -EMSGSIZE; 1278 return -EMSGSIZE;
1068 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) 1279 if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
1069 return -EMSGSIZE; 1280 return -EMSGSIZE;
1070 if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && 1281 if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
1071 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) 1282 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
1072 return -EMSGSIZE; 1283 return -EMSGSIZE;
1073 if ((tun_key->tun_flags & TUNNEL_CSUM) && 1284 if ((output->tun_flags & TUNNEL_CSUM) &&
1074 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) 1285 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
1075 return -EMSGSIZE; 1286 return -EMSGSIZE;
1076 1287
@@ -1078,176 +1289,372 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
1078 return 0; 1289 return 0;
1079} 1290}
1080 1291
1081/** 1292static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
1082 * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. 1293 const struct nlattr **a, bool is_mask)
1083 * @swkey: receives the extracted flow key.
1084 * @key_lenp: number of bytes used in @swkey.
1085 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1086 * sequence.
1087 */
1088int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1089 const struct nlattr *attr)
1090{ 1294{
1091 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; 1295 if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1092 const struct ovs_key_ethernet *eth_key; 1296 SW_FLOW_KEY_PUT(match, phy.priority,
1093 int key_len; 1297 nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1094 u32 attrs; 1298 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1095 int err; 1299 }
1096 1300
1097 memset(swkey, 0, sizeof(struct sw_flow_key)); 1301 if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1098 key_len = SW_FLOW_KEY_OFFSET(eth); 1302 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1099 1303
1100 err = parse_flow_nlattrs(attr, a, &attrs); 1304 if (is_mask)
1101 if (err) 1305 in_port = 0xffffffff; /* Always exact match in_port. */
1102 return err; 1306 else if (in_port >= DP_MAX_PORTS)
1307 return -EINVAL;
1103 1308
1104 /* Metadata attributes. */ 1309 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1105 if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { 1310 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1106 swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]); 1311 } else if (!is_mask) {
1107 attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); 1312 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1108 } 1313 }
1109 if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { 1314
1110 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); 1315 if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1111 if (in_port >= DP_MAX_PORTS) 1316 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1112 return -EINVAL; 1317
1113 swkey->phy.in_port = in_port; 1318 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1114 attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); 1319 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1115 } else {
1116 swkey->phy.in_port = DP_MAX_PORTS;
1117 } 1320 }
1118 if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { 1321 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1119 swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); 1322 if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1120 attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); 1323 is_mask))
1324 return -EINVAL;
1325 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1121 } 1326 }
1327 return 0;
1328}
1122 1329
1123 if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 1330static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
1124 err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); 1331 const struct nlattr **a, bool is_mask)
1125 if (err) 1332{
1126 return err; 1333 int err;
1334 u64 orig_attrs = attrs;
1127 1335
1128 attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 1336 err = metadata_from_nlattrs(match, &attrs, a, is_mask);
1129 } 1337 if (err)
1338 return err;
1130 1339
1131 /* Data attributes. */ 1340 if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1132 if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) 1341 const struct ovs_key_ethernet *eth_key;
1133 return -EINVAL;
1134 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1135 1342
1136 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); 1343 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1137 memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN); 1344 SW_FLOW_KEY_MEMCPY(match, eth.src,
1138 memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN); 1345 eth_key->eth_src, ETH_ALEN, is_mask);
1346 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1347 eth_key->eth_dst, ETH_ALEN, is_mask);
1348 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1349 }
1139 1350
1140 if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) && 1351 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1141 nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
1142 const struct nlattr *encap;
1143 __be16 tci; 1352 __be16 tci;
1144 1353
1145 if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
1146 (1 << OVS_KEY_ATTR_ETHERTYPE) |
1147 (1 << OVS_KEY_ATTR_ENCAP)))
1148 return -EINVAL;
1149
1150 encap = a[OVS_KEY_ATTR_ENCAP];
1151 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); 1354 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1152 if (tci & htons(VLAN_TAG_PRESENT)) { 1355 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1153 swkey->eth.tci = tci; 1356 if (is_mask)
1154 1357 OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
1155 err = parse_flow_nlattrs(encap, a, &attrs); 1358 else
1156 if (err) 1359 OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
1157 return err;
1158 } else if (!tci) {
1159 /* Corner case for truncated 802.1Q header. */
1160 if (nla_len(encap))
1161 return -EINVAL;
1162 1360
1163 swkey->eth.type = htons(ETH_P_8021Q);
1164 *key_lenp = key_len;
1165 return 0;
1166 } else {
1167 return -EINVAL; 1361 return -EINVAL;
1168 } 1362 }
1169 } 1363
1364 SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
1365 attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1366 } else if (!is_mask)
1367 SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
1170 1368
1171 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1369 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1172 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1370 __be16 eth_type;
1173 if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) 1371
1372 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1373 if (is_mask) {
1374 /* Always exact match EtherType. */
1375 eth_type = htons(0xffff);
1376 } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
1377 OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
1378 ntohs(eth_type), ETH_P_802_3_MIN);
1174 return -EINVAL; 1379 return -EINVAL;
1380 }
1381
1382 SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1175 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1383 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1176 } else { 1384 } else if (!is_mask) {
1177 swkey->eth.type = htons(ETH_P_802_2); 1385 SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1178 } 1386 }
1179 1387
1180 if (swkey->eth.type == htons(ETH_P_IP)) { 1388 if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1181 const struct ovs_key_ipv4 *ipv4_key; 1389 const struct ovs_key_ipv4 *ipv4_key;
1182 1390
1183 if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
1184 return -EINVAL;
1185 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1186
1187 key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
1188 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); 1391 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1189 if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) 1392 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1393 OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
1394 ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1190 return -EINVAL; 1395 return -EINVAL;
1191 swkey->ip.proto = ipv4_key->ipv4_proto;
1192 swkey->ip.tos = ipv4_key->ipv4_tos;
1193 swkey->ip.ttl = ipv4_key->ipv4_ttl;
1194 swkey->ip.frag = ipv4_key->ipv4_frag;
1195 swkey->ipv4.addr.src = ipv4_key->ipv4_src;
1196 swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
1197
1198 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1199 err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1200 if (err)
1201 return err;
1202 } 1396 }
1203 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1397 SW_FLOW_KEY_PUT(match, ip.proto,
1204 const struct ovs_key_ipv6 *ipv6_key; 1398 ipv4_key->ipv4_proto, is_mask);
1399 SW_FLOW_KEY_PUT(match, ip.tos,
1400 ipv4_key->ipv4_tos, is_mask);
1401 SW_FLOW_KEY_PUT(match, ip.ttl,
1402 ipv4_key->ipv4_ttl, is_mask);
1403 SW_FLOW_KEY_PUT(match, ip.frag,
1404 ipv4_key->ipv4_frag, is_mask);
1405 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1406 ipv4_key->ipv4_src, is_mask);
1407 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1408 ipv4_key->ipv4_dst, is_mask);
1409 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1410 }
1205 1411
1206 if (!(attrs & (1 << OVS_KEY_ATTR_IPV6))) 1412 if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1207 return -EINVAL; 1413 const struct ovs_key_ipv6 *ipv6_key;
1208 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1209 1414
1210 key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
1211 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); 1415 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1212 if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) 1416 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1417 OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
1418 ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1213 return -EINVAL; 1419 return -EINVAL;
1214 swkey->ipv6.label = ipv6_key->ipv6_label;
1215 swkey->ip.proto = ipv6_key->ipv6_proto;
1216 swkey->ip.tos = ipv6_key->ipv6_tclass;
1217 swkey->ip.ttl = ipv6_key->ipv6_hlimit;
1218 swkey->ip.frag = ipv6_key->ipv6_frag;
1219 memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
1220 sizeof(swkey->ipv6.addr.src));
1221 memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
1222 sizeof(swkey->ipv6.addr.dst));
1223
1224 if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
1225 err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
1226 if (err)
1227 return err;
1228 } 1420 }
1229 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1421 SW_FLOW_KEY_PUT(match, ipv6.label,
1230 swkey->eth.type == htons(ETH_P_RARP)) { 1422 ipv6_key->ipv6_label, is_mask);
1423 SW_FLOW_KEY_PUT(match, ip.proto,
1424 ipv6_key->ipv6_proto, is_mask);
1425 SW_FLOW_KEY_PUT(match, ip.tos,
1426 ipv6_key->ipv6_tclass, is_mask);
1427 SW_FLOW_KEY_PUT(match, ip.ttl,
1428 ipv6_key->ipv6_hlimit, is_mask);
1429 SW_FLOW_KEY_PUT(match, ip.frag,
1430 ipv6_key->ipv6_frag, is_mask);
1431 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1432 ipv6_key->ipv6_src,
1433 sizeof(match->key->ipv6.addr.src),
1434 is_mask);
1435 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1436 ipv6_key->ipv6_dst,
1437 sizeof(match->key->ipv6.addr.dst),
1438 is_mask);
1439
1440 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1441 }
1442
1443 if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1231 const struct ovs_key_arp *arp_key; 1444 const struct ovs_key_arp *arp_key;
1232 1445
1233 if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) 1446 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1447 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1448 OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
1449 arp_key->arp_op);
1234 return -EINVAL; 1450 return -EINVAL;
1451 }
1452
1453 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1454 arp_key->arp_sip, is_mask);
1455 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1456 arp_key->arp_tip, is_mask);
1457 SW_FLOW_KEY_PUT(match, ip.proto,
1458 ntohs(arp_key->arp_op), is_mask);
1459 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1460 arp_key->arp_sha, ETH_ALEN, is_mask);
1461 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1462 arp_key->arp_tha, ETH_ALEN, is_mask);
1463
1235 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1464 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1465 }
1236 1466
1237 key_len = SW_FLOW_KEY_OFFSET(ipv4.arp); 1467 if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1238 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); 1468 const struct ovs_key_tcp *tcp_key;
1239 swkey->ipv4.addr.src = arp_key->arp_sip; 1469
1240 swkey->ipv4.addr.dst = arp_key->arp_tip; 1470 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1241 if (arp_key->arp_op & htons(0xff00)) 1471 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1472 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1473 tcp_key->tcp_src, is_mask);
1474 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1475 tcp_key->tcp_dst, is_mask);
1476 } else {
1477 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1478 tcp_key->tcp_src, is_mask);
1479 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1480 tcp_key->tcp_dst, is_mask);
1481 }
1482 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1483 }
1484
1485 if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1486 const struct ovs_key_udp *udp_key;
1487
1488 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1489 if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1490 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1491 udp_key->udp_src, is_mask);
1492 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1493 udp_key->udp_dst, is_mask);
1494 } else {
1495 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1496 udp_key->udp_src, is_mask);
1497 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1498 udp_key->udp_dst, is_mask);
1499 }
1500 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1501 }
1502
1503 if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1504 const struct ovs_key_icmp *icmp_key;
1505
1506 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1507 SW_FLOW_KEY_PUT(match, ipv4.tp.src,
1508 htons(icmp_key->icmp_type), is_mask);
1509 SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
1510 htons(icmp_key->icmp_code), is_mask);
1511 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1512 }
1513
1514 if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1515 const struct ovs_key_icmpv6 *icmpv6_key;
1516
1517 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1518 SW_FLOW_KEY_PUT(match, ipv6.tp.src,
1519 htons(icmpv6_key->icmpv6_type), is_mask);
1520 SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
1521 htons(icmpv6_key->icmpv6_code), is_mask);
1522 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1523 }
1524
1525 if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1526 const struct ovs_key_nd *nd_key;
1527
1528 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1529 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1530 nd_key->nd_target,
1531 sizeof(match->key->ipv6.nd.target),
1532 is_mask);
1533 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1534 nd_key->nd_sll, ETH_ALEN, is_mask);
1535 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1536 nd_key->nd_tll, ETH_ALEN, is_mask);
1537 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1538 }
1539
1540 if (attrs != 0)
1541 return -EINVAL;
1542
1543 return 0;
1544}
1545
1546/**
1547 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
1548 * mask. In case the 'mask' is NULL, the flow is treated as exact match
1549 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
1550 * does not include any don't care bit.
1551 * @match: receives the extracted flow match information.
1552 * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1553 * sequence. The fields should be of the packet that triggered the creation
1554 * of this flow.
1555 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1556 * attribute that specifies the mask field of the wildcarded flow.
1557 */
1558int ovs_match_from_nlattrs(struct sw_flow_match *match,
1559 const struct nlattr *key,
1560 const struct nlattr *mask)
1561{
1562 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1563 const struct nlattr *encap;
1564 u64 key_attrs = 0;
1565 u64 mask_attrs = 0;
1566 bool encap_valid = false;
1567 int err;
1568
1569 err = parse_flow_nlattrs(key, a, &key_attrs);
1570 if (err)
1571 return err;
1572
1573 if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
1574 (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
1575 (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
1576 __be16 tci;
1577
1578 if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
1579 (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
1580 OVS_NLERR("Invalid Vlan frame.\n");
1242 return -EINVAL; 1581 return -EINVAL;
1243 swkey->ip.proto = ntohs(arp_key->arp_op); 1582 }
1244 memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN); 1583
1245 memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN); 1584 key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1585 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1586 encap = a[OVS_KEY_ATTR_ENCAP];
1587 key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1588 encap_valid = true;
1589
1590 if (tci & htons(VLAN_TAG_PRESENT)) {
1591 err = parse_flow_nlattrs(encap, a, &key_attrs);
1592 if (err)
1593 return err;
1594 } else if (!tci) {
1595 /* Corner case for truncated 802.1Q header. */
1596 if (nla_len(encap)) {
1597 OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
1598 return -EINVAL;
1599 }
1600 } else {
1601 OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
1602 return -EINVAL;
1603 }
1246 } 1604 }
1247 1605
1248 if (attrs) 1606 err = ovs_key_from_nlattrs(match, key_attrs, a, false);
1607 if (err)
1608 return err;
1609
1610 if (mask) {
1611 err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
1612 if (err)
1613 return err;
1614
1615 if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
1616 __be16 eth_type = 0;
1617 __be16 tci = 0;
1618
1619 if (!encap_valid) {
1620 OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
1621 return -EINVAL;
1622 }
1623
1624 mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1625 if (a[OVS_KEY_ATTR_ETHERTYPE])
1626 eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1627
1628 if (eth_type == htons(0xffff)) {
1629 mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1630 encap = a[OVS_KEY_ATTR_ENCAP];
1631 err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
1632 } else {
1633 OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
1634 ntohs(eth_type));
1635 return -EINVAL;
1636 }
1637
1638 if (a[OVS_KEY_ATTR_VLAN])
1639 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1640
1641 if (!(tci & htons(VLAN_TAG_PRESENT))) {
1642 OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
1643 return -EINVAL;
1644 }
1645 }
1646
1647 err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
1648 if (err)
1649 return err;
1650 } else {
1651 /* Populate exact match flow's key mask. */
1652 if (match->mask)
1653 ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
1654 }
1655
1656 if (!ovs_match_validate(match, key_attrs, mask_attrs))
1249 return -EINVAL; 1657 return -EINVAL;
1250 *key_lenp = key_len;
1251 1658
1252 return 0; 1659 return 0;
1253} 1660}
@@ -1255,7 +1662,6 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1255/** 1662/**
1256 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. 1663 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
1257 * @flow: Receives extracted in_port, priority, tun_key and skb_mark. 1664 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
1258 * @key_len: Length of key in @flow. Used for calculating flow hash.
1259 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute 1665 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
1260 * sequence. 1666 * sequence.
1261 * 1667 *
@@ -1264,102 +1670,100 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1264 * get the metadata, that is, the parts of the flow key that cannot be 1670 * get the metadata, that is, the parts of the flow key that cannot be
1265 * extracted from the packet itself. 1671 * extracted from the packet itself.
1266 */ 1672 */
1267int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, 1673
1268 const struct nlattr *attr) 1674int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
1675 const struct nlattr *attr)
1269{ 1676{
1270 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; 1677 struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
1271 const struct nlattr *nla; 1678 const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1272 int rem; 1679 u64 attrs = 0;
1680 int err;
1681 struct sw_flow_match match;
1273 1682
1274 flow->key.phy.in_port = DP_MAX_PORTS; 1683 flow->key.phy.in_port = DP_MAX_PORTS;
1275 flow->key.phy.priority = 0; 1684 flow->key.phy.priority = 0;
1276 flow->key.phy.skb_mark = 0; 1685 flow->key.phy.skb_mark = 0;
1277 memset(tun_key, 0, sizeof(flow->key.tun_key)); 1686 memset(tun_key, 0, sizeof(flow->key.tun_key));
1278 1687
1279 nla_for_each_nested(nla, attr, rem) { 1688 err = parse_flow_nlattrs(attr, a, &attrs);
1280 int type = nla_type(nla); 1689 if (err)
1281
1282 if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
1283 int err;
1284
1285 if (nla_len(nla) != ovs_key_lens[type])
1286 return -EINVAL;
1287
1288 switch (type) {
1289 case OVS_KEY_ATTR_PRIORITY:
1290 flow->key.phy.priority = nla_get_u32(nla);
1291 break;
1292
1293 case OVS_KEY_ATTR_TUNNEL:
1294 err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
1295 if (err)
1296 return err;
1297 break;
1298
1299 case OVS_KEY_ATTR_IN_PORT:
1300 if (nla_get_u32(nla) >= DP_MAX_PORTS)
1301 return -EINVAL;
1302 flow->key.phy.in_port = nla_get_u32(nla);
1303 break;
1304
1305 case OVS_KEY_ATTR_SKB_MARK:
1306 flow->key.phy.skb_mark = nla_get_u32(nla);
1307 break;
1308 }
1309 }
1310 }
1311 if (rem)
1312 return -EINVAL; 1690 return -EINVAL;
1313 1691
1314 flow->hash = ovs_flow_hash(&flow->key, 1692 memset(&match, 0, sizeof(match));
1315 flow_key_start(&flow->key), key_len); 1693 match.key = &flow->key;
1694
1695 err = metadata_from_nlattrs(&match, &attrs, a, false);
1696 if (err)
1697 return err;
1316 1698
1317 return 0; 1699 return 0;
1318} 1700}
1319 1701
1320int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) 1702int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
1703 const struct sw_flow_key *output, struct sk_buff *skb)
1321{ 1704{
1322 struct ovs_key_ethernet *eth_key; 1705 struct ovs_key_ethernet *eth_key;
1323 struct nlattr *nla, *encap; 1706 struct nlattr *nla, *encap;
1707 bool is_mask = (swkey != output);
1324 1708
1325 if (swkey->phy.priority && 1709 if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1326 nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
1327 goto nla_put_failure; 1710 goto nla_put_failure;
1328 1711
1329 if (swkey->tun_key.ipv4_dst && 1712 if ((swkey->tun_key.ipv4_dst || is_mask) &&
1330 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) 1713 ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
1331 goto nla_put_failure; 1714 goto nla_put_failure;
1332 1715
1333 if (swkey->phy.in_port != DP_MAX_PORTS && 1716 if (swkey->phy.in_port == DP_MAX_PORTS) {
1334 nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) 1717 if (is_mask && (output->phy.in_port == 0xffff))
1335 goto nla_put_failure; 1718 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1719 goto nla_put_failure;
1720 } else {
1721 u16 upper_u16;
1722 upper_u16 = !is_mask ? 0 : 0xffff;
1336 1723
1337 if (swkey->phy.skb_mark && 1724 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1338 nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) 1725 (upper_u16 << 16) | output->phy.in_port))
1726 goto nla_put_failure;
1727 }
1728
1729 if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
1339 goto nla_put_failure; 1730 goto nla_put_failure;
1340 1731
1341 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); 1732 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
1342 if (!nla) 1733 if (!nla)
1343 goto nla_put_failure; 1734 goto nla_put_failure;
1735
1344 eth_key = nla_data(nla); 1736 eth_key = nla_data(nla);
1345 memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); 1737 memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
1346 memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); 1738 memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
1347 1739
1348 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { 1740 if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
1349 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) || 1741 __be16 eth_type;
1350 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci)) 1742 eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
1743 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1744 nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
1351 goto nla_put_failure; 1745 goto nla_put_failure;
1352 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); 1746 encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
1353 if (!swkey->eth.tci) 1747 if (!swkey->eth.tci)
1354 goto unencap; 1748 goto unencap;
1355 } else { 1749 } else
1356 encap = NULL; 1750 encap = NULL;
1357 }
1358 1751
1359 if (swkey->eth.type == htons(ETH_P_802_2)) 1752 if (swkey->eth.type == htons(ETH_P_802_2)) {
1753 /*
1754 * Ethertype 802.2 is represented in the netlink with omitted
1755 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
1756 * 0xffff in the mask attribute. Ethertype can also
1757 * be wildcarded.
1758 */
1759 if (is_mask && output->eth.type)
1760 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
1761 output->eth.type))
1762 goto nla_put_failure;
1360 goto unencap; 1763 goto unencap;
1764 }
1361 1765
1362 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type)) 1766 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
1363 goto nla_put_failure; 1767 goto nla_put_failure;
1364 1768
1365 if (swkey->eth.type == htons(ETH_P_IP)) { 1769 if (swkey->eth.type == htons(ETH_P_IP)) {
@@ -1369,12 +1773,12 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1369 if (!nla) 1773 if (!nla)
1370 goto nla_put_failure; 1774 goto nla_put_failure;
1371 ipv4_key = nla_data(nla); 1775 ipv4_key = nla_data(nla);
1372 ipv4_key->ipv4_src = swkey->ipv4.addr.src; 1776 ipv4_key->ipv4_src = output->ipv4.addr.src;
1373 ipv4_key->ipv4_dst = swkey->ipv4.addr.dst; 1777 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
1374 ipv4_key->ipv4_proto = swkey->ip.proto; 1778 ipv4_key->ipv4_proto = output->ip.proto;
1375 ipv4_key->ipv4_tos = swkey->ip.tos; 1779 ipv4_key->ipv4_tos = output->ip.tos;
1376 ipv4_key->ipv4_ttl = swkey->ip.ttl; 1780 ipv4_key->ipv4_ttl = output->ip.ttl;
1377 ipv4_key->ipv4_frag = swkey->ip.frag; 1781 ipv4_key->ipv4_frag = output->ip.frag;
1378 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1782 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1379 struct ovs_key_ipv6 *ipv6_key; 1783 struct ovs_key_ipv6 *ipv6_key;
1380 1784
@@ -1382,15 +1786,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1382 if (!nla) 1786 if (!nla)
1383 goto nla_put_failure; 1787 goto nla_put_failure;
1384 ipv6_key = nla_data(nla); 1788 ipv6_key = nla_data(nla);
1385 memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src, 1789 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
1386 sizeof(ipv6_key->ipv6_src)); 1790 sizeof(ipv6_key->ipv6_src));
1387 memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst, 1791 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
1388 sizeof(ipv6_key->ipv6_dst)); 1792 sizeof(ipv6_key->ipv6_dst));
1389 ipv6_key->ipv6_label = swkey->ipv6.label; 1793 ipv6_key->ipv6_label = output->ipv6.label;
1390 ipv6_key->ipv6_proto = swkey->ip.proto; 1794 ipv6_key->ipv6_proto = output->ip.proto;
1391 ipv6_key->ipv6_tclass = swkey->ip.tos; 1795 ipv6_key->ipv6_tclass = output->ip.tos;
1392 ipv6_key->ipv6_hlimit = swkey->ip.ttl; 1796 ipv6_key->ipv6_hlimit = output->ip.ttl;
1393 ipv6_key->ipv6_frag = swkey->ip.frag; 1797 ipv6_key->ipv6_frag = output->ip.frag;
1394 } else if (swkey->eth.type == htons(ETH_P_ARP) || 1798 } else if (swkey->eth.type == htons(ETH_P_ARP) ||
1395 swkey->eth.type == htons(ETH_P_RARP)) { 1799 swkey->eth.type == htons(ETH_P_RARP)) {
1396 struct ovs_key_arp *arp_key; 1800 struct ovs_key_arp *arp_key;
@@ -1400,11 +1804,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1400 goto nla_put_failure; 1804 goto nla_put_failure;
1401 arp_key = nla_data(nla); 1805 arp_key = nla_data(nla);
1402 memset(arp_key, 0, sizeof(struct ovs_key_arp)); 1806 memset(arp_key, 0, sizeof(struct ovs_key_arp));
1403 arp_key->arp_sip = swkey->ipv4.addr.src; 1807 arp_key->arp_sip = output->ipv4.addr.src;
1404 arp_key->arp_tip = swkey->ipv4.addr.dst; 1808 arp_key->arp_tip = output->ipv4.addr.dst;
1405 arp_key->arp_op = htons(swkey->ip.proto); 1809 arp_key->arp_op = htons(output->ip.proto);
1406 memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN); 1810 memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
1407 memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN); 1811 memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
1408 } 1812 }
1409 1813
1410 if ((swkey->eth.type == htons(ETH_P_IP) || 1814 if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1419,11 +1823,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1419 goto nla_put_failure; 1823 goto nla_put_failure;
1420 tcp_key = nla_data(nla); 1824 tcp_key = nla_data(nla);
1421 if (swkey->eth.type == htons(ETH_P_IP)) { 1825 if (swkey->eth.type == htons(ETH_P_IP)) {
1422 tcp_key->tcp_src = swkey->ipv4.tp.src; 1826 tcp_key->tcp_src = output->ipv4.tp.src;
1423 tcp_key->tcp_dst = swkey->ipv4.tp.dst; 1827 tcp_key->tcp_dst = output->ipv4.tp.dst;
1424 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1828 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1425 tcp_key->tcp_src = swkey->ipv6.tp.src; 1829 tcp_key->tcp_src = output->ipv6.tp.src;
1426 tcp_key->tcp_dst = swkey->ipv6.tp.dst; 1830 tcp_key->tcp_dst = output->ipv6.tp.dst;
1427 } 1831 }
1428 } else if (swkey->ip.proto == IPPROTO_UDP) { 1832 } else if (swkey->ip.proto == IPPROTO_UDP) {
1429 struct ovs_key_udp *udp_key; 1833 struct ovs_key_udp *udp_key;
@@ -1433,11 +1837,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1433 goto nla_put_failure; 1837 goto nla_put_failure;
1434 udp_key = nla_data(nla); 1838 udp_key = nla_data(nla);
1435 if (swkey->eth.type == htons(ETH_P_IP)) { 1839 if (swkey->eth.type == htons(ETH_P_IP)) {
1436 udp_key->udp_src = swkey->ipv4.tp.src; 1840 udp_key->udp_src = output->ipv4.tp.src;
1437 udp_key->udp_dst = swkey->ipv4.tp.dst; 1841 udp_key->udp_dst = output->ipv4.tp.dst;
1438 } else if (swkey->eth.type == htons(ETH_P_IPV6)) { 1842 } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
1439 udp_key->udp_src = swkey->ipv6.tp.src; 1843 udp_key->udp_src = output->ipv6.tp.src;
1440 udp_key->udp_dst = swkey->ipv6.tp.dst; 1844 udp_key->udp_dst = output->ipv6.tp.dst;
1441 } 1845 }
1442 } else if (swkey->eth.type == htons(ETH_P_IP) && 1846 } else if (swkey->eth.type == htons(ETH_P_IP) &&
1443 swkey->ip.proto == IPPROTO_ICMP) { 1847 swkey->ip.proto == IPPROTO_ICMP) {
@@ -1447,8 +1851,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1447 if (!nla) 1851 if (!nla)
1448 goto nla_put_failure; 1852 goto nla_put_failure;
1449 icmp_key = nla_data(nla); 1853 icmp_key = nla_data(nla);
1450 icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src); 1854 icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
1451 icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst); 1855 icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
1452 } else if (swkey->eth.type == htons(ETH_P_IPV6) && 1856 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
1453 swkey->ip.proto == IPPROTO_ICMPV6) { 1857 swkey->ip.proto == IPPROTO_ICMPV6) {
1454 struct ovs_key_icmpv6 *icmpv6_key; 1858 struct ovs_key_icmpv6 *icmpv6_key;
@@ -1458,8 +1862,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1458 if (!nla) 1862 if (!nla)
1459 goto nla_put_failure; 1863 goto nla_put_failure;
1460 icmpv6_key = nla_data(nla); 1864 icmpv6_key = nla_data(nla);
1461 icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src); 1865 icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
1462 icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst); 1866 icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
1463 1867
1464 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || 1868 if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
1465 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { 1869 icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1469,10 +1873,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
1469 if (!nla) 1873 if (!nla)
1470 goto nla_put_failure; 1874 goto nla_put_failure;
1471 nd_key = nla_data(nla); 1875 nd_key = nla_data(nla);
1472 memcpy(nd_key->nd_target, &swkey->ipv6.nd.target, 1876 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
1473 sizeof(nd_key->nd_target)); 1877 sizeof(nd_key->nd_target));
1474 memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN); 1878 memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
1475 memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN); 1879 memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
1476 } 1880 }
1477 } 1881 }
1478 } 1882 }
@@ -1504,3 +1908,84 @@ void ovs_flow_exit(void)
1504{ 1908{
1505 kmem_cache_destroy(flow_cache); 1909 kmem_cache_destroy(flow_cache);
1506} 1910}
1911
1912struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
1913{
1914 struct sw_flow_mask *mask;
1915
1916 mask = kmalloc(sizeof(*mask), GFP_KERNEL);
1917 if (mask)
1918 mask->ref_count = 0;
1919
1920 return mask;
1921}
1922
1923void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
1924{
1925 mask->ref_count++;
1926}
1927
1928void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
1929{
1930 if (!mask)
1931 return;
1932
1933 BUG_ON(!mask->ref_count);
1934 mask->ref_count--;
1935
1936 if (!mask->ref_count) {
1937 list_del_rcu(&mask->list);
1938 if (deferred)
1939 kfree_rcu(mask, rcu);
1940 else
1941 kfree(mask);
1942 }
1943}
1944
1945static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
1946 const struct sw_flow_mask *b)
1947{
1948 u8 *a_ = (u8 *)&a->key + a->range.start;
1949 u8 *b_ = (u8 *)&b->key + b->range.start;
1950
1951 return (a->range.end == b->range.end)
1952 && (a->range.start == b->range.start)
1953 && (memcmp(a_, b_, ovs_sw_flow_mask_actual_size(a)) == 0);
1954}
1955
1956struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
1957 const struct sw_flow_mask *mask)
1958{
1959 struct list_head *ml;
1960
1961 list_for_each(ml, tbl->mask_list) {
1962 struct sw_flow_mask *m;
1963 m = container_of(ml, struct sw_flow_mask, list);
1964 if (ovs_sw_flow_mask_equal(mask, m))
1965 return m;
1966 }
1967
1968 return NULL;
1969}
1970
1971/**
1972 * add a new mask into the mask list.
1973 * The caller needs to make sure that 'mask' is not the same
1974 * as any masks that are already on the list.
1975 */
1976void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
1977{
1978 list_add_rcu(&mask->list, tbl->mask_list);
1979}
1980
1981/**
1982 * Set 'range' fields in the mask to the value of 'val'.
1983 */
1984static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
1985 struct sw_flow_key_range *range, u8 val)
1986{
1987 u8 *m = (u8 *)&mask->key + range->start;
1988
1989 mask->range = *range;
1990 memset(m, val, ovs_sw_flow_mask_size_roundup(mask));
1991}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 66ef7220293e..9674e45f6969 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2007-2011 Nicira, Inc. 2 * Copyright (c) 2007-2013 Nicira, Inc.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public 5 * modify it under the terms of version 2 of the GNU General Public
@@ -33,6 +33,8 @@
33#include <net/inet_ecn.h> 33#include <net/inet_ecn.h>
34 34
35struct sk_buff; 35struct sk_buff;
36struct sw_flow_mask;
37struct flow_table;
36 38
37struct sw_flow_actions { 39struct sw_flow_actions {
38 struct rcu_head rcu; 40 struct rcu_head rcu;
@@ -131,6 +133,8 @@ struct sw_flow {
131 u32 hash; 133 u32 hash;
132 134
133 struct sw_flow_key key; 135 struct sw_flow_key key;
136 struct sw_flow_key unmasked_key;
137 struct sw_flow_mask *mask;
134 struct sw_flow_actions __rcu *sf_acts; 138 struct sw_flow_actions __rcu *sf_acts;
135 139
136 spinlock_t lock; /* Lock for values below. */ 140 spinlock_t lock; /* Lock for values below. */
@@ -140,6 +144,25 @@ struct sw_flow {
140 u8 tcp_flags; /* Union of seen TCP flags. */ 144 u8 tcp_flags; /* Union of seen TCP flags. */
141}; 145};
142 146
147struct sw_flow_key_range {
148 size_t start;
149 size_t end;
150};
151
152static inline u16 ovs_sw_flow_key_range_actual_size(const struct sw_flow_key_range *range)
153{
154 return range->end - range->start;
155}
156
157struct sw_flow_match {
158 struct sw_flow_key *key;
159 struct sw_flow_key_range range;
160 struct sw_flow_mask *mask;
161};
162
163void ovs_match_init(struct sw_flow_match *match,
164 struct sw_flow_key *key, struct sw_flow_mask *mask);
165
143struct arp_eth_header { 166struct arp_eth_header {
144 __be16 ar_hrd; /* format of hardware address */ 167 __be16 ar_hrd; /* format of hardware address */
145 __be16 ar_pro; /* format of protocol address */ 168 __be16 ar_pro; /* format of protocol address */
@@ -159,21 +182,21 @@ void ovs_flow_exit(void);
159 182
160struct sw_flow *ovs_flow_alloc(void); 183struct sw_flow *ovs_flow_alloc(void);
161void ovs_flow_deferred_free(struct sw_flow *); 184void ovs_flow_deferred_free(struct sw_flow *);
162void ovs_flow_free(struct sw_flow *flow); 185void ovs_flow_free(struct sw_flow *, bool deferred);
163 186
164struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); 187struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
165void ovs_flow_deferred_free_acts(struct sw_flow_actions *); 188void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
166 189
167int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, 190int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
168 int *key_lenp);
169void ovs_flow_used(struct sw_flow *, struct sk_buff *); 191void ovs_flow_used(struct sw_flow *, struct sk_buff *);
170u64 ovs_flow_used_time(unsigned long flow_jiffies); 192u64 ovs_flow_used_time(unsigned long flow_jiffies);
171 193int ovs_flow_to_nlattrs(const struct sw_flow_key *,
172int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 194 const struct sw_flow_key *, struct sk_buff *);
173int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 195int ovs_match_from_nlattrs(struct sw_flow_match *match,
196 const struct nlattr *,
174 const struct nlattr *); 197 const struct nlattr *);
175int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, 198int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
176 const struct nlattr *attr); 199 const struct nlattr *attr);
177 200
178#define MAX_ACTIONS_BUFSIZE (32 * 1024) 201#define MAX_ACTIONS_BUFSIZE (32 * 1024)
179#define TBL_MIN_BUCKETS 1024 202#define TBL_MIN_BUCKETS 1024
@@ -182,6 +205,7 @@ struct flow_table {
182 struct flex_array *buckets; 205 struct flex_array *buckets;
183 unsigned int count, n_buckets; 206 unsigned int count, n_buckets;
184 struct rcu_head rcu; 207 struct rcu_head rcu;
208 struct list_head *mask_list;
185 int node_ver; 209 int node_ver;
186 u32 hash_seed; 210 u32 hash_seed;
187 bool keep_flows; 211 bool keep_flows;
@@ -197,22 +221,56 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
197 return (table->count > table->n_buckets); 221 return (table->count > table->n_buckets);
198} 222}
199 223
200struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, 224struct sw_flow *ovs_flow_lookup(struct flow_table *,
201 struct sw_flow_key *key, int len); 225 const struct sw_flow_key *);
202void ovs_flow_tbl_destroy(struct flow_table *table); 226struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
203void ovs_flow_tbl_deferred_destroy(struct flow_table *table); 227 struct sw_flow_match *match);
228
229void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
204struct flow_table *ovs_flow_tbl_alloc(int new_size); 230struct flow_table *ovs_flow_tbl_alloc(int new_size);
205struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); 231struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
206struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); 232struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
207void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
208 struct sw_flow_key *key, int key_len);
209void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
210 233
211struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); 234void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
235void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
236
237struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
212extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; 238extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
213int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, 239int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
214 struct ovs_key_ipv4_tunnel *tun_key); 240 struct sw_flow_match *match, bool is_mask);
215int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, 241int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
216 const struct ovs_key_ipv4_tunnel *tun_key); 242 const struct ovs_key_ipv4_tunnel *tun_key,
243 const struct ovs_key_ipv4_tunnel *output);
244
245bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
246 const struct sw_flow_key *key, int key_len);
247
248struct sw_flow_mask {
249 int ref_count;
250 struct rcu_head rcu;
251 struct list_head list;
252 struct sw_flow_key_range range;
253 struct sw_flow_key key;
254};
255
256static inline u16
257ovs_sw_flow_mask_actual_size(const struct sw_flow_mask *mask)
258{
259 return ovs_sw_flow_key_range_actual_size(&mask->range);
260}
261
262static inline u16
263ovs_sw_flow_mask_size_roundup(const struct sw_flow_mask *mask)
264{
265 return roundup(ovs_sw_flow_mask_actual_size(mask), sizeof(u32));
266}
217 267
268struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
269void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
270void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
271void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
272struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
273 const struct sw_flow_mask *);
274void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
275 const struct sw_flow_mask *mask);
218#endif /* flow.h */ 276#endif /* flow.h */