author    David S. Miller <davem@davemloft.net>  2015-03-06 00:26:16 -0500
committer David S. Miller <davem@davemloft.net>  2015-03-06 00:26:16 -0500
commit    fabe7bed114a0ffc02845b731c26aadd800e7b5d (patch)
tree      fa5bf3aaa17cb488284366d10a314e8abfb66583
parent    24d2e4a50737867aba1e96a587ef0d90c17e3035 (diff)
parent    c1beeef7a32a791a60e2adcc217d4461cd1e25d1 (diff)
Merge branch 'l3_hw_offload'
Scott Feldman says:

====================
switchdev: add IPv4 routing offload

v4:

- Add NETIF_F_NETNS_LOCAL to the rocker port feature list to keep rocker
  ports in the default netns. Rocker hardware can't currently be
  partitioned to support multiple namespaces. It would be interesting to
  add netns support to the rocker device by basically adding another
  match field to each table to match on some unique netns ID, with a
  port knowing its netns ID. Future work TBD.

- Up-level the RTNH_F_EXTERNAL marking of routes installed to the
  offload device from the driver to switchdev common code. Now the
  driver can't skip routes: either it can install a route or it cannot.
  Yes or no. If no on any route, all offloading is aborted by removing
  routes from the offload device and setting ipv4.fib_offload_disabled
  so no more routes can be offloaded. This is harsh, but it's our
  starting point. We can refine the policies in follow-up work.

- Add a new net.ipv4.fib_offload_disabled bool that is set if anything
  goes wrong with route offloading. We can refine this later to make the
  setting per-device or per-device-port-netdev, but let's start simple
  here and refine in follow-up work.

- Rebase against Alex's latest FIB changes. I think I did everything
  correctly, and didn't run into any issues with testing, but I'd like
  Alex to look over the changes and maybe follow up with any cleanups.

v3: Changes based on v2 review comments:

- Move the check for custom rules up earlier in the patch set, to keep
  git bisect safe.

- Simplify the route add/modify failure handling: simply try until
  failure, and then on failure undo everything. The switchdev driver
  will return an error when a route could normally be installed to the
  device but the install fails for one reason or another (no space left
  on device, etc.). If a failure happens, uninstall all routes from the
  device, punting forwarding for all routes back to the kernel.

- Scan the route's full nexthop list, ensuring all nexthop devs belong
  to the same switchdev device; otherwise, don't try to install the
  route to the device.

v2: Changes based on v1 review comments and discussions at netconf:

- Allow route modification, but use the same ndo op used for adding a
  route. The driver/device is expected to modify the route in place, if
  it can, to avoid interruption of service.

- Add a new RTNH_F_EXTERNAL flag to mark FIB entries offloaded
  externally.

- Don't offload routes if using custom IP rules. If routes are already
  offloaded and custom IP rules are turned on, flush routes from the
  offload device. (Offloaded routes are marked with RTNH_F_EXTERNAL.)

- Use the kernel's neigh resolution code to resolve the routes'
  nexthops' neigh MAC addrs. (Thanks davem, works great!)

- Use fib->fib_priority in the rocker driver to give priorities to
  routes in the OF-DPA unicast route table.

v1: This patch set adds L3 routing offload support for IPv4 routes. The
idea is to mirror routes installed in the kernel's FIB down to a
hardware switch device to offload the data forwarding path for L3. Only
the data forwarding path is intercepted. Control and management of the
kernel's FIB remains with the kernel.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
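To make the abort policy above concrete, here is a minimal sketch of how a
FIB-insertion path is expected to use the new helpers. example_fib_offload()
is a hypothetical wrapper written for illustration only; the real helpers
added by this series are netdev_switch_fib_ipv4_add() and
netdev_switch_fib_ipv4_abort() (see net/switchdev/switchdev.c below), and the
real call sites are in net/ipv4/fib_trie.c.

/* Hypothetical wrapper, for illustration only. */
static int example_fib_offload(struct fib_table *tb, u32 key, int plen,
                               struct fib_info *fi, u8 tos, u8 type)
{
        int err;

        /* Try to mirror the route down to the switch device first. */
        err = netdev_switch_fib_ipv4_add(key, plen, fi, tos, type, tb->tb_id);
        if (err) {
                /* Any failure aborts offloading for the entire net:
                 * previously offloaded routes are flushed back to the
                 * kernel and ipv4.fib_offload_disabled is latched.
                 */
                netdev_switch_fib_ipv4_abort(fi);
                return err;
        }

        return 0;       /* forwarding for this route now happens in HW */
}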
-rw-r--r--  drivers/net/ethernet/rocker/rocker.c | 483
-rw-r--r--  include/linux/netdevice.h            |  22
-rw-r--r--  include/net/ip_fib.h                 |   2
-rw-r--r--  include/net/netns/ipv4.h             |   1
-rw-r--r--  include/net/switchdev.h              |  24
-rw-r--r--  include/uapi/linux/rtnetlink.h       |   1
-rw-r--r--  net/ipv4/fib_frontend.c              |  13
-rw-r--r--  net/ipv4/fib_rules.c                 |   3
-rw-r--r--  net/ipv4/fib_trie.c                  |  92
-rw-r--r--  net/switchdev/switchdev.c            | 161
10 files changed, 754 insertions(+), 48 deletions(-)
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index a5d1e6ea7d58..d04d3b374e31 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -32,6 +32,9 @@
 #include <linux/bitops.h>
 #include <net/switchdev.h>
 #include <net/rtnetlink.h>
+#include <net/ip_fib.h>
+#include <net/netevent.h>
+#include <net/arp.h>
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
 #include <generated/utsrelease.h>
 
@@ -111,9 +114,10 @@ struct rocker_flow_tbl_key {
 
 struct rocker_flow_tbl_entry {
         struct hlist_node entry;
-        u32 ref_count;
+        u32 cmd;
         u64 cookie;
         struct rocker_flow_tbl_key key;
+        size_t key_len;
         u32 key_crc32; /* key */
 };
 
@@ -161,6 +165,16 @@ struct rocker_internal_vlan_tbl_entry {
         __be16 vlan_id;
 };
 
+struct rocker_neigh_tbl_entry {
+        struct hlist_node entry;
+        __be32 ip_addr; /* key */
+        struct net_device *dev;
+        u32 ref_count;
+        u32 index;
+        u8 eth_dst[ETH_ALEN];
+        bool ttl_check;
+};
+
 struct rocker_desc_info {
         char *data; /* mapped */
         size_t data_size;
@@ -234,6 +248,9 @@ struct rocker {
         unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
         DECLARE_HASHTABLE(internal_vlan_tbl, 8);
         spinlock_t internal_vlan_tbl_lock;
+        DECLARE_HASHTABLE(neigh_tbl, 16);
+        spinlock_t neigh_tbl_lock;
+        u32 neigh_tbl_next_index;
 };
 
 static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
@@ -256,7 +273,6 @@ enum {
         ROCKER_PRIORITY_VLAN = 1,
         ROCKER_PRIORITY_TERM_MAC_UCAST = 0,
         ROCKER_PRIORITY_TERM_MAC_MCAST = 1,
-        ROCKER_PRIORITY_UNICAST_ROUTING = 1,
         ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1,
         ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2,
         ROCKER_PRIORITY_BRIDGING_VLAN = 3,
@@ -1940,8 +1956,7 @@ static int rocker_cmd_flow_tbl_add(struct rocker *rocker,
         struct rocker_tlv *cmd_info;
         int err = 0;
 
-        if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
-                               ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD))
+        if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
                 return -EMSGSIZE;
         cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
         if (!cmd_info)
@@ -1998,8 +2013,7 @@ static int rocker_cmd_flow_tbl_del(struct rocker *rocker,
         const struct rocker_flow_tbl_entry *entry = priv;
         struct rocker_tlv *cmd_info;
 
-        if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
-                               ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL))
+        if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
                 return -EMSGSIZE;
         cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
         if (!cmd_info)
@@ -2168,9 +2182,9 @@ static int rocker_cmd_group_tbl_del(struct rocker *rocker,
         return 0;
 }
 
-/*****************************************
- * Flow, group, FDB, internal VLAN tables
- *****************************************/
+/***************************************************
+ * Flow, group, FDB, internal VLAN and neigh tables
+ ***************************************************/
 
 static int rocker_init_tbls(struct rocker *rocker)
 {
@@ -2186,6 +2200,9 @@ static int rocker_init_tbls(struct rocker *rocker)
         hash_init(rocker->internal_vlan_tbl);
         spin_lock_init(&rocker->internal_vlan_tbl_lock);
 
+        hash_init(rocker->neigh_tbl);
+        spin_lock_init(&rocker->neigh_tbl_lock);
+
         return 0;
 }
 
@@ -2196,6 +2213,7 @@ static void rocker_free_tbls(struct rocker *rocker)
         struct rocker_group_tbl_entry *group_entry;
         struct rocker_fdb_tbl_entry *fdb_entry;
         struct rocker_internal_vlan_tbl_entry *internal_vlan_entry;
+        struct rocker_neigh_tbl_entry *neigh_entry;
         struct hlist_node *tmp;
         int bkt;
 
@@ -2219,16 +2237,22 @@ static void rocker_free_tbls(struct rocker *rocker)
                            tmp, internal_vlan_entry, entry)
                 hash_del(&internal_vlan_entry->entry);
         spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, flags);
+
+        spin_lock_irqsave(&rocker->neigh_tbl_lock, flags);
+        hash_for_each_safe(rocker->neigh_tbl, bkt, tmp, neigh_entry, entry)
+                hash_del(&neigh_entry->entry);
+        spin_unlock_irqrestore(&rocker->neigh_tbl_lock, flags);
 }
 
 static struct rocker_flow_tbl_entry *
 rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match)
 {
         struct rocker_flow_tbl_entry *found;
+        size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
 
         hash_for_each_possible(rocker->flow_tbl, found,
                                entry, match->key_crc32) {
-                if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0)
+                if (memcmp(&found->key, &match->key, key_len) == 0)
                         return found;
         }
 
@@ -2241,42 +2265,34 @@ static int rocker_flow_tbl_add(struct rocker_port *rocker_port,
 {
         struct rocker *rocker = rocker_port->rocker;
         struct rocker_flow_tbl_entry *found;
+        size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
         unsigned long flags;
-        bool add_to_hw = false;
-        int err = 0;
 
-        match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+        match->key_crc32 = crc32(~0, &match->key, key_len);
 
         spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
 
         found = rocker_flow_tbl_find(rocker, match);
 
         if (found) {
-                kfree(match);
+                match->cookie = found->cookie;
+                hash_del(&found->entry);
+                kfree(found);
+                found = match;
+                found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD;
         } else {
                 found = match;
                 found->cookie = rocker->flow_tbl_next_cookie++;
-                hash_add(rocker->flow_tbl, &found->entry, found->key_crc32);
-                add_to_hw = true;
+                found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD;
         }
 
-        found->ref_count++;
+        hash_add(rocker->flow_tbl, &found->entry, found->key_crc32);
 
         spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
 
-        if (add_to_hw) {
-                err = rocker_cmd_exec(rocker, rocker_port,
-                                      rocker_cmd_flow_tbl_add,
-                                      found, NULL, NULL, nowait);
-                if (err) {
-                        spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
-                        hash_del(&found->entry);
-                        spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
-                        kfree(found);
-                }
-        }
-
-        return err;
+        return rocker_cmd_exec(rocker, rocker_port,
+                               rocker_cmd_flow_tbl_add,
+                               found, NULL, NULL, nowait);
 }
 
 static int rocker_flow_tbl_del(struct rocker_port *rocker_port,
@@ -2285,29 +2301,26 @@ static int rocker_flow_tbl_del(struct rocker_port *rocker_port,
 {
         struct rocker *rocker = rocker_port->rocker;
         struct rocker_flow_tbl_entry *found;
+        size_t key_len = match->key_len ? match->key_len : sizeof(found->key);
         unsigned long flags;
-        bool del_from_hw = false;
         int err = 0;
 
-        match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+        match->key_crc32 = crc32(~0, &match->key, key_len);
 
         spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
 
         found = rocker_flow_tbl_find(rocker, match);
 
         if (found) {
-                found->ref_count--;
-                if (found->ref_count == 0) {
-                        hash_del(&found->entry);
-                        del_from_hw = true;
-                }
+                hash_del(&found->entry);
+                found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL;
         }
 
         spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
 
         kfree(match);
 
-        if (del_from_hw) {
+        if (found) {
                 err = rocker_cmd_exec(rocker, rocker_port,
                                       rocker_cmd_flow_tbl_del,
                                       found, NULL, NULL, nowait);
@@ -2467,6 +2480,31 @@ static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port,
         return rocker_flow_tbl_do(rocker_port, flags, entry);
 }
 
+static int rocker_flow_tbl_ucast4_routing(struct rocker_port *rocker_port,
+                                          __be16 eth_type, __be32 dst,
+                                          __be32 dst_mask, u32 priority,
+                                          enum rocker_of_dpa_table_id goto_tbl,
+                                          u32 group_id, int flags)
+{
+        struct rocker_flow_tbl_entry *entry;
+
+        entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+        if (!entry)
+                return -ENOMEM;
+
+        entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING;
+        entry->key.priority = priority;
+        entry->key.ucast_routing.eth_type = eth_type;
+        entry->key.ucast_routing.dst4 = dst;
+        entry->key.ucast_routing.dst4_mask = dst_mask;
+        entry->key.ucast_routing.goto_tbl = goto_tbl;
+        entry->key.ucast_routing.group_id = group_id;
+        entry->key_len = offsetof(struct rocker_flow_tbl_key,
+                                  ucast_routing.group_id);
+
+        return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
 static int rocker_flow_tbl_acl(struct rocker_port *rocker_port,
                                int flags, u32 in_pport,
                                u32 in_pport_mask,
@@ -2554,7 +2592,6 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port,
         struct rocker *rocker = rocker_port->rocker;
         struct rocker_group_tbl_entry *found;
         unsigned long flags;
-        int err = 0;
 
         spin_lock_irqsave(&rocker->group_tbl_lock, flags);
 
@@ -2574,12 +2611,9 @@ static int rocker_group_tbl_add(struct rocker_port *rocker_port,
 
         spin_unlock_irqrestore(&rocker->group_tbl_lock, flags);
 
-        if (found->cmd)
-                err = rocker_cmd_exec(rocker, rocker_port,
-                                      rocker_cmd_group_tbl_add,
-                                      found, NULL, NULL, nowait);
-
-        return err;
+        return rocker_cmd_exec(rocker, rocker_port,
+                               rocker_cmd_group_tbl_add,
+                               found, NULL, NULL, nowait);
 }
 
 static int rocker_group_tbl_del(struct rocker_port *rocker_port,
@@ -2675,6 +2709,244 @@ static int rocker_group_l2_flood(struct rocker_port *rocker_port,
                                  group_id);
 }
 
+static int rocker_group_l3_unicast(struct rocker_port *rocker_port,
+                                   int flags, u32 index, u8 *src_mac,
+                                   u8 *dst_mac, __be16 vlan_id,
+                                   bool ttl_check, u32 pport)
+{
+        struct rocker_group_tbl_entry *entry;
+
+        entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+        if (!entry)
+                return -ENOMEM;
+
+        entry->group_id = ROCKER_GROUP_L3_UNICAST(index);
+        if (src_mac)
+                ether_addr_copy(entry->l3_unicast.eth_src, src_mac);
+        if (dst_mac)
+                ether_addr_copy(entry->l3_unicast.eth_dst, dst_mac);
+        entry->l3_unicast.vlan_id = vlan_id;
+        entry->l3_unicast.ttl_check = ttl_check;
+        entry->l3_unicast.group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, pport);
+
+        return rocker_group_tbl_do(rocker_port, flags, entry);
+}
+
+static struct rocker_neigh_tbl_entry *
+rocker_neigh_tbl_find(struct rocker *rocker, __be32 ip_addr)
+{
+        struct rocker_neigh_tbl_entry *found;
+
+        hash_for_each_possible(rocker->neigh_tbl, found, entry, ip_addr)
+                if (found->ip_addr == ip_addr)
+                        return found;
+
+        return NULL;
+}
+
+static void _rocker_neigh_add(struct rocker *rocker,
+                              struct rocker_neigh_tbl_entry *entry)
+{
+        entry->index = rocker->neigh_tbl_next_index++;
+        entry->ref_count++;
+        hash_add(rocker->neigh_tbl, &entry->entry, entry->ip_addr);
+}
+
+static void _rocker_neigh_del(struct rocker *rocker,
+                              struct rocker_neigh_tbl_entry *entry)
+{
+        if (--entry->ref_count == 0) {
+                hash_del(&entry->entry);
+                kfree(entry);
+        }
+}
+
+static void _rocker_neigh_update(struct rocker *rocker,
+                                 struct rocker_neigh_tbl_entry *entry,
+                                 u8 *eth_dst, bool ttl_check)
+{
+        if (eth_dst) {
+                ether_addr_copy(entry->eth_dst, eth_dst);
+                entry->ttl_check = ttl_check;
+        } else {
+                entry->ref_count++;
+        }
+}
+
+static int rocker_port_ipv4_neigh(struct rocker_port *rocker_port,
+                                  int flags, __be32 ip_addr, u8 *eth_dst)
+{
+        struct rocker *rocker = rocker_port->rocker;
+        struct rocker_neigh_tbl_entry *entry;
+        struct rocker_neigh_tbl_entry *found;
+        unsigned long lock_flags;
+        __be16 eth_type = htons(ETH_P_IP);
+        enum rocker_of_dpa_table_id goto_tbl =
+                ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+        u32 group_id;
+        u32 priority = 0;
+        bool adding = !(flags & ROCKER_OP_FLAG_REMOVE);
+        bool updating;
+        bool removing;
+        int err = 0;
+
+        entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+        if (!entry)
+                return -ENOMEM;
+
+        spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags);
+
+        found = rocker_neigh_tbl_find(rocker, ip_addr);
+
+        updating = found && adding;
+        removing = found && !adding;
+        adding = !found && adding;
+
+        if (adding) {
+                entry->ip_addr = ip_addr;
+                entry->dev = rocker_port->dev;
+                ether_addr_copy(entry->eth_dst, eth_dst);
+                entry->ttl_check = true;
+                _rocker_neigh_add(rocker, entry);
+        } else if (removing) {
+                memcpy(entry, found, sizeof(*entry));
+                _rocker_neigh_del(rocker, found);
+        } else if (updating) {
+                _rocker_neigh_update(rocker, found, eth_dst, true);
+                memcpy(entry, found, sizeof(*entry));
+        } else {
+                err = -ENOENT;
+        }
+
+        spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags);
+
+        if (err)
+                goto err_out;
+
+        /* For each active neighbor, we have an L3 unicast group and
+         * a /32 route to the neighbor, which uses the L3 unicast
+         * group.  The L3 unicast group can also be referred to by
+         * other routes' nexthops.
+         */
+
+        err = rocker_group_l3_unicast(rocker_port, flags,
+                                      entry->index,
+                                      rocker_port->dev->dev_addr,
+                                      entry->eth_dst,
+                                      rocker_port->internal_vlan_id,
+                                      entry->ttl_check,
+                                      rocker_port->pport);
+        if (err) {
+                netdev_err(rocker_port->dev,
+                           "Error (%d) L3 unicast group index %d\n",
+                           err, entry->index);
+                goto err_out;
+        }
+
+        if (adding || removing) {
+                group_id = ROCKER_GROUP_L3_UNICAST(entry->index);
+                err = rocker_flow_tbl_ucast4_routing(rocker_port,
+                                                     eth_type, ip_addr,
+                                                     inet_make_mask(32),
+                                                     priority, goto_tbl,
+                                                     group_id, flags);
+
+                if (err)
+                        netdev_err(rocker_port->dev,
+                                   "Error (%d) /32 unicast route %pI4 group 0x%08x\n",
+                                   err, &entry->ip_addr, group_id);
+        }
+
+err_out:
+        if (!adding)
+                kfree(entry);
+
+        return err;
+}
+
+static int rocker_port_ipv4_resolve(struct rocker_port *rocker_port,
+                                    __be32 ip_addr)
+{
+        struct net_device *dev = rocker_port->dev;
+        struct neighbour *n = __ipv4_neigh_lookup(dev, ip_addr);
+        int err = 0;
+
+        if (!n)
+                n = neigh_create(&arp_tbl, &ip_addr, dev);
+        if (!n)
+                return -ENOMEM;
+
+        /* If the neigh is already resolved, then go ahead and
+         * install the entry, otherwise start the ARP process to
+         * resolve the neigh.
+         */
+
+        if (n->nud_state & NUD_VALID)
+                err = rocker_port_ipv4_neigh(rocker_port, 0, ip_addr, n->ha);
+        else
+                neigh_event_send(n, NULL);
+
+        return err;
+}
+
+static int rocker_port_ipv4_nh(struct rocker_port *rocker_port, int flags,
+                               __be32 ip_addr, u32 *index)
+{
+        struct rocker *rocker = rocker_port->rocker;
+        struct rocker_neigh_tbl_entry *entry;
+        struct rocker_neigh_tbl_entry *found;
+        unsigned long lock_flags;
+        bool adding = !(flags & ROCKER_OP_FLAG_REMOVE);
+        bool updating;
+        bool removing;
+        bool resolved = true;
+        int err = 0;
+
+        entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+        if (!entry)
+                return -ENOMEM;
+
+        spin_lock_irqsave(&rocker->neigh_tbl_lock, lock_flags);
+
+        found = rocker_neigh_tbl_find(rocker, ip_addr);
+        if (found)
+                *index = found->index;
+
+        updating = found && adding;
+        removing = found && !adding;
+        adding = !found && adding;
+
+        if (adding) {
+                entry->ip_addr = ip_addr;
+                entry->dev = rocker_port->dev;
+                _rocker_neigh_add(rocker, entry);
+                *index = entry->index;
+                resolved = false;
+        } else if (removing) {
+                _rocker_neigh_del(rocker, found);
+        } else if (updating) {
+                _rocker_neigh_update(rocker, found, NULL, false);
+                resolved = !is_zero_ether_addr(found->eth_dst);
+        } else {
+                err = -ENOENT;
+        }
+
+        spin_unlock_irqrestore(&rocker->neigh_tbl_lock, lock_flags);
+
+        if (!adding)
+                kfree(entry);
+
+        if (err)
+                return err;
+
+        /* Resolved means neigh ip_addr is resolved to neigh mac. */
+
+        if (!resolved)
+                err = rocker_port_ipv4_resolve(rocker_port, ip_addr);
+
+        return err;
+}
+
 static int rocker_port_vlan_flood_group(struct rocker_port *rocker_port,
                                         int flags, __be16 vlan_id)
 {
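The per-neighbor model described in the comment inside rocker_port_ipv4_neigh()
can be reduced to a short sketch: one L3 unicast group entry (MAC rewrite,
VLAN, TTL check) chained to the port's L2 interface group, plus one /32 host
route pointing at that group. The function below is hypothetical glue for
illustration only; in the real code the index comes from
rocker->neigh_tbl_next_index via _rocker_neigh_add().

/* Hypothetical sequence for a freshly resolved neighbor; mirrors the
 * add path of rocker_port_ipv4_neigh() above.
 */
static int example_neigh_install(struct rocker_port *rocker_port,
                                 __be32 ip_addr, u8 *dst_mac)
{
        u32 index = 1;          /* really rocker->neigh_tbl_next_index++ */
        int err;

        /* 1) L3 unicast group: rewrite src/dst MAC, set VLAN, check TTL,
         *    then chain to the port's L2 interface group.
         */
        err = rocker_group_l3_unicast(rocker_port, 0, index,
                                      rocker_port->dev->dev_addr, dst_mac,
                                      rocker_port->internal_vlan_id,
                                      true, rocker_port->pport);
        if (err)
                return err;

        /* 2) /32 host route resolving this neighbor through the group. */
        return rocker_flow_tbl_ucast4_routing(rocker_port, htons(ETH_P_IP),
                                              ip_addr, inet_make_mask(32),
                                              0, ROCKER_OF_DPA_TABLE_ID_ACL_POLICY,
                                              ROCKER_GROUP_L3_UNICAST(index), 0);
}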
@@ -3429,6 +3701,51 @@ not_found:
         spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags);
 }
 
+static int rocker_port_fib_ipv4(struct rocker_port *rocker_port, __be32 dst,
+                                int dst_len, struct fib_info *fi, u32 tb_id,
+                                int flags)
+{
+        struct fib_nh *nh;
+        __be16 eth_type = htons(ETH_P_IP);
+        __be32 dst_mask = inet_make_mask(dst_len);
+        __be16 internal_vlan_id = rocker_port->internal_vlan_id;
+        u32 priority = fi->fib_priority;
+        enum rocker_of_dpa_table_id goto_tbl =
+                ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+        u32 group_id;
+        bool nh_on_port;
+        bool has_gw;
+        u32 index;
+        int err;
+
+        /* XXX support ECMP */
+
+        nh = fi->fib_nh;
+        nh_on_port = (fi->fib_dev == rocker_port->dev);
+        has_gw = !!nh->nh_gw;
+
+        if (has_gw && nh_on_port) {
+                err = rocker_port_ipv4_nh(rocker_port, flags,
+                                          nh->nh_gw, &index);
+                if (err)
+                        return err;
+
+                group_id = ROCKER_GROUP_L3_UNICAST(index);
+        } else {
+                /* Send to CPU for processing */
+                group_id = ROCKER_GROUP_L2_INTERFACE(internal_vlan_id, 0);
+        }
+
+        err = rocker_flow_tbl_ucast4_routing(rocker_port, eth_type, dst,
+                                             dst_mask, priority, goto_tbl,
+                                             group_id, flags);
+        if (err)
+                netdev_err(rocker_port->dev, "Error (%d) IPv4 route %pI4\n",
+                           err, &dst);
+
+        return err;
+}
+
 /*****************
  * Net device ops
  *****************/
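The XXX above marks the main limitation: only fi->fib_nh (the first nexthop)
is programmed. A hypothetical ECMP-aware version, not part of this series,
would walk all fi->fib_nhs nexthops and collect their L3 unicast groups into
some yet-to-be-defined ECMP group type; sketched under that assumption:

/* Hypothetical ECMP shape, for illustration only; rocker has no ECMP
 * group type at this point in the series.
 */
static int example_fib_ipv4_ecmp(struct rocker_port *rocker_port,
                                 struct fib_info *fi, int flags)
{
        int nhsel, err;
        u32 index;

        for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
                struct fib_nh *nh = &fi->fib_nh[nhsel];

                err = rocker_port_ipv4_nh(rocker_port, flags,
                                          nh->nh_gw, &index);
                if (err)
                        return err;

                /* each ROCKER_GROUP_L3_UNICAST(index) would become one
                 * bucket of an ECMP group here
                 */
        }

        return 0;
}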
@@ -3830,6 +4147,30 @@ static int rocker_port_switch_port_stp_update(struct net_device *dev, u8 state)
         return rocker_port_stp_update(rocker_port, state);
 }
 
+static int rocker_port_switch_fib_ipv4_add(struct net_device *dev,
+                                           __be32 dst, int dst_len,
+                                           struct fib_info *fi,
+                                           u8 tos, u8 type, u32 tb_id)
+{
+        struct rocker_port *rocker_port = netdev_priv(dev);
+        int flags = 0;
+
+        return rocker_port_fib_ipv4(rocker_port, dst, dst_len,
+                                    fi, tb_id, flags);
+}
+
+static int rocker_port_switch_fib_ipv4_del(struct net_device *dev,
+                                           __be32 dst, int dst_len,
+                                           struct fib_info *fi,
+                                           u8 tos, u8 type, u32 tb_id)
+{
+        struct rocker_port *rocker_port = netdev_priv(dev);
+        int flags = ROCKER_OP_FLAG_REMOVE;
+
+        return rocker_port_fib_ipv4(rocker_port, dst, dst_len,
+                                    fi, tb_id, flags);
+}
+
 static const struct net_device_ops rocker_port_netdev_ops = {
         .ndo_open                       = rocker_port_open,
         .ndo_stop                       = rocker_port_stop,
@@ -3844,6 +4185,8 @@ static const struct net_device_ops rocker_port_netdev_ops = {
         .ndo_bridge_getlink             = rocker_port_bridge_getlink,
         .ndo_switch_parent_id_get       = rocker_port_switch_parent_id_get,
         .ndo_switch_port_stp_update     = rocker_port_switch_port_stp_update,
+        .ndo_switch_fib_ipv4_add        = rocker_port_switch_fib_ipv4_add,
+        .ndo_switch_fib_ipv4_del        = rocker_port_switch_fib_ipv4_del,
 };
 
 /********************
@@ -4204,8 +4547,9 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
                        NAPI_POLL_WEIGHT);
         rocker_carrier_init(rocker_port);
 
-        dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
-                         NETIF_F_HW_SWITCH_OFFLOAD;
+        dev->features |= NETIF_F_NETNS_LOCAL |
+                         NETIF_F_HW_VLAN_CTAG_FILTER |
+                         NETIF_F_HW_SWITCH_OFFLOAD;
 
         err = register_netdev(dev);
         if (err) {
@@ -4546,6 +4890,48 @@ static struct notifier_block rocker_netdevice_nb __read_mostly = {
         .notifier_call = rocker_netdevice_event,
 };
 
+/************************************
+ * Net event notifier event handler
+ ************************************/
+
+static int rocker_neigh_update(struct net_device *dev, struct neighbour *n)
+{
+        struct rocker_port *rocker_port = netdev_priv(dev);
+        int flags = (n->nud_state & NUD_VALID) ? 0 : ROCKER_OP_FLAG_REMOVE;
+        __be32 ip_addr = *(__be32 *)n->primary_key;
+
+        return rocker_port_ipv4_neigh(rocker_port, flags, ip_addr, n->ha);
+}
+
+static int rocker_netevent_event(struct notifier_block *unused,
+                                 unsigned long event, void *ptr)
+{
+        struct net_device *dev;
+        struct neighbour *n = ptr;
+        int err;
+
+        switch (event) {
+        case NETEVENT_NEIGH_UPDATE:
+                if (n->tbl != &arp_tbl)
+                        return NOTIFY_DONE;
+                dev = n->dev;
+                if (!rocker_port_dev_check(dev))
+                        return NOTIFY_DONE;
+                err = rocker_neigh_update(dev, n);
+                if (err)
+                        netdev_warn(dev,
+                                    "failed to handle neigh update (err %d)\n",
+                                    err);
+                break;
+        }
+
+        return NOTIFY_DONE;
+}
+
+static struct notifier_block rocker_netevent_nb __read_mostly = {
+        .notifier_call = rocker_netevent_event,
+};
+
 /***********************
  * Module init and exit
  ***********************/
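The netevent notifier is the asynchronous half of neighbor resolution:
rocker_port_ipv4_resolve() kicks ARP with neigh_event_send(), and the
NETEVENT_NEIGH_UPDATE callback above installs or removes the neighbor once
the kernel knows the answer. Reduced to the reusable pattern, a sketch with
the driver-specific work stubbed out (example_port_check() and
example_install() are hypothetical stand-ins):

static bool example_port_check(const struct net_device *dev);
static void example_install(struct net_device *dev, __be32 ip,
                            const u8 *mac, bool valid);

/* Condensed form of rocker_netevent_event() above. */
static int example_netevent_event(struct notifier_block *unused,
                                  unsigned long event, void *ptr)
{
        struct neighbour *n = ptr;

        if (event != NETEVENT_NEIGH_UPDATE || n->tbl != &arp_tbl)
                return NOTIFY_DONE;     /* IPv4 neigh updates only */

        if (!example_port_check(n->dev))
                return NOTIFY_DONE;     /* not one of our ports */

        /* NUD_VALID set: (re)install the neighbor; cleared: remove it. */
        example_install(n->dev, *(__be32 *)n->primary_key, n->ha,
                        n->nud_state & NUD_VALID);

        return NOTIFY_DONE;
}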
@@ -4555,18 +4941,21 @@ static int __init rocker_module_init(void)
         int err;
 
         register_netdevice_notifier(&rocker_netdevice_nb);
+        register_netevent_notifier(&rocker_netevent_nb);
         err = pci_register_driver(&rocker_pci_driver);
         if (err)
                 goto err_pci_register_driver;
         return 0;
 
 err_pci_register_driver:
+        unregister_netevent_notifier(&rocker_netevent_nb);
         unregister_netdevice_notifier(&rocker_netdevice_nb);
         return err;
 }
 
 static void __exit rocker_module_exit(void)
 {
+        unregister_netevent_notifier(&rocker_netevent_nb);
         unregister_netdevice_notifier(&rocker_netdevice_nb);
         pci_unregister_driver(&rocker_pci_driver);
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 625c8d71511b..45413784a3b1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -768,6 +768,8 @@ struct netdev_phys_item_id {
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
                                        struct sk_buff *skb);
 
+struct fib_info;
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1031,6 +1033,14 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
 *      Called to notify switch device port of bridge port STP
 *      state change.
+* int (*ndo_switch_fib_ipv4_add)(struct net_device *dev, __be32 dst,
+*                                int dst_len, struct fib_info *fi,
+*                                u8 tos, u8 type, u32 tb_id);
+*      Called to add/modify IPv4 route to switch device.
+* int (*ndo_switch_fib_ipv4_del)(struct net_device *dev, __be32 dst,
+*                                int dst_len, struct fib_info *fi,
+*                                u8 tos, u8 type, u32 tb_id);
+*      Called to delete IPv4 route from switch device.
 */
 struct net_device_ops {
         int                     (*ndo_init)(struct net_device *dev);
@@ -1192,6 +1202,18 @@ struct net_device_ops {
                                                             struct netdev_phys_item_id *psid);
         int                     (*ndo_switch_port_stp_update)(struct net_device *dev,
                                                               u8 state);
+        int                     (*ndo_switch_fib_ipv4_add)(struct net_device *dev,
+                                                           __be32 dst,
+                                                           int dst_len,
+                                                           struct fib_info *fi,
+                                                           u8 tos, u8 type,
+                                                           u32 tb_id);
+        int                     (*ndo_switch_fib_ipv4_del)(struct net_device *dev,
+                                                           __be32 dst,
+                                                           int dst_len,
+                                                           struct fib_info *fi,
+                                                           u8 tos, u8 type,
+                                                           u32 tb_id);
 #endif
 };
 
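A driver opts into the two new hooks simply by filling them in its
net_device_ops. A minimal sketch follows; the rocker hunks above carry the
full version, and everything here named example_* is hypothetical:

static int example_fib_ipv4_add(struct net_device *dev, __be32 dst,
                                int dst_len, struct fib_info *fi,
                                u8 tos, u8 type, u32 tb_id)
{
        /* program the route into device tables; return 0 on success */
        return 0;
}

static int example_fib_ipv4_del(struct net_device *dev, __be32 dst,
                                int dst_len, struct fib_info *fi,
                                u8 tos, u8 type, u32 tb_id)
{
        /* remove the route from device tables */
        return 0;
}

static const struct net_device_ops example_netdev_ops = {
        .ndo_switch_fib_ipv4_add        = example_fib_ipv4_add,
        .ndo_switch_fib_ipv4_del        = example_fib_ipv4_del,
};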
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 825cb2800908..1657604c5dd3 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -196,6 +196,7 @@ int fib_table_delete(struct fib_table *, struct fib_config *);
 int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
                    struct netlink_callback *cb);
 int fib_table_flush(struct fib_table *table);
+void fib_table_flush_external(struct fib_table *table);
 void fib_free_table(struct fib_table *tb);
 
 
@@ -308,6 +309,7 @@ static inline int fib_num_tclassid_users(struct net *net)
         return 0;
 }
 #endif
+void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index db1db158a00e..1085e12f940f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -47,6 +47,7 @@ struct netns_ipv4 {
         int fib_num_tclassid_users;
 #endif
         struct hlist_head       *fib_table_hash;
+        bool                    fib_offload_disabled;
         struct sock             *fibnl;
 
         struct sock  * __percpu *icmp_sk;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index cfcdac2e5d25..dc0a5cc7c2c5 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,12 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
                                struct nlmsghdr *nlh, u16 flags);
 int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev,
                                                struct nlmsghdr *nlh, u16 flags);
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+                               u8 tos, u8 type, u32 tb_id);
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+                               u8 tos, u8 type, u32 tb_id);
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi);
+
 #else
 
 static inline int netdev_switch_parent_id_get(struct net_device *dev,
@@ -109,6 +115,24 @@ static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *
         return 0;
 }
 
+static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len,
+                                             struct fib_info *fi,
+                                             u8 tos, u8 type, u32 tb_id)
+{
+        return 0;
+}
+
+static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
+                                             struct fib_info *fi,
+                                             u8 tos, u8 type, u32 tb_id)
+{
+        return 0;
+}
+
+static inline void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 06f75a407f74..c3722b024e73 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -334,6 +334,7 @@ struct rtnexthop {
 #define RTNH_F_DEAD             1       /* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE        2       /* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK           4       /* Gateway is forced on link	*/
+#define RTNH_F_EXTERNAL         8       /* Route installed externally	*/
 
 /* Macros to handle hexthops */
 
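RTNH_F_EXTERNAL is carried in fi->fib_flags, which is how the core can later
pick out exactly the externally installed routes; fib_table_flush_external()
in the fib_trie.c hunk below keys on the same test. As a one-line predicate:

/* Same test fib_table_flush_external() uses to find offloaded routes. */
static bool example_route_is_offloaded(const struct fib_info *fi)
{
        return fi && (fi->fib_flags & RTNH_F_EXTERNAL);
}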
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 220c4b4af4cf..e067770235bf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -144,6 +144,19 @@ static void fib_flush(struct net *net)
         rt_cache_flush(net);
 }
 
+void fib_flush_external(struct net *net)
+{
+        struct fib_table *tb;
+        struct hlist_head *head;
+        unsigned int h;
+
+        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+                head = &net->ipv4.fib_table_hash[h];
+                hlist_for_each_entry(tb, head, tb_hlist)
+                        fib_table_flush_external(tb);
+        }
+}
+
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index d3db718be51d..190d0d00d744 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -209,6 +209,8 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
         rule4->tos = frh->tos;
 
         net->ipv4.fib_has_custom_rules = true;
+        fib_flush_external(rule->fr_net);
+
         err = 0;
 errout:
         return err;
@@ -224,6 +226,7 @@ static void fib4_rule_delete(struct fib_rule *rule)
                 net->ipv4.fib_num_tclassid_users--;
 #endif
         net->ipv4.fib_has_custom_rules = true;
+        fib_flush_external(rule->fr_net);
 }
 
 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fae34ad4bb1a..6544f1a0cfa1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -79,6 +79,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
+#include <net/switchdev.h>
 #include "fib_lookup.h"
 
 #define MAX_STAT_DEPTH 32
@@ -1135,7 +1136,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                 new_fa->fa_state = state & ~FA_S_ACCESSED;
                 new_fa->fa_slen = fa->fa_slen;
 
+                err = netdev_switch_fib_ipv4_add(key, plen, fi,
+                                                 new_fa->fa_tos,
+                                                 cfg->fc_type,
+                                                 tb->tb_id);
+                if (err) {
+                        netdev_switch_fib_ipv4_abort(fi);
+                        kmem_cache_free(fn_alias_kmem, new_fa);
+                        goto out;
+                }
+
                 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
+
                 alias_free_mem_rcu(fa);
 
                 fib_release_info(fi_drop);
@@ -1171,10 +1183,18 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
         new_fa->fa_state = 0;
         new_fa->fa_slen = slen;
 
+        /* (Optionally) offload fib entry to switch hardware. */
+        err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
+                                         cfg->fc_type, tb->tb_id);
+        if (err) {
+                netdev_switch_fib_ipv4_abort(fi);
+                goto out_free_new_fa;
+        }
+
         /* Insert new entry to the list. */
         err = fib_insert_alias(t, tp, l, new_fa, fa, key);
         if (err)
-                goto out_free_new_fa;
+                goto out_sw_fib_del;
 
         if (!plen)
                 tb->tb_num_default++;
@@ -1185,6 +1205,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 succeeded:
         return 0;
 
+out_sw_fib_del:
+        netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
 out_free_new_fa:
         kmem_cache_free(fn_alias_kmem, new_fa);
 out:
@@ -1456,6 +1478,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
         if (!fa_to_delete)
                 return -ESRCH;
 
+        netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+                                   cfg->fc_type, tb->tb_id);
+
         rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
                   &cfg->fc_nlinfo, 0);
 
@@ -1536,6 +1561,67 @@ found:
         return n;
 }
 
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+        struct trie *t = (struct trie *)tb->tb_data;
+        struct fib_alias *fa;
+        struct tnode *n, *pn;
+        unsigned long cindex;
+        unsigned char slen;
+        int found = 0;
+
+        n = rcu_dereference(t->trie);
+        if (!n)
+                return;
+
+        pn = NULL;
+        cindex = 0;
+
+        while (IS_TNODE(n)) {
+                /* record pn and cindex for leaf walking */
+                pn = n;
+                cindex = 1ul << n->bits;
+backtrace:
+                /* walk trie in reverse order */
+                do {
+                        while (!(cindex--)) {
+                                t_key pkey = pn->key;
+
+                                n = pn;
+                                pn = node_parent(n);
+
+                                /* resize completed node */
+                                resize(t, n);
+
+                                /* if we got the root we are done */
+                                if (!pn)
+                                        return;
+
+                                cindex = get_index(pkey, pn);
+                        }
+
+                        /* grab the next available node */
+                        n = tnode_get_child(pn, cindex);
+                } while (!n);
+        }
+
+        hlist_for_each_entry(fa, &n->leaf, fa_list) {
+                struct fib_info *fi = fa->fa_info;
+
+                if (fi && (fi->fib_flags & RTNH_F_EXTERNAL)) {
+                        netdev_switch_fib_ipv4_del(n->key,
+                                                   KEYLENGTH - fa->fa_slen,
+                                                   fi, fa->fa_tos,
+                                                   fa->fa_type, tb->tb_id);
+                }
+        }
+
+        /* if trie is leaf only loop is completed */
+        if (pn)
+                goto backtrace;
+}
+
 /* Caller must hold RTNL. */
 int fib_table_flush(struct fib_table *tb)
 {
@@ -1589,6 +1675,10 @@ backtrace:
                         struct fib_info *fi = fa->fa_info;
 
                         if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
+                                netdev_switch_fib_ipv4_del(n->key,
+                                                           KEYLENGTH - fa->fa_slen,
+                                                           fi, fa->fa_tos,
+                                                           fa->fa_type, tb->tb_id);
                                 hlist_del_rcu(&fa->fa_list);
                                 fib_release_info(fa->fa_info);
                                 alias_free_mem_rcu(fa);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 8c1e558db118..f4fd575aa2a3 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <net/ip_fib.h>
 #include <net/switchdev.h>
 
 /**
@@ -225,3 +226,163 @@ int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev,
         return ret;
 }
 EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink);
+
+static struct net_device *netdev_switch_get_lowest_dev(struct net_device *dev)
+{
+        const struct net_device_ops *ops = dev->netdev_ops;
+        struct net_device *lower_dev;
+        struct net_device *port_dev;
+        struct list_head *iter;
+
+        /* Recursively search down until we find a sw port dev.
+         * (A sw port dev supports ndo_switch_parent_id_get).
+         */
+
+        if (dev->features & NETIF_F_HW_SWITCH_OFFLOAD &&
+            ops->ndo_switch_parent_id_get)
+                return dev;
+
+        netdev_for_each_lower_dev(dev, lower_dev, iter) {
+                port_dev = netdev_switch_get_lowest_dev(lower_dev);
+                if (port_dev)
+                        return port_dev;
+        }
+
+        return NULL;
+}
+
+static struct net_device *netdev_switch_get_dev_by_nhs(struct fib_info *fi)
+{
+        struct netdev_phys_item_id psid;
+        struct netdev_phys_item_id prev_psid;
+        struct net_device *dev = NULL;
+        int nhsel;
+
+        /* For this route, all nexthop devs must be on the same switch. */
+
+        for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+                const struct fib_nh *nh = &fi->fib_nh[nhsel];
+
+                if (!nh->nh_dev)
+                        return NULL;
+
+                dev = netdev_switch_get_lowest_dev(nh->nh_dev);
+                if (!dev)
+                        return NULL;
+
+                if (netdev_switch_parent_id_get(dev, &psid))
+                        return NULL;
+
+                if (nhsel > 0) {
+                        if (prev_psid.id_len != psid.id_len)
+                                return NULL;
+                        if (memcmp(prev_psid.id, psid.id, psid.id_len))
+                                return NULL;
+                }
+
+                prev_psid = psid;
+        }
+
+        return dev;
+}
+
+/**
+ *      netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
+ *
+ *      @dst: route's IPv4 destination address
+ *      @dst_len: destination address length (prefix length)
+ *      @fi: route FIB info structure
+ *      @tos: route TOS
+ *      @type: route type
+ *      @tb_id: route table ID
+ *
+ *      Add IPv4 route entry to switch device.
+ */
+int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
+                               u8 tos, u8 type, u32 tb_id)
+{
+        struct net_device *dev;
+        const struct net_device_ops *ops;
+        int err = 0;
+
+        /* Don't offload route if using custom ip rules or if
+         * IPv4 FIB offloading has been disabled completely.
+         */
+
+        if (fi->fib_net->ipv4.fib_has_custom_rules ||
+            fi->fib_net->ipv4.fib_offload_disabled)
+                return 0;
+
+        dev = netdev_switch_get_dev_by_nhs(fi);
+        if (!dev)
+                return 0;
+        ops = dev->netdev_ops;
+
+        if (ops->ndo_switch_fib_ipv4_add) {
+                err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst), dst_len,
+                                                   fi, tos, type, tb_id);
+                if (!err)
+                        fi->fib_flags |= RTNH_F_EXTERNAL;
+        }
+
+        return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
+
+/**
+ *      netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
+ *
+ *      @dst: route's IPv4 destination address
+ *      @dst_len: destination address length (prefix length)
+ *      @fi: route FIB info structure
+ *      @tos: route TOS
+ *      @type: route type
+ *      @tb_id: route table ID
+ *
+ *      Delete IPv4 route entry from switch device.
+ */
+int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
+                               u8 tos, u8 type, u32 tb_id)
+{
+        struct net_device *dev;
+        const struct net_device_ops *ops;
+        int err = 0;
+
+        if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+                return 0;
+
+        dev = netdev_switch_get_dev_by_nhs(fi);
+        if (!dev)
+                return 0;
+        ops = dev->netdev_ops;
+
+        if (ops->ndo_switch_fib_ipv4_del) {
+                err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst), dst_len,
+                                                   fi, tos, type, tb_id);
+                if (!err)
+                        fi->fib_flags &= ~RTNH_F_EXTERNAL;
+        }
+
+        return err;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
+
+/**
+ *      netdev_switch_fib_ipv4_abort - Abort an IPv4 FIB operation
+ *
+ *      @fi: route FIB info structure
+ */
+void netdev_switch_fib_ipv4_abort(struct fib_info *fi)
+{
+        /* There was a problem installing this route to the offload
+         * device.  For now, until we come up with more refined
+         * policy handling, abruptly end IPv4 fib offloading for the
+         * entire net by flushing offload device(s) of all IPv4
+         * routes, and mark IPv4 fib offloading broken from this
+         * point forward.
+         */
+
+        fib_flush_external(fi->fib_net);
+        fi->fib_net->ipv4.fib_offload_disabled = true;
+}
+EXPORT_SYMBOL(netdev_switch_fib_ipv4_abort);
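
The same-switch rule enforced by netdev_switch_get_dev_by_nhs() reduces to
comparing switch parent IDs: two ports belong to the same offload domain iff
their IDs compare equal. Restated as a hypothetical standalone helper, for
illustration only:

/* Hypothetical helper; not part of this series. */
static bool example_same_switch(struct net_device *a, struct net_device *b)
{
        struct netdev_phys_item_id ida, idb;

        if (netdev_switch_parent_id_get(a, &ida) ||
            netdev_switch_parent_id_get(b, &idb))
                return false;   /* not switch ports at all */

        return ida.id_len == idb.id_len &&
               !memcmp(ida.id, idb.id, ida.id_len);
}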