summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nikolay Aleksandrov <nikolay@cumulusnetworks.com> 2017-02-04 12:05:07 -0500
committer: David S. Miller <davem@davemloft.net> 2017-02-06 22:53:13 -0500
commit: f7cdee8a79a1cb03fa9ca71b825e72f880b344e1 (patch)
tree: bda0a6e29e0d7748d2f7e59a8da9c5194f486b36
parent: 1f90c7f3470580e24da25f6a6c1fb480ed9371ac (diff)
bridge: move to workqueue gc
Move the fdb garbage collector to a workqueue which fires at least 10 milliseconds apart and cleans chain by chain, allowing other tasks to run in the meantime. With thousands of fdbs, the system is much more responsive. Most importantly, this removes the need to check in __br_fdb_get whether the matched entry has expired; that check causes false sharing and is completely unnecessary if we clean up entries. At worst, we'll get 10ms of traffic for that entry before it gets deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/bridge/br_device.c 1
-rw-r--r-- net/bridge/br_fdb.c 31
-rw-r--r-- net/bridge/br_if.c 2
-rw-r--r-- net/bridge/br_ioctl.c 2
-rw-r--r-- net/bridge/br_netlink.c 2
-rw-r--r-- net/bridge/br_private.h 4
-rw-r--r-- net/bridge/br_stp.c 2
-rw-r--r-- net/bridge/br_stp_if.c 4
-rw-r--r-- net/bridge/br_stp_timer.c 2
-rw-r--r-- net/bridge/br_sysfs_br.c 2
10 files changed, 29 insertions, 23 deletions
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5ba0b558f8ae..d208ee9ab60a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -411,4 +411,5 @@ void br_dev_setup(struct net_device *dev)
411 br_netfilter_rtable_init(br); 411 br_netfilter_rtable_init(br);
412 br_stp_timer_init(br); 412 br_stp_timer_init(br);
413 br_multicast_init(br); 413 br_multicast_init(br);
414 INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup);
414} 415}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e4a4176171c9..5cbed5c0db88 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -154,7 +154,7 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
154 if (f->added_by_external_learn) 154 if (f->added_by_external_learn)
155 fdb_del_external_learn(f); 155 fdb_del_external_learn(f);
156 156
157 hlist_del_rcu(&f->hlist); 157 hlist_del_init_rcu(&f->hlist);
158 fdb_notify(br, f, RTM_DELNEIGH); 158 fdb_notify(br, f, RTM_DELNEIGH);
159 call_rcu(&f->rcu, fdb_rcu_free); 159 call_rcu(&f->rcu, fdb_rcu_free);
160} 160}
@@ -290,34 +290,43 @@ out:
290 spin_unlock_bh(&br->hash_lock); 290 spin_unlock_bh(&br->hash_lock);
291} 291}
292 292
293void br_fdb_cleanup(unsigned long _data) 293void br_fdb_cleanup(struct work_struct *work)
294{ 294{
295 struct net_bridge *br = (struct net_bridge *)_data; 295 struct net_bridge *br = container_of(work, struct net_bridge,
296 gc_work.work);
296 unsigned long delay = hold_time(br); 297 unsigned long delay = hold_time(br);
297 unsigned long next_timer = jiffies + br->ageing_time; 298 unsigned long work_delay = delay;
299 unsigned long now = jiffies;
298 int i; 300 int i;
299 301
300 spin_lock(&br->hash_lock);
301 for (i = 0; i < BR_HASH_SIZE; i++) { 302 for (i = 0; i < BR_HASH_SIZE; i++) {
302 struct net_bridge_fdb_entry *f; 303 struct net_bridge_fdb_entry *f;
303 struct hlist_node *n; 304 struct hlist_node *n;
304 305
306 if (!br->hash[i].first)
307 continue;
308
309 spin_lock_bh(&br->hash_lock);
305 hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { 310 hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
306 unsigned long this_timer; 311 unsigned long this_timer;
312
307 if (f->is_static) 313 if (f->is_static)
308 continue; 314 continue;
309 if (f->added_by_external_learn) 315 if (f->added_by_external_learn)
310 continue; 316 continue;
311 this_timer = f->updated + delay; 317 this_timer = f->updated + delay;
312 if (time_before_eq(this_timer, jiffies)) 318 if (time_after(this_timer, now))
319 work_delay = min(work_delay, this_timer - now);
320 else
313 fdb_delete(br, f); 321 fdb_delete(br, f);
314 else if (time_before(this_timer, next_timer))
315 next_timer = this_timer;
316 } 322 }
323 spin_unlock_bh(&br->hash_lock);
324 cond_resched();
317 } 325 }
318 spin_unlock(&br->hash_lock);
319 326
320 mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); 327 /* Cleanup minimum 10 milliseconds apart */
328 work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
329 mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
321} 330}
322 331
323/* Completely flush all dynamic entries in forwarding database.*/ 332/* Completely flush all dynamic entries in forwarding database.*/
@@ -382,8 +391,6 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
382 &br->hash[br_mac_hash(addr, vid)], hlist) { 391 &br->hash[br_mac_hash(addr, vid)], hlist) {
383 if (ether_addr_equal(fdb->addr.addr, addr) && 392 if (ether_addr_equal(fdb->addr.addr, addr) &&
384 fdb->vlan_id == vid) { 393 fdb->vlan_id == vid) {
385 if (unlikely(has_expired(br, fdb)))
386 break;
387 return fdb; 394 return fdb;
388 } 395 }
389 } 396 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ed0dd3340084..8ac1770aa222 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -313,7 +313,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
313 313
314 br_vlan_flush(br); 314 br_vlan_flush(br);
315 br_multicast_dev_del(br); 315 br_multicast_dev_del(br);
316 del_timer_sync(&br->gc_timer); 316 cancel_delayed_work_sync(&br->gc_work);
317 317
318 br_sysfs_delbr(br->dev); 318 br_sysfs_delbr(br->dev);
319 unregister_netdevice_queue(br->dev, head); 319 unregister_netdevice_queue(br->dev, head);
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index da8157c57eb1..7970f8540cbb 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -149,7 +149,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
149 b.hello_timer_value = br_timer_value(&br->hello_timer); 149 b.hello_timer_value = br_timer_value(&br->hello_timer);
150 b.tcn_timer_value = br_timer_value(&br->tcn_timer); 150 b.tcn_timer_value = br_timer_value(&br->tcn_timer);
151 b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); 151 b.topology_change_timer_value = br_timer_value(&br->topology_change_timer);
152 b.gc_timer_value = br_timer_value(&br->gc_timer); 152 b.gc_timer_value = br_timer_value(&br->gc_work.timer);
153 rcu_read_unlock(); 153 rcu_read_unlock();
154 154
155 if (copy_to_user((void __user *)args[1], &b, sizeof(b))) 155 if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fc5d885dbb22..1cbdc5b96aa7 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1250,7 +1250,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
1250 if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, 1250 if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval,
1251 IFLA_BR_PAD)) 1251 IFLA_BR_PAD))
1252 return -EMSGSIZE; 1252 return -EMSGSIZE;
1253 clockval = br_timer_value(&br->gc_timer); 1253 clockval = br_timer_value(&br->gc_work.timer);
1254 if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) 1254 if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
1255 return -EMSGSIZE; 1255 return -EMSGSIZE;
1256 1256
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ec8560349b6f..47fd64bf5022 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -379,7 +379,7 @@ struct net_bridge {
379 struct timer_list hello_timer; 379 struct timer_list hello_timer;
380 struct timer_list tcn_timer; 380 struct timer_list tcn_timer;
381 struct timer_list topology_change_timer; 381 struct timer_list topology_change_timer;
382 struct timer_list gc_timer; 382 struct delayed_work gc_work;
383 struct kobject *ifobj; 383 struct kobject *ifobj;
384 u32 auto_cnt; 384 u32 auto_cnt;
385 385
@@ -502,7 +502,7 @@ void br_fdb_find_delete_local(struct net_bridge *br,
502 const unsigned char *addr, u16 vid); 502 const unsigned char *addr, u16 vid);
503void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); 503void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
504void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); 504void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
505void br_fdb_cleanup(unsigned long arg); 505void br_fdb_cleanup(struct work_struct *work);
506void br_fdb_delete_by_port(struct net_bridge *br, 506void br_fdb_delete_by_port(struct net_bridge *br,
507 const struct net_bridge_port *p, u16 vid, int do_all); 507 const struct net_bridge_port *p, u16 vid, int do_all);
508struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, 508struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 71fd1a4e63cc..8f56c2d1f1a7 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -602,7 +602,7 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
602 br->ageing_time = t; 602 br->ageing_time = t;
603 spin_unlock_bh(&br->lock); 603 spin_unlock_bh(&br->lock);
604 604
605 mod_timer(&br->gc_timer, jiffies); 605 mod_delayed_work(system_long_wq, &br->gc_work, 0);
606 606
607 return 0; 607 return 0;
608} 608}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 6c1e21411125..08341d2aa9c9 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -57,7 +57,7 @@ void br_stp_enable_bridge(struct net_bridge *br)
57 spin_lock_bh(&br->lock); 57 spin_lock_bh(&br->lock);
58 if (br->stp_enabled == BR_KERNEL_STP) 58 if (br->stp_enabled == BR_KERNEL_STP)
59 mod_timer(&br->hello_timer, jiffies + br->hello_time); 59 mod_timer(&br->hello_timer, jiffies + br->hello_time);
60 mod_timer(&br->gc_timer, jiffies + HZ/10); 60 mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10);
61 61
62 br_config_bpdu_generation(br); 62 br_config_bpdu_generation(br);
63 63
@@ -88,7 +88,7 @@ void br_stp_disable_bridge(struct net_bridge *br)
88 del_timer_sync(&br->hello_timer); 88 del_timer_sync(&br->hello_timer);
89 del_timer_sync(&br->topology_change_timer); 89 del_timer_sync(&br->topology_change_timer);
90 del_timer_sync(&br->tcn_timer); 90 del_timer_sync(&br->tcn_timer);
91 del_timer_sync(&br->gc_timer); 91 cancel_delayed_work_sync(&br->gc_work);
92} 92}
93 93
94/* called under bridge lock */ 94/* called under bridge lock */
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 7ddb38e0a06e..c98b3e5c140a 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -153,8 +153,6 @@ void br_stp_timer_init(struct net_bridge *br)
153 setup_timer(&br->topology_change_timer, 153 setup_timer(&br->topology_change_timer,
154 br_topology_change_timer_expired, 154 br_topology_change_timer_expired,
155 (unsigned long) br); 155 (unsigned long) br);
156
157 setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br);
158} 156}
159 157
160void br_stp_port_timer_init(struct net_bridge_port *p) 158void br_stp_port_timer_init(struct net_bridge_port *p)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index a18148213b08..0f4034934d56 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -263,7 +263,7 @@ static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr,
263 char *buf) 263 char *buf)
264{ 264{
265 struct net_bridge *br = to_bridge(d); 265 struct net_bridge *br = to_bridge(d);
266 return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); 266 return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer));
267} 267}
268static DEVICE_ATTR_RO(gc_timer); 268static DEVICE_ATTR_RO(gc_timer);
269 269