aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michal Kubeček <mkubecek@suse.cz> 2013-08-01 04:04:14 -0400
committer David S. Miller <davem@davemloft.net> 2013-08-01 17:16:20 -0400
commit 2ac3ac8f86f2fe065d746d9a9abaca867adec577 (patch)
tree 6d0513fcc4e048797a417174a667b7c562535ddb
parent 1f1059fcf0b30deb1cf1e23af9ef4231c0bcc0bb (diff)
ipv6: prevent fib6_run_gc() contention
On a high-traffic router with many processors and many IPv6 dst entries, a soft lockup in fib6_run_gc() can occur when the number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector, but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time, so in the meantime other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another, which blocks them for quite a long time. Resolve this by replacing the special value ~0UL of the expires parameter to fib6_run_gc() with an explicit "force" parameter that chooses between spin_lock_bh() and spin_trylock_bh(), and by calling fib6_run_gc() with force=false if gc_thresh is reached but max_size is not.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip6_fib.h 2
-rw-r--r-- net/ipv6/ip6_fib.c 19
-rw-r--r-- net/ipv6/ndisc.c 4
-rw-r--r-- net/ipv6/route.c 4
4 files changed, 13 insertions, 16 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 2a601e7da1bf..48ec25a7fcb6 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -300,7 +300,7 @@ extern void inet6_rt_notify(int event, struct rt6_info *rt,
300 struct nl_info *info); 300 struct nl_info *info);
301 301
302extern void fib6_run_gc(unsigned long expires, 302extern void fib6_run_gc(unsigned long expires,
303 struct net *net); 303 struct net *net, bool force);
304 304
305extern void fib6_gc_cleanup(void); 305extern void fib6_gc_cleanup(void);
306 306
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5fc9c7a68d8d..d872553ca933 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1632 1632
1633static DEFINE_SPINLOCK(fib6_gc_lock); 1633static DEFINE_SPINLOCK(fib6_gc_lock);
1634 1634
1635void fib6_run_gc(unsigned long expires, struct net *net) 1635void fib6_run_gc(unsigned long expires, struct net *net, bool force)
1636{ 1636{
1637 if (expires != ~0UL) { 1637 if (force) {
1638 spin_lock_bh(&fib6_gc_lock); 1638 spin_lock_bh(&fib6_gc_lock);
1639 gc_args.timeout = expires ? (int)expires : 1639 } else if (!spin_trylock_bh(&fib6_gc_lock)) {
1640 net->ipv6.sysctl.ip6_rt_gc_interval; 1640 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1641 } else { 1641 return;
1642 if (!spin_trylock_bh(&fib6_gc_lock)) {
1643 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
1644 return;
1645 }
1646 gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
1647 } 1642 }
1643 gc_args.timeout = expires ? (int)expires :
1644 net->ipv6.sysctl.ip6_rt_gc_interval;
1648 1645
1649 gc_args.more = icmp6_dst_gc(); 1646 gc_args.more = icmp6_dst_gc();
1650 1647
@@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)
1661 1658
1662static void fib6_gc_timer_cb(unsigned long arg) 1659static void fib6_gc_timer_cb(unsigned long arg)
1663{ 1660{
1664 fib6_run_gc(0, (struct net *)arg); 1661 fib6_run_gc(0, (struct net *)arg, true);
1665} 1662}
1666 1663
1667static int __net_init fib6_net_init(struct net *net) 1664static int __net_init fib6_net_init(struct net *net)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 24c03396e008..79aa9652ed86 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
1576 switch (event) { 1576 switch (event) {
1577 case NETDEV_CHANGEADDR: 1577 case NETDEV_CHANGEADDR:
1578 neigh_changeaddr(&nd_tbl, dev); 1578 neigh_changeaddr(&nd_tbl, dev);
1579 fib6_run_gc(~0UL, net); 1579 fib6_run_gc(0, net, false);
1580 idev = in6_dev_get(dev); 1580 idev = in6_dev_get(dev);
1581 if (!idev) 1581 if (!idev)
1582 break; 1582 break;
@@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
1586 break; 1586 break;
1587 case NETDEV_DOWN: 1587 case NETDEV_DOWN:
1588 neigh_ifdown(&nd_tbl, dev); 1588 neigh_ifdown(&nd_tbl, dev);
1589 fib6_run_gc(~0UL, net); 1589 fib6_run_gc(0, net, false);
1590 break; 1590 break;
1591 case NETDEV_NOTIFY_PEERS: 1591 case NETDEV_NOTIFY_PEERS:
1592 ndisc_send_unsol_na(dev); 1592 ndisc_send_unsol_na(dev);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8c891aa2464..824c424f9648 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
1326 goto out; 1326 goto out;
1327 1327
1328 net->ipv6.ip6_rt_gc_expire++; 1328 net->ipv6.ip6_rt_gc_expire++;
1329 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1329 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1330 net->ipv6.ip6_rt_last_gc = now; 1330 net->ipv6.ip6_rt_last_gc = now;
1331 entries = dst_entries_get_slow(ops); 1331 entries = dst_entries_get_slow(ops);
1332 if (entries < ops->gc_thresh) 1332 if (entries < ops->gc_thresh)
@@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2827 net = (struct net *)ctl->extra1; 2827 net = (struct net *)ctl->extra1;
2828 delay = net->ipv6.sysctl.flush_delay; 2828 delay = net->ipv6.sysctl.flush_delay;
2829 proc_dointvec(ctl, write, buffer, lenp, ppos); 2829 proc_dointvec(ctl, write, buffer, lenp, ppos);
2830 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2830 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2831 return 0; 2831 return 0;
2832} 2832}
2833 2833