about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2012-08-22 13:19:46 -0400
committer: David S. Miller <davem@davemloft.net> 2012-08-23 00:50:36 -0400
commit: 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 (patch)
tree: 725026e2192e57b98366947ab53c73689ca040a8
parent: bf277b0ccea7d2422b85e232017ce3fddbe9c49c (diff)
net: remove delay at device dismantle
I noticed an extra one-second delay in device dismantle, tracked down to a call to dst_dev_event() while some call_rcu() callbacks are still in RCU queues.

These call_rcu() callbacks were posted by rt_free(struct rtable *rt) calls.

We then wait a while (a full second) in netdev_wait_allrefs() before kicking NETDEV_UNREGISTER again.

As the call_rcu() callbacks are now completed, dst_dev_event() can do the needed device swap on busy dst.

To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called after an rcu_barrier(), but outside of the RTNL lock.

Use NETDEV_UNREGISTER_FINAL with care!

Change the dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL.

Also remove NETDEV_UNREGISTER_BATCH, as it is not used anymore after IP cache removal.

With help from Gao feng

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/linux/netdevice.h |  2
-rw-r--r--  net/core/dev.c            | 22
-rw-r--r--  net/core/dst.c            |  2
-rw-r--r--  net/core/fib_rules.c      |  3
-rw-r--r--  net/core/rtnetlink.c      |  2
-rw-r--r--  net/ipv4/devinet.c        |  6
-rw-r--r--  net/ipv4/fib_frontend.c   |  8
-rw-r--r--  net/ipv6/addrconf.c       |  6
8 files changed, 27 insertions, 24 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4936f09a9333..9ad7fa8c10e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1553,7 +1553,7 @@ struct packet_type {
1553#define NETDEV_PRE_TYPE_CHANGE 0x000E 1553#define NETDEV_PRE_TYPE_CHANGE 0x000E
1554#define NETDEV_POST_TYPE_CHANGE 0x000F 1554#define NETDEV_POST_TYPE_CHANGE 0x000F
1555#define NETDEV_POST_INIT 0x0010 1555#define NETDEV_POST_INIT 0x0010
1556#define NETDEV_UNREGISTER_BATCH 0x0011 1556#define NETDEV_UNREGISTER_FINAL 0x0011
1557#define NETDEV_RELEASE 0x0012 1557#define NETDEV_RELEASE 0x0012
1558#define NETDEV_NOTIFY_PEERS 0x0013 1558#define NETDEV_NOTIFY_PEERS 0x0013
1559#define NETDEV_JOIN 0x0014 1559#define NETDEV_JOIN 0x0014
diff --git a/net/core/dev.c b/net/core/dev.c
index 088923fe4066..0640d2a859c6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1406,7 +1406,6 @@ rollback:
1406 nb->notifier_call(nb, NETDEV_DOWN, dev); 1406 nb->notifier_call(nb, NETDEV_DOWN, dev);
1407 } 1407 }
1408 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1408 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1409 nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1410 } 1409 }
1411 } 1410 }
1412 1411
@@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
1448 nb->notifier_call(nb, NETDEV_DOWN, dev); 1447 nb->notifier_call(nb, NETDEV_DOWN, dev);
1449 } 1448 }
1450 nb->notifier_call(nb, NETDEV_UNREGISTER, dev); 1449 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1451 nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1452 } 1450 }
1453 } 1451 }
1454unlock: 1452unlock:
@@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1468 1466
1469int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1467int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1470{ 1468{
1471 ASSERT_RTNL(); 1469 if (val != NETDEV_UNREGISTER_FINAL)
1470 ASSERT_RTNL();
1472 return raw_notifier_call_chain(&netdev_chain, val, dev); 1471 return raw_notifier_call_chain(&netdev_chain, val, dev);
1473} 1472}
1474EXPORT_SYMBOL(call_netdevice_notifiers); 1473EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head)
5331 netdev_unregister_kobject(dev); 5330 netdev_unregister_kobject(dev);
5332 } 5331 }
5333 5332
5334 /* Process any work delayed until the end of the batch */
5335 dev = list_first_entry(head, struct net_device, unreg_list);
5336 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5337
5338 synchronize_net(); 5333 synchronize_net();
5339 5334
5340 list_for_each_entry(dev, head, unreg_list) 5335 list_for_each_entry(dev, head, unreg_list)
@@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
5787 5782
5788 /* Rebroadcast unregister notification */ 5783 /* Rebroadcast unregister notification */
5789 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5784 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5790 /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users 5785 rcu_barrier();
5791 * should have already handle it the first time */ 5786 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5792
5793 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 5787 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5794 &dev->state)) { 5788 &dev->state)) {
5795 /* We must not have linkwatch events 5789 /* We must not have linkwatch events
@@ -5851,9 +5845,8 @@ void netdev_run_todo(void)
5851 5845
5852 __rtnl_unlock(); 5846 __rtnl_unlock();
5853 5847
5854 /* Wait for rcu callbacks to finish before attempting to drain 5848
5855 * the device list. This usually avoids a 250ms wait. 5849 /* Wait for rcu callbacks to finish before next phase */
5856 */
5857 if (!list_empty(&list)) 5850 if (!list_empty(&list))
5858 rcu_barrier(); 5851 rcu_barrier();
5859 5852
@@ -5862,6 +5855,8 @@ void netdev_run_todo(void)
5862 = list_first_entry(&list, struct net_device, todo_list); 5855 = list_first_entry(&list, struct net_device, todo_list);
5863 list_del(&dev->todo_list); 5856 list_del(&dev->todo_list);
5864 5857
5858 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5859
5865 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 5860 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5866 pr_err("network todo '%s' but state %d\n", 5861 pr_err("network todo '%s' but state %d\n",
5867 dev->name, dev->reg_state); 5862 dev->name, dev->reg_state);
@@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6256 the device is just moving and can keep their slaves up. 6251 the device is just moving and can keep their slaves up.
6257 */ 6252 */
6258 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6253 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6259 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
6260 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 6254 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6261 6255
6262 /* 6256 /*
diff --git a/net/core/dst.c b/net/core/dst.c
index 56d63612e1e4..f6593d238e9a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
374 struct dst_entry *dst, *last = NULL; 374 struct dst_entry *dst, *last = NULL;
375 375
376 switch (event) { 376 switch (event) {
377 case NETDEV_UNREGISTER: 377 case NETDEV_UNREGISTER_FINAL:
378 case NETDEV_DOWN: 378 case NETDEV_DOWN:
379 mutex_lock(&dst_gc_mutex); 379 mutex_lock(&dst_gc_mutex);
380 for (dst = dst_busy_list; dst; dst = dst->next) { 380 for (dst = dst_busy_list; dst; dst = dst->next) {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83236c9..585093755c23 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
711 struct net *net = dev_net(dev); 711 struct net *net = dev_net(dev);
712 struct fib_rules_ops *ops; 712 struct fib_rules_ops *ops;
713 713
714 ASSERT_RTNL();
715 714
716 switch (event) { 715 switch (event) {
717 case NETDEV_REGISTER: 716 case NETDEV_REGISTER:
717 ASSERT_RTNL();
718 list_for_each_entry(ops, &net->rules_ops, list) 718 list_for_each_entry(ops, &net->rules_ops, list)
719 attach_rules(&ops->rules_list, dev); 719 attach_rules(&ops->rules_list, dev);
720 break; 720 break;
721 721
722 case NETDEV_UNREGISTER: 722 case NETDEV_UNREGISTER:
723 ASSERT_RTNL();
723 list_for_each_entry(ops, &net->rules_ops, list) 724 list_for_each_entry(ops, &net->rules_ops, list)
724 detach_rules(&ops->rules_list, dev); 725 detach_rules(&ops->rules_list, dev);
725 break; 726 break;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 34d975b0f277..c64efcff8078 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
2358 case NETDEV_PRE_TYPE_CHANGE: 2358 case NETDEV_PRE_TYPE_CHANGE:
2359 case NETDEV_GOING_DOWN: 2359 case NETDEV_GOING_DOWN:
2360 case NETDEV_UNREGISTER: 2360 case NETDEV_UNREGISTER:
2361 case NETDEV_UNREGISTER_BATCH: 2361 case NETDEV_UNREGISTER_FINAL:
2362 case NETDEV_RELEASE: 2362 case NETDEV_RELEASE:
2363 case NETDEV_JOIN: 2363 case NETDEV_JOIN:
2364 break; 2364 break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index adf273f8ad2e..6a5e6e4b142c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1147 void *ptr) 1147 void *ptr)
1148{ 1148{
1149 struct net_device *dev = ptr; 1149 struct net_device *dev = ptr;
1150 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1150 struct in_device *in_dev;
1151 1151
1152 if (event == NETDEV_UNREGISTER_FINAL)
1153 goto out;
1154
1155 in_dev = __in_dev_get_rtnl(dev);
1152 ASSERT_RTNL(); 1156 ASSERT_RTNL();
1153 1157
1154 if (!in_dev) { 1158 if (!in_dev) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7f073a38c87d..fd7d9ae64f16 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
1041static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1041static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1042{ 1042{
1043 struct net_device *dev = ptr; 1043 struct net_device *dev = ptr;
1044 struct in_device *in_dev = __in_dev_get_rtnl(dev); 1044 struct in_device *in_dev;
1045 struct net *net = dev_net(dev); 1045 struct net *net = dev_net(dev);
1046 1046
1047 if (event == NETDEV_UNREGISTER) { 1047 if (event == NETDEV_UNREGISTER) {
@@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1050 return NOTIFY_DONE; 1050 return NOTIFY_DONE;
1051 } 1051 }
1052 1052
1053 if (!in_dev) 1053 if (event == NETDEV_UNREGISTER_FINAL)
1054 return NOTIFY_DONE; 1054 return NOTIFY_DONE;
1055 1055
1056 in_dev = __in_dev_get_rtnl(dev);
1057
1056 switch (event) { 1058 switch (event) {
1057 case NETDEV_UP: 1059 case NETDEV_UP:
1058 for_ifa(in_dev) { 1060 for_ifa(in_dev) {
@@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
1071 case NETDEV_CHANGE: 1073 case NETDEV_CHANGE:
1072 rt_cache_flush(dev_net(dev), 0); 1074 rt_cache_flush(dev_net(dev), 0);
1073 break; 1075 break;
1074 case NETDEV_UNREGISTER_BATCH:
1075 break;
1076 } 1076 }
1077 return NOTIFY_DONE; 1077 return NOTIFY_DONE;
1078} 1078}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7c31e3..e581009cb09e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2566 void *data) 2566 void *data)
2567{ 2567{
2568 struct net_device *dev = (struct net_device *) data; 2568 struct net_device *dev = (struct net_device *) data;
2569 struct inet6_dev *idev = __in6_dev_get(dev); 2569 struct inet6_dev *idev;
2570 int run_pending = 0; 2570 int run_pending = 0;
2571 int err; 2571 int err;
2572 2572
2573 if (event == NETDEV_UNREGISTER_FINAL)
2574 return NOTIFY_DONE;
2575
2576 idev = __in6_dev_get(dev);
2573 switch (event) { 2577 switch (event) {
2574 case NETDEV_REGISTER: 2578 case NETDEV_REGISTER:
2575 if (!idev && dev->mtu >= IPV6_MIN_MTU) { 2579 if (!idev && dev->mtu >= IPV6_MIN_MTU) {