diff options
author | Eric Dumazet <edumazet@google.com> | 2012-08-22 13:19:46 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-23 00:50:36 -0400 |
commit | 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 (patch) | |
tree | 725026e2192e57b98366947ab53c73689ca040a8 | |
parent | bf277b0ccea7d2422b85e232017ce3fddbe9c49c (diff) |
net: remove delay at device dismantle
I noticed an extra one-second delay in device dismantle, tracked down to
a call to dst_dev_event() while some call_rcu() callbacks are still in RCU queues.
These call_rcu() were posted by rt_free(struct rtable *rt) calls.
We then wait a little (but a full second) in netdev_wait_allrefs() before
rebroadcasting NETDEV_UNREGISTER.
As the call_rcu() callbacks have completed by then, dst_dev_event() can do the needed
device swap on busy dst entries.
To solve this problem, add a new NETDEV_UNREGISTER_FINAL event, called
after an rcu_barrier(), but outside of the RTNL lock.
Use NETDEV_UNREGISTER_FINAL with care!
Change the dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL.
Also remove NETDEV_UNREGISTER_BATCH, as it's not used anymore after
the IP route cache removal.
With help from Gao feng
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 2 | ||||
-rw-r--r-- | net/core/dev.c | 22 | ||||
-rw-r--r-- | net/core/dst.c | 2 | ||||
-rw-r--r-- | net/core/fib_rules.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 8 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 6 |
8 files changed, 27 insertions, 24 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4936f09a9333..9ad7fa8c10e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -1553,7 +1553,7 @@ struct packet_type { | |||
1553 | #define NETDEV_PRE_TYPE_CHANGE 0x000E | 1553 | #define NETDEV_PRE_TYPE_CHANGE 0x000E |
1554 | #define NETDEV_POST_TYPE_CHANGE 0x000F | 1554 | #define NETDEV_POST_TYPE_CHANGE 0x000F |
1555 | #define NETDEV_POST_INIT 0x0010 | 1555 | #define NETDEV_POST_INIT 0x0010 |
1556 | #define NETDEV_UNREGISTER_BATCH 0x0011 | 1556 | #define NETDEV_UNREGISTER_FINAL 0x0011 |
1557 | #define NETDEV_RELEASE 0x0012 | 1557 | #define NETDEV_RELEASE 0x0012 |
1558 | #define NETDEV_NOTIFY_PEERS 0x0013 | 1558 | #define NETDEV_NOTIFY_PEERS 0x0013 |
1559 | #define NETDEV_JOIN 0x0014 | 1559 | #define NETDEV_JOIN 0x0014 |
diff --git a/net/core/dev.c b/net/core/dev.c index 088923fe4066..0640d2a859c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -1406,7 +1406,6 @@ rollback: | |||
1406 | nb->notifier_call(nb, NETDEV_DOWN, dev); | 1406 | nb->notifier_call(nb, NETDEV_DOWN, dev); |
1407 | } | 1407 | } |
1408 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); | 1408 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); |
1409 | nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); | ||
1410 | } | 1409 | } |
1411 | } | 1410 | } |
1412 | 1411 | ||
@@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb) | |||
1448 | nb->notifier_call(nb, NETDEV_DOWN, dev); | 1447 | nb->notifier_call(nb, NETDEV_DOWN, dev); |
1449 | } | 1448 | } |
1450 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); | 1449 | nb->notifier_call(nb, NETDEV_UNREGISTER, dev); |
1451 | nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); | ||
1452 | } | 1450 | } |
1453 | } | 1451 | } |
1454 | unlock: | 1452 | unlock: |
@@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); | |||
1468 | 1466 | ||
1469 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | 1467 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) |
1470 | { | 1468 | { |
1471 | ASSERT_RTNL(); | 1469 | if (val != NETDEV_UNREGISTER_FINAL) |
1470 | ASSERT_RTNL(); | ||
1472 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1471 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
1473 | } | 1472 | } |
1474 | EXPORT_SYMBOL(call_netdevice_notifiers); | 1473 | EXPORT_SYMBOL(call_netdevice_notifiers); |
@@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head) | |||
5331 | netdev_unregister_kobject(dev); | 5330 | netdev_unregister_kobject(dev); |
5332 | } | 5331 | } |
5333 | 5332 | ||
5334 | /* Process any work delayed until the end of the batch */ | ||
5335 | dev = list_first_entry(head, struct net_device, unreg_list); | ||
5336 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | ||
5337 | |||
5338 | synchronize_net(); | 5333 | synchronize_net(); |
5339 | 5334 | ||
5340 | list_for_each_entry(dev, head, unreg_list) | 5335 | list_for_each_entry(dev, head, unreg_list) |
@@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev) | |||
5787 | 5782 | ||
5788 | /* Rebroadcast unregister notification */ | 5783 | /* Rebroadcast unregister notification */ |
5789 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 5784 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
5790 | /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users | 5785 | rcu_barrier(); |
5791 | * should have already handle it the first time */ | 5786 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); |
5792 | |||
5793 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, | 5787 | if (test_bit(__LINK_STATE_LINKWATCH_PENDING, |
5794 | &dev->state)) { | 5788 | &dev->state)) { |
5795 | /* We must not have linkwatch events | 5789 | /* We must not have linkwatch events |
@@ -5851,9 +5845,8 @@ void netdev_run_todo(void) | |||
5851 | 5845 | ||
5852 | __rtnl_unlock(); | 5846 | __rtnl_unlock(); |
5853 | 5847 | ||
5854 | /* Wait for rcu callbacks to finish before attempting to drain | 5848 | |
5855 | * the device list. This usually avoids a 250ms wait. | 5849 | /* Wait for rcu callbacks to finish before next phase */ |
5856 | */ | ||
5857 | if (!list_empty(&list)) | 5850 | if (!list_empty(&list)) |
5858 | rcu_barrier(); | 5851 | rcu_barrier(); |
5859 | 5852 | ||
@@ -5862,6 +5855,8 @@ void netdev_run_todo(void) | |||
5862 | = list_first_entry(&list, struct net_device, todo_list); | 5855 | = list_first_entry(&list, struct net_device, todo_list); |
5863 | list_del(&dev->todo_list); | 5856 | list_del(&dev->todo_list); |
5864 | 5857 | ||
5858 | call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); | ||
5859 | |||
5865 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { | 5860 | if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { |
5866 | pr_err("network todo '%s' but state %d\n", | 5861 | pr_err("network todo '%s' but state %d\n", |
5867 | dev->name, dev->reg_state); | 5862 | dev->name, dev->reg_state); |
@@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
6256 | the device is just moving and can keep their slaves up. | 6251 | the device is just moving and can keep their slaves up. |
6257 | */ | 6252 | */ |
6258 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); | 6253 | call_netdevice_notifiers(NETDEV_UNREGISTER, dev); |
6259 | call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); | ||
6260 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); | 6254 | rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); |
6261 | 6255 | ||
6262 | /* | 6256 | /* |
diff --git a/net/core/dst.c b/net/core/dst.c index 56d63612e1e4..f6593d238e9a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, | |||
374 | struct dst_entry *dst, *last = NULL; | 374 | struct dst_entry *dst, *last = NULL; |
375 | 375 | ||
376 | switch (event) { | 376 | switch (event) { |
377 | case NETDEV_UNREGISTER: | 377 | case NETDEV_UNREGISTER_FINAL: |
378 | case NETDEV_DOWN: | 378 | case NETDEV_DOWN: |
379 | mutex_lock(&dst_gc_mutex); | 379 | mutex_lock(&dst_gc_mutex); |
380 | for (dst = dst_busy_list; dst; dst = dst->next) { | 380 | for (dst = dst_busy_list; dst; dst = dst->next) { |
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..585093755c23 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
711 | struct net *net = dev_net(dev); | 711 | struct net *net = dev_net(dev); |
712 | struct fib_rules_ops *ops; | 712 | struct fib_rules_ops *ops; |
713 | 713 | ||
714 | ASSERT_RTNL(); | ||
715 | 714 | ||
716 | switch (event) { | 715 | switch (event) { |
717 | case NETDEV_REGISTER: | 716 | case NETDEV_REGISTER: |
717 | ASSERT_RTNL(); | ||
718 | list_for_each_entry(ops, &net->rules_ops, list) | 718 | list_for_each_entry(ops, &net->rules_ops, list) |
719 | attach_rules(&ops->rules_list, dev); | 719 | attach_rules(&ops->rules_list, dev); |
720 | break; | 720 | break; |
721 | 721 | ||
722 | case NETDEV_UNREGISTER: | 722 | case NETDEV_UNREGISTER: |
723 | ASSERT_RTNL(); | ||
723 | list_for_each_entry(ops, &net->rules_ops, list) | 724 | list_for_each_entry(ops, &net->rules_ops, list) |
724 | detach_rules(&ops->rules_list, dev); | 725 | detach_rules(&ops->rules_list, dev); |
725 | break; | 726 | break; |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34d975b0f277..c64efcff8078 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi | |||
2358 | case NETDEV_PRE_TYPE_CHANGE: | 2358 | case NETDEV_PRE_TYPE_CHANGE: |
2359 | case NETDEV_GOING_DOWN: | 2359 | case NETDEV_GOING_DOWN: |
2360 | case NETDEV_UNREGISTER: | 2360 | case NETDEV_UNREGISTER: |
2361 | case NETDEV_UNREGISTER_BATCH: | 2361 | case NETDEV_UNREGISTER_FINAL: |
2362 | case NETDEV_RELEASE: | 2362 | case NETDEV_RELEASE: |
2363 | case NETDEV_JOIN: | 2363 | case NETDEV_JOIN: |
2364 | break; | 2364 | break; |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index adf273f8ad2e..6a5e6e4b142c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1147 | void *ptr) | 1147 | void *ptr) |
1148 | { | 1148 | { |
1149 | struct net_device *dev = ptr; | 1149 | struct net_device *dev = ptr; |
1150 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1150 | struct in_device *in_dev; |
1151 | 1151 | ||
1152 | if (event == NETDEV_UNREGISTER_FINAL) | ||
1153 | goto out; | ||
1154 | |||
1155 | in_dev = __in_dev_get_rtnl(dev); | ||
1152 | ASSERT_RTNL(); | 1156 | ASSERT_RTNL(); |
1153 | 1157 | ||
1154 | if (!in_dev) { | 1158 | if (!in_dev) { |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7f073a38c87d..fd7d9ae64f16 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, | |||
1041 | static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) | 1041 | static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) |
1042 | { | 1042 | { |
1043 | struct net_device *dev = ptr; | 1043 | struct net_device *dev = ptr; |
1044 | struct in_device *in_dev = __in_dev_get_rtnl(dev); | 1044 | struct in_device *in_dev; |
1045 | struct net *net = dev_net(dev); | 1045 | struct net *net = dev_net(dev); |
1046 | 1046 | ||
1047 | if (event == NETDEV_UNREGISTER) { | 1047 | if (event == NETDEV_UNREGISTER) { |
@@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1050 | return NOTIFY_DONE; | 1050 | return NOTIFY_DONE; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | if (!in_dev) | 1053 | if (event == NETDEV_UNREGISTER_FINAL) |
1054 | return NOTIFY_DONE; | 1054 | return NOTIFY_DONE; |
1055 | 1055 | ||
1056 | in_dev = __in_dev_get_rtnl(dev); | ||
1057 | |||
1056 | switch (event) { | 1058 | switch (event) { |
1057 | case NETDEV_UP: | 1059 | case NETDEV_UP: |
1058 | for_ifa(in_dev) { | 1060 | for_ifa(in_dev) { |
@@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
1071 | case NETDEV_CHANGE: | 1073 | case NETDEV_CHANGE: |
1072 | rt_cache_flush(dev_net(dev), 0); | 1074 | rt_cache_flush(dev_net(dev), 0); |
1073 | break; | 1075 | break; |
1074 | case NETDEV_UNREGISTER_BATCH: | ||
1075 | break; | ||
1076 | } | 1076 | } |
1077 | return NOTIFY_DONE; | 1077 | return NOTIFY_DONE; |
1078 | } | 1078 | } |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..e581009cb09e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, | |||
2566 | void *data) | 2566 | void *data) |
2567 | { | 2567 | { |
2568 | struct net_device *dev = (struct net_device *) data; | 2568 | struct net_device *dev = (struct net_device *) data; |
2569 | struct inet6_dev *idev = __in6_dev_get(dev); | 2569 | struct inet6_dev *idev; |
2570 | int run_pending = 0; | 2570 | int run_pending = 0; |
2571 | int err; | 2571 | int err; |
2572 | 2572 | ||
2573 | if (event == NETDEV_UNREGISTER_FINAL) | ||
2574 | return NOTIFY_DONE; | ||
2575 | |||
2576 | idev = __in6_dev_get(dev); | ||
2573 | switch (event) { | 2577 | switch (event) { |
2574 | case NETDEV_REGISTER: | 2578 | case NETDEV_REGISTER: |
2575 | if (!idev && dev->mtu >= IPV6_MIN_MTU) { | 2579 | if (!idev && dev->mtu >= IPV6_MIN_MTU) { |