aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2013-09-24 00:19:49 -0400
committerDavid S. Miller <davem@davemloft.net>2013-09-28 18:09:15 -0400
commit50624c934db18ab90aaea4908f60dd39aab4e6e5 (patch)
tree039af57854883e192f5977dc4504ba4849cbb2dd
parent7df37ff33dc122f7bd0614d707939fe84322d264 (diff)
net: Delay default_device_exit_batch until no devices are unregistering v2
There is currently no serialization between network namespaces exiting and network devices exiting, as the final part of netdev_run_todo does not happen under the rtnl_lock. This is compounded by the fact that the only list of devices unregistering in netdev_run_todo is local to netdev_run_todo. This lack of serialization in extreme cases results in network devices unregistering in netdev_run_todo after the loopback device of their network namespace has been freed (making dst_ifdown unsafe), and after their network namespace has exited (making the NETDEV_UNREGISTER and NETDEV_UNREGISTER_FINAL callbacks unsafe). Add the missing serialization by keeping a per network namespace count of how many network devices are unregistering and having a wait queue that is woken up whenever the count is decreased. The count and wait queue allow default_device_exit_batch to wait until all of the unregistration activity for a network namespace has finished before proceeding to unregister the loopback device and then allowing the network namespace to exit. Only a single global wait queue is used because there is a single global lock and there is a single waiter; per network namespace wait queues would be a waste of resources. The per network namespace count of unregistering devices gives a progress guarantee because the number of network devices unregistering in an exiting network namespace must ultimately drop to zero (assuming network device unregistration completes). The basic logic remains the same as in v1. This patch is now half comment and half rtnl_lock_unregistering, an expanded version of wait_event that performs no extra work in the common case where no network devices are unregistering when we get to default_device_exit_batch. Reported-by: Francesco Ruggeri <fruggeri@aristanetworks.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--net/core/dev.c49
2 files changed, 49 insertions, 1 deletions
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1313456a0994..9d22f08896c6 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -74,6 +74,7 @@ struct net {
74 struct hlist_head *dev_index_head; 74 struct hlist_head *dev_index_head;
75 unsigned int dev_base_seq; /* protected by rtnl_mutex */ 75 unsigned int dev_base_seq; /* protected by rtnl_mutex */
76 int ifindex; 76 int ifindex;
77 unsigned int dev_unreg_count;
77 78
78 /* core fib_rules */ 79 /* core fib_rules */
79 struct list_head rules_ops; 80 struct list_head rules_ops;
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
5247 5247
5248/* Delayed registration/unregistration */ 5248/* Delayed registration/unregistration */
5249static LIST_HEAD(net_todo_list); 5249static LIST_HEAD(net_todo_list);
5250static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5250 5251
5251static void net_set_todo(struct net_device *dev) 5252static void net_set_todo(struct net_device *dev)
5252{ 5253{
5253 list_add_tail(&dev->todo_list, &net_todo_list); 5254 list_add_tail(&dev->todo_list, &net_todo_list);
5255 dev_net(dev)->dev_unreg_count++;
5254} 5256}
5255 5257
5256static void rollback_registered_many(struct list_head *head) 5258static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
5918 if (dev->destructor) 5920 if (dev->destructor)
5919 dev->destructor(dev); 5921 dev->destructor(dev);
5920 5922
5923 /* Report a network device has been unregistered */
5924 rtnl_lock();
5925 dev_net(dev)->dev_unreg_count--;
5926 __rtnl_unlock();
5927 wake_up(&netdev_unregistering_wq);
5928
5921 /* Free network device */ 5929 /* Free network device */
5922 kobject_put(&dev->dev.kobj); 5930 kobject_put(&dev->dev.kobj);
5923 } 5931 }
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
6603 rtnl_unlock(); 6611 rtnl_unlock();
6604} 6612}
6605 6613
6614static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
6615{
6616 /* Return with the rtnl_lock held when there are no network
6617 * devices unregistering in any network namespace in net_list.
6618 */
6619 struct net *net;
6620 bool unregistering;
6621 DEFINE_WAIT(wait);
6622
6623 for (;;) {
6624 prepare_to_wait(&netdev_unregistering_wq, &wait,
6625 TASK_UNINTERRUPTIBLE);
6626 unregistering = false;
6627 rtnl_lock();
6628 list_for_each_entry(net, net_list, exit_list) {
6629 if (net->dev_unreg_count > 0) {
6630 unregistering = true;
6631 break;
6632 }
6633 }
6634 if (!unregistering)
6635 break;
6636 __rtnl_unlock();
6637 schedule();
6638 }
6639 finish_wait(&netdev_unregistering_wq, &wait);
6640}
6641
6606static void __net_exit default_device_exit_batch(struct list_head *net_list) 6642static void __net_exit default_device_exit_batch(struct list_head *net_list)
6607{ 6643{
6608 /* At exit all network devices must be removed from a network 6644 /* At exit all network devices must be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
6614 struct net *net; 6650 struct net *net;
6615 LIST_HEAD(dev_kill_list); 6651 LIST_HEAD(dev_kill_list);
6616 6652
6617 rtnl_lock(); 6653 /* To prevent network device cleanup code from dereferencing
6654 * loopback devices or network devices that have been freed
6655 * wait here for all pending unregistrations to complete,
6656 * before unregistering the loopback device and allowing the
6657 * network namespace to be freed.
6658 *
6659 * The netdev todo list containing all network devices
6660 * unregistrations that happen in default_device_exit_batch
6661 * will run in the rtnl_unlock() at the end of
6662 * default_device_exit_batch.
6663 */
6664 rtnl_lock_unregistering(net_list);
6618 list_for_each_entry(net, net_list, exit_list) { 6665 list_for_each_entry(net, net_list, exit_list) {
6619 for_each_netdev_reverse(net, dev) { 6666 for_each_netdev_reverse(net, dev) {
6620 if (dev->rtnl_link_ops) 6667 if (dev->rtnl_link_ops)