diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2012-08-01 13:33:47 -0400 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2012-11-19 08:59:10 -0500 |
commit | 0a01f2cc390e10633a54f72c608cc3fe19a50c3d (patch) | |
tree | e713a1c45b5ce125a5d33b61d528cd45264d47a7 /kernel/pid_namespace.c | |
parent | 17cf22c33e1f1b5e435469c84e43872579497653 (diff) |
pidns: Make the pidns proc mount/umount logic obvious.
Track the number of pids in the proc hash table. When the number of
pids goes to 0, schedule work to unmount the kernel mount of proc.
Move the mount of proc into alloc_pid when we allocate the pid for
init.
Remove the surprising calls of pid_ns_release_proc in fork and
proc_flush_task. Those code paths really shouldn't know about proc
namespace implementation details and people have demonstrated several
times that finding and understanding those code paths is difficult and
non-obvious.
Because of the call path, detach_pid is always called with the
rtnl_lock held, so free_pid is not allowed to sleep and the work of
unmounting proc is moved to a work queue. This has the side benefit
of not blocking the entire world waiting for the unnecessary
rcu_barrier in deactivate_locked_super.
In the process of making the code clear and obvious this fixes a bug
reported by Gao feng <gaofeng@cn.fujitsu.com> where we would leak a
mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns
succeeded and copy_net_ns failed.
Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'kernel/pid_namespace.c')
-rw-r--r-- | kernel/pid_namespace.c | 14 |
1 files changed, 7 insertions, 7 deletions
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index b2604950aa50..84591cfeefc1 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -72,6 +72,12 @@ err_alloc: | |||
72 | return NULL; | 72 | return NULL; |
73 | } | 73 | } |
74 | 74 | ||
75 | static void proc_cleanup_work(struct work_struct *work) | ||
76 | { | ||
77 | struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); | ||
78 | pid_ns_release_proc(ns); | ||
79 | } | ||
80 | |||
75 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 81 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
76 | #define MAX_PID_NS_LEVEL 32 | 82 | #define MAX_PID_NS_LEVEL 32 |
77 | 83 | ||
@@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns | |||
105 | ns->level = level; | 111 | ns->level = level; |
106 | ns->parent = get_pid_ns(parent_pid_ns); | 112 | ns->parent = get_pid_ns(parent_pid_ns); |
107 | ns->user_ns = get_user_ns(user_ns); | 113 | ns->user_ns = get_user_ns(user_ns); |
114 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | ||
108 | 115 | ||
109 | set_bit(0, ns->pidmap[0].page); | 116 | set_bit(0, ns->pidmap[0].page); |
110 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 117 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns | |||
112 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 119 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
113 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 120 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
114 | 121 | ||
115 | err = pid_ns_prepare_proc(ns); | ||
116 | if (err) | ||
117 | goto out_put_parent_pid_ns; | ||
118 | |||
119 | return ns; | 122 | return ns; |
120 | 123 | ||
121 | out_put_parent_pid_ns: | ||
122 | put_pid_ns(parent_pid_ns); | ||
123 | put_user_ns(user_ns); | ||
124 | out_free_map: | 124 | out_free_map: |
125 | kfree(ns->pidmap[0].page); | 125 | kfree(ns->pidmap[0].page); |
126 | out_free: | 126 | out_free: |