aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@openvz.org>2007-10-19 02:40:08 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-19 14:53:39 -0400
commit07543f5c75cee744b791cf7716c69571486fe753 (patch)
tree3e0d78f869114e5a6e2629fc157ee8f9023316bd
parent425fb2b4bf5dde24be4a82e9a2c344bb49ac92e4 (diff)
pid namespaces: make proc have multiple superblocks - one for each namespace
Each pid namespace have to be visible through its own proc mount. Thus we need to have per-namespace proc trees with their own superblocks. We cannot easily show different pid namespace via one global proc tree, since each pid refers to different tasks in different namespaces. E.g. pid 1 refers to the init task in the initial namespace and to some other task when seeing from another namespace. Moreover - pid, exisintg in one namespace may not exist in the other. This approach has one move advantage is that the tasks from the init namespace can see what tasks live in another namespace by reading entries from another proc tree. Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Paul Menage <menage@google.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/root.c67
-rw-r--r--include/linux/pid_namespace.h3
-rw-r--r--include/linux/proc_fs.h3
4 files changed, 69 insertions, 6 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 99ca00485fc3..abe6a3f04368 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -448,7 +448,7 @@ out_mod:
448 return NULL; 448 return NULL;
449} 449}
450 450
451int proc_fill_super(struct super_block *s, void *data, int silent) 451int proc_fill_super(struct super_block *s)
452{ 452{
453 struct inode * root_inode; 453 struct inode * root_inode;
454 454
diff --git a/fs/proc/root.c b/fs/proc/root.c
index cf3046638b09..94e9d734384e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,32 +18,90 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/pid_namespace.h>
21 22
22#include "internal.h" 23#include "internal.h"
23 24
24struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; 25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
25 26
27static int proc_test_super(struct super_block *sb, void *data)
28{
29 return sb->s_fs_info == data;
30}
31
32static int proc_set_super(struct super_block *sb, void *data)
33{
34 struct pid_namespace *ns;
35
36 ns = (struct pid_namespace *)data;
37 sb->s_fs_info = get_pid_ns(ns);
38 return set_anon_super(sb, NULL);
39}
40
26static int proc_get_sb(struct file_system_type *fs_type, 41static int proc_get_sb(struct file_system_type *fs_type,
27 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 42 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
28{ 43{
44 int err;
45 struct super_block *sb;
46 struct pid_namespace *ns;
47 struct proc_inode *ei;
48
29 if (proc_mnt) { 49 if (proc_mnt) {
30 /* Seed the root directory with a pid so it doesn't need 50 /* Seed the root directory with a pid so it doesn't need
31 * to be special in base.c. I would do this earlier but 51 * to be special in base.c. I would do this earlier but
32 * the only task alive when /proc is mounted the first time 52 * the only task alive when /proc is mounted the first time
33 * is the init_task and it doesn't have any pids. 53 * is the init_task and it doesn't have any pids.
34 */ 54 */
35 struct proc_inode *ei;
36 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); 55 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
37 if (!ei->pid) 56 if (!ei->pid)
38 ei->pid = find_get_pid(1); 57 ei->pid = find_get_pid(1);
39 } 58 }
40 return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); 59
60 if (flags & MS_KERNMOUNT)
61 ns = (struct pid_namespace *)data;
62 else
63 ns = current->nsproxy->pid_ns;
64
65 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
66 if (IS_ERR(sb))
67 return PTR_ERR(sb);
68
69 if (!sb->s_root) {
70 sb->s_flags = flags;
71 err = proc_fill_super(sb);
72 if (err) {
73 up_write(&sb->s_umount);
74 deactivate_super(sb);
75 return err;
76 }
77
78 ei = PROC_I(sb->s_root->d_inode);
79 if (!ei->pid) {
80 rcu_read_lock();
81 ei->pid = get_pid(find_pid_ns(1, ns));
82 rcu_read_unlock();
83 }
84
85 sb->s_flags |= MS_ACTIVE;
86 ns->proc_mnt = mnt;
87 }
88
89 return simple_set_mnt(mnt, sb);
90}
91
92static void proc_kill_sb(struct super_block *sb)
93{
94 struct pid_namespace *ns;
95
96 ns = (struct pid_namespace *)sb->s_fs_info;
97 kill_anon_super(sb);
98 put_pid_ns(ns);
41} 99}
42 100
43static struct file_system_type proc_fs_type = { 101static struct file_system_type proc_fs_type = {
44 .name = "proc", 102 .name = "proc",
45 .get_sb = proc_get_sb, 103 .get_sb = proc_get_sb,
46 .kill_sb = kill_anon_super, 104 .kill_sb = proc_kill_sb,
47}; 105};
48 106
49void __init proc_root_init(void) 107void __init proc_root_init(void)
@@ -54,12 +112,13 @@ void __init proc_root_init(void)
54 err = register_filesystem(&proc_fs_type); 112 err = register_filesystem(&proc_fs_type);
55 if (err) 113 if (err)
56 return; 114 return;
57 proc_mnt = kern_mount(&proc_fs_type); 115 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
58 err = PTR_ERR(proc_mnt); 116 err = PTR_ERR(proc_mnt);
59 if (IS_ERR(proc_mnt)) { 117 if (IS_ERR(proc_mnt)) {
60 unregister_filesystem(&proc_fs_type); 118 unregister_filesystem(&proc_fs_type);
61 return; 119 return;
62 } 120 }
121
63 proc_misc_init(); 122 proc_misc_init();
64 123
65 proc_net_init(); 124 proc_net_init();
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 14376ebf4bac..416d4e4ce94e 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -23,6 +23,9 @@ struct pid_namespace {
23 struct kmem_cache *pid_cachep; 23 struct kmem_cache *pid_cachep;
24 int level; 24 int level;
25 struct pid_namespace *parent; 25 struct pid_namespace *parent;
26#ifdef CONFIG_PROC_FS
27 struct vfsmount *proc_mnt;
28#endif
26}; 29};
27 30
28extern struct pid_namespace init_pid_ns; 31extern struct pid_namespace init_pid_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index dbd601c7244c..cbc1038c7900 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
125extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); 125extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
126 126
127extern struct vfsmount *proc_mnt; 127extern struct vfsmount *proc_mnt;
128extern int proc_fill_super(struct super_block *,void *,int); 128struct pid_namespace;
129extern int proc_fill_super(struct super_block *);
129extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); 130extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
130 131
131/* 132/*