diff options
author | Pavel Emelyanov <xemul@openvz.org> | 2007-10-19 02:40:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-19 14:53:39 -0400 |
commit | 07543f5c75cee744b791cf7716c69571486fe753 (patch) | |
tree | 3e0d78f869114e5a6e2629fc157ee8f9023316bd | |
parent | 425fb2b4bf5dde24be4a82e9a2c344bb49ac92e4 (diff) |
pid namespaces: make proc have multiple superblocks - one for each namespace
Each pid namespace has to be visible through its own proc mount. Thus we
need to have per-namespace proc trees with their own superblocks.
We cannot easily show different pid namespaces via one global proc tree, since
each pid refers to different tasks in different namespaces. E.g. pid 1
refers to the init task in the initial namespace and to some other task when
seen from another namespace. Moreover, a pid existing in one namespace may
not exist in the other.
This approach has one more advantage: the tasks from the init namespace
can see what tasks live in another namespace by reading entries from another
proc tree.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/inode.c | 2 | ||||
-rw-r--r-- | fs/proc/root.c | 67 | ||||
-rw-r--r-- | include/linux/pid_namespace.h | 3 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 3 |
4 files changed, 69 insertions, 6 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 99ca00485fc3..abe6a3f04368 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -448,7 +448,7 @@ out_mod: | |||
448 | return NULL; | 448 | return NULL; |
449 | } | 449 | } |
450 | 450 | ||
451 | int proc_fill_super(struct super_block *s, void *data, int silent) | 451 | int proc_fill_super(struct super_block *s) |
452 | { | 452 | { |
453 | struct inode * root_inode; | 453 | struct inode * root_inode; |
454 | 454 | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index cf3046638b09..94e9d734384e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -18,32 +18,90 @@ | |||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/smp_lock.h> | 19 | #include <linux/smp_lock.h> |
20 | #include <linux/mount.h> | 20 | #include <linux/mount.h> |
21 | #include <linux/pid_namespace.h> | ||
21 | 22 | ||
22 | #include "internal.h" | 23 | #include "internal.h" |
23 | 24 | ||
24 | struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; | 25 | struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; |
25 | 26 | ||
27 | static int proc_test_super(struct super_block *sb, void *data) | ||
28 | { | ||
29 | return sb->s_fs_info == data; | ||
30 | } | ||
31 | |||
32 | static int proc_set_super(struct super_block *sb, void *data) | ||
33 | { | ||
34 | struct pid_namespace *ns; | ||
35 | |||
36 | ns = (struct pid_namespace *)data; | ||
37 | sb->s_fs_info = get_pid_ns(ns); | ||
38 | return set_anon_super(sb, NULL); | ||
39 | } | ||
40 | |||
26 | static int proc_get_sb(struct file_system_type *fs_type, | 41 | static int proc_get_sb(struct file_system_type *fs_type, |
27 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 42 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
28 | { | 43 | { |
44 | int err; | ||
45 | struct super_block *sb; | ||
46 | struct pid_namespace *ns; | ||
47 | struct proc_inode *ei; | ||
48 | |||
29 | if (proc_mnt) { | 49 | if (proc_mnt) { |
30 | /* Seed the root directory with a pid so it doesn't need | 50 | /* Seed the root directory with a pid so it doesn't need |
31 | * to be special in base.c. I would do this earlier but | 51 | * to be special in base.c. I would do this earlier but |
32 | * the only task alive when /proc is mounted the first time | 52 | * the only task alive when /proc is mounted the first time |
33 | * is the init_task and it doesn't have any pids. | 53 | * is the init_task and it doesn't have any pids. |
34 | */ | 54 | */ |
35 | struct proc_inode *ei; | ||
36 | ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); | 55 | ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); |
37 | if (!ei->pid) | 56 | if (!ei->pid) |
38 | ei->pid = find_get_pid(1); | 57 | ei->pid = find_get_pid(1); |
39 | } | 58 | } |
40 | return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); | 59 | |
60 | if (flags & MS_KERNMOUNT) | ||
61 | ns = (struct pid_namespace *)data; | ||
62 | else | ||
63 | ns = current->nsproxy->pid_ns; | ||
64 | |||
65 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); | ||
66 | if (IS_ERR(sb)) | ||
67 | return PTR_ERR(sb); | ||
68 | |||
69 | if (!sb->s_root) { | ||
70 | sb->s_flags = flags; | ||
71 | err = proc_fill_super(sb); | ||
72 | if (err) { | ||
73 | up_write(&sb->s_umount); | ||
74 | deactivate_super(sb); | ||
75 | return err; | ||
76 | } | ||
77 | |||
78 | ei = PROC_I(sb->s_root->d_inode); | ||
79 | if (!ei->pid) { | ||
80 | rcu_read_lock(); | ||
81 | ei->pid = get_pid(find_pid_ns(1, ns)); | ||
82 | rcu_read_unlock(); | ||
83 | } | ||
84 | |||
85 | sb->s_flags |= MS_ACTIVE; | ||
86 | ns->proc_mnt = mnt; | ||
87 | } | ||
88 | |||
89 | return simple_set_mnt(mnt, sb); | ||
90 | } | ||
91 | |||
92 | static void proc_kill_sb(struct super_block *sb) | ||
93 | { | ||
94 | struct pid_namespace *ns; | ||
95 | |||
96 | ns = (struct pid_namespace *)sb->s_fs_info; | ||
97 | kill_anon_super(sb); | ||
98 | put_pid_ns(ns); | ||
41 | } | 99 | } |
42 | 100 | ||
43 | static struct file_system_type proc_fs_type = { | 101 | static struct file_system_type proc_fs_type = { |
44 | .name = "proc", | 102 | .name = "proc", |
45 | .get_sb = proc_get_sb, | 103 | .get_sb = proc_get_sb, |
46 | .kill_sb = kill_anon_super, | 104 | .kill_sb = proc_kill_sb, |
47 | }; | 105 | }; |
48 | 106 | ||
49 | void __init proc_root_init(void) | 107 | void __init proc_root_init(void) |
@@ -54,12 +112,13 @@ void __init proc_root_init(void) | |||
54 | err = register_filesystem(&proc_fs_type); | 112 | err = register_filesystem(&proc_fs_type); |
55 | if (err) | 113 | if (err) |
56 | return; | 114 | return; |
57 | proc_mnt = kern_mount(&proc_fs_type); | 115 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); |
58 | err = PTR_ERR(proc_mnt); | 116 | err = PTR_ERR(proc_mnt); |
59 | if (IS_ERR(proc_mnt)) { | 117 | if (IS_ERR(proc_mnt)) { |
60 | unregister_filesystem(&proc_fs_type); | 118 | unregister_filesystem(&proc_fs_type); |
61 | return; | 119 | return; |
62 | } | 120 | } |
121 | |||
63 | proc_misc_init(); | 122 | proc_misc_init(); |
64 | 123 | ||
65 | proc_net_init(); | 124 | proc_net_init(); |
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 14376ebf4bac..416d4e4ce94e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h | |||
@@ -23,6 +23,9 @@ struct pid_namespace { | |||
23 | struct kmem_cache *pid_cachep; | 23 | struct kmem_cache *pid_cachep; |
24 | int level; | 24 | int level; |
25 | struct pid_namespace *parent; | 25 | struct pid_namespace *parent; |
26 | #ifdef CONFIG_PROC_FS | ||
27 | struct vfsmount *proc_mnt; | ||
28 | #endif | ||
26 | }; | 29 | }; |
27 | 30 | ||
28 | extern struct pid_namespace init_pid_ns; | 31 | extern struct pid_namespace init_pid_ns; |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index dbd601c7244c..cbc1038c7900 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, | |||
125 | extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); | 125 | extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); |
126 | 126 | ||
127 | extern struct vfsmount *proc_mnt; | 127 | extern struct vfsmount *proc_mnt; |
128 | extern int proc_fill_super(struct super_block *,void *,int); | 128 | struct pid_namespace; |
129 | extern int proc_fill_super(struct super_block *); | ||
129 | extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); | 130 | extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); |
130 | 131 | ||
131 | /* | 132 | /* |