diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2010-03-07 19:41:34 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2011-05-10 17:31:44 -0400 |
commit | 6b4e306aa3dc94a0545eb9279475b1ab6209a31f (patch) | |
tree | ca8c6dec0805076f0b5ba7c547e3cb2004e3aea2 /fs/proc/namespaces.c | |
parent | 0ee5623f9a6e52df90a78bd21179f8ab370e102e (diff) |
ns: proc files for namespace naming policy.
Create files under /proc/<pid>/ns/ to allow controlling the
namespaces of a process.
This addresses three specific problems that can make namespaces hard to
work with.
- Namespaces require a dedicated process to pin them in memory.
- It is not possible to use a namespace unless you are the child
of the original creator.
- Namespaces don't have names that userspace can use to talk about
them.
The namespace files under /proc/<pid>/ns/ can be opened and the
file descriptor can be used to talk about a specific namespace, and
to keep the specified namespace alive.
A namespace can be kept alive either by holding the file descriptor
open or by bind mounting the file somewhere else, e.g.:
mount --bind /proc/self/ns/net /some/filesystem/path
mount --bind /proc/self/fd/<N> /some/filesystem/path
This allows namespaces to be named with userspace policy.
It requires additional support to make use of these file descriptors,
and that will be coming in the following patches.
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/namespaces.c')
-rw-r--r-- | fs/proc/namespaces.c | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..6ae9f07d59ee --- /dev/null +++ b/fs/proc/namespaces.c | |||
@@ -0,0 +1,188 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/nsproxy.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/ptrace.h> | ||
5 | #include <linux/fs_struct.h> | ||
6 | #include <linux/mount.h> | ||
7 | #include <linux/path.h> | ||
8 | #include <linux/namei.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/utsname.h> | ||
11 | #include <net/net_namespace.h> | ||
12 | #include <linux/mnt_namespace.h> | ||
13 | #include <linux/ipc_namespace.h> | ||
14 | #include <linux/pid_namespace.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | |||
/*
 * Table of namespace operation descriptors, one per file in the ns
 * directory.  proc_ns_dir_readdir() lists the entries in table order and
 * proc_ns_dir_lookup() matches a requested name against each entry's
 * ->name.  The table is empty as defined here.
 */
18 | static const struct proc_ns_operations *ns_entries[] = { | ||
19 | }; | ||
20 | |||
/*
 * File operations installed as ->i_fop on every namespace inode by
 * proc_ns_instantiate().  Only ->llseek is set (to no_llseek).
 * proc_ns_fget() compares a file's ->f_op against the address of this
 * object to recognise namespace files.
 */
21 | static const struct file_operations ns_file_operations = { | ||
22 | .llseek = no_llseek, | ||
23 | }; | ||
24 | |||
25 | static struct dentry *proc_ns_instantiate(struct inode *dir, | ||
26 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
27 | { | ||
28 | const struct proc_ns_operations *ns_ops = ptr; | ||
29 | struct inode *inode; | ||
30 | struct proc_inode *ei; | ||
31 | struct dentry *error = ERR_PTR(-ENOENT); | ||
32 | |||
33 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
34 | if (!inode) | ||
35 | goto out; | ||
36 | |||
37 | ei = PROC_I(inode); | ||
38 | inode->i_mode = S_IFREG|S_IRUSR; | ||
39 | inode->i_fop = &ns_file_operations; | ||
40 | ei->ns_ops = ns_ops; | ||
41 | ei->ns = ns_ops->get(task); | ||
42 | if (!ei->ns) | ||
43 | goto out_iput; | ||
44 | |||
45 | dentry->d_op = &pid_dentry_operations; | ||
46 | d_add(dentry, inode); | ||
47 | /* Close the race of the process dying before we return the dentry */ | ||
48 | if (pid_revalidate(dentry, NULL)) | ||
49 | error = NULL; | ||
50 | out: | ||
51 | return error; | ||
52 | out_iput: | ||
53 | iput(inode); | ||
54 | goto out; | ||
55 | } | ||
56 | |||
57 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | ||
58 | filldir_t filldir, struct task_struct *task, | ||
59 | const struct proc_ns_operations *ops) | ||
60 | { | ||
61 | return proc_fill_cache(filp, dirent, filldir, | ||
62 | ops->name, strlen(ops->name), | ||
63 | proc_ns_instantiate, task, ops); | ||
64 | } | ||
65 | |||
66 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | ||
67 | filldir_t filldir) | ||
68 | { | ||
69 | int i; | ||
70 | struct dentry *dentry = filp->f_path.dentry; | ||
71 | struct inode *inode = dentry->d_inode; | ||
72 | struct task_struct *task = get_proc_task(inode); | ||
73 | const struct proc_ns_operations **entry, **last; | ||
74 | ino_t ino; | ||
75 | int ret; | ||
76 | |||
77 | ret = -ENOENT; | ||
78 | if (!task) | ||
79 | goto out_no_task; | ||
80 | |||
81 | ret = -EPERM; | ||
82 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
83 | goto out; | ||
84 | |||
85 | ret = 0; | ||
86 | i = filp->f_pos; | ||
87 | switch (i) { | ||
88 | case 0: | ||
89 | ino = inode->i_ino; | ||
90 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
91 | goto out; | ||
92 | i++; | ||
93 | filp->f_pos++; | ||
94 | /* fall through */ | ||
95 | case 1: | ||
96 | ino = parent_ino(dentry); | ||
97 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
98 | goto out; | ||
99 | i++; | ||
100 | filp->f_pos++; | ||
101 | /* fall through */ | ||
102 | default: | ||
103 | i -= 2; | ||
104 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
105 | ret = 1; | ||
106 | goto out; | ||
107 | } | ||
108 | entry = ns_entries + i; | ||
109 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
110 | while (entry <= last) { | ||
111 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
112 | task, *entry) < 0) | ||
113 | goto out; | ||
114 | filp->f_pos++; | ||
115 | entry++; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | ret = 1; | ||
120 | out: | ||
121 | put_task_struct(task); | ||
122 | out_no_task: | ||
123 | return ret; | ||
124 | } | ||
125 | |||
/*
 * File operations for the ns directory: ->read is generic_read_dir and
 * entries are enumerated by proc_ns_dir_readdir().  Not static, so it is
 * presumably wired up by the /proc/<pid> code elsewhere -- confirm
 * against the caller.
 */
126 | const struct file_operations proc_ns_dir_operations = { | ||
127 | .read = generic_read_dir, | ||
128 | .readdir = proc_ns_dir_readdir, | ||
129 | }; | ||
130 | |||
131 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | ||
132 | struct dentry *dentry, struct nameidata *nd) | ||
133 | { | ||
134 | struct dentry *error; | ||
135 | struct task_struct *task = get_proc_task(dir); | ||
136 | const struct proc_ns_operations **entry, **last; | ||
137 | unsigned int len = dentry->d_name.len; | ||
138 | |||
139 | error = ERR_PTR(-ENOENT); | ||
140 | |||
141 | if (!task) | ||
142 | goto out_no_task; | ||
143 | |||
144 | error = ERR_PTR(-EPERM); | ||
145 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
146 | goto out; | ||
147 | |||
148 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
149 | for (entry = ns_entries; entry <= last; entry++) { | ||
150 | if (strlen((*entry)->name) != len) | ||
151 | continue; | ||
152 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | ||
153 | break; | ||
154 | } | ||
155 | if (entry > last) | ||
156 | goto out; | ||
157 | |||
158 | error = proc_ns_instantiate(dir, dentry, task, *entry); | ||
159 | out: | ||
160 | put_task_struct(task); | ||
161 | out_no_task: | ||
162 | return error; | ||
163 | } | ||
164 | |||
/*
 * Inode operations for the ns directory: names are resolved by
 * proc_ns_dir_lookup(); ->getattr and ->setattr use pid_getattr and
 * proc_setattr.
 */
165 | const struct inode_operations proc_ns_dir_inode_operations = { | ||
166 | .lookup = proc_ns_dir_lookup, | ||
167 | .getattr = pid_getattr, | ||
168 | .setattr = proc_setattr, | ||
169 | }; | ||
170 | |||
171 | struct file *proc_ns_fget(int fd) | ||
172 | { | ||
173 | struct file *file; | ||
174 | |||
175 | file = fget(fd); | ||
176 | if (!file) | ||
177 | return ERR_PTR(-EBADF); | ||
178 | |||
179 | if (file->f_op != &ns_file_operations) | ||
180 | goto out_invalid; | ||
181 | |||
182 | return file; | ||
183 | |||
184 | out_invalid: | ||
185 | fput(file); | ||
186 | return ERR_PTR(-EINVAL); | ||
187 | } | ||
188 | |||