diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2010-03-07 19:41:34 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2011-05-10 17:31:44 -0400 |
commit | 6b4e306aa3dc94a0545eb9279475b1ab6209a31f (patch) | |
tree | ca8c6dec0805076f0b5ba7c547e3cb2004e3aea2 /fs/proc | |
parent | 0ee5623f9a6e52df90a78bd21179f8ab370e102e (diff) |
ns: proc files for namespace naming policy.
Create files under /proc/<pid>/ns/ to allow controlling the
namespaces of a process.
This addresses three specific problems that can make namespaces hard to
work with.
- Namespaces require a dedicated process to pin them in memory.
- It is not possible to use a namespace unless you are the child
of the original creator.
- Namespaces don't have names that userspace can use to talk about
them.
The namespace files under /proc/<pid>/ns/ can be opened and the
file descriptor can be used to talk about a specific namespace, and
to keep the specified namespace alive.
A namespace can be kept alive by either holding the file descriptor
open or bind mounting the file someplace else, for example:
mount --bind /proc/self/ns/net /some/filesystem/path
mount --bind /proc/self/fd/<N> /some/filesystem/path
This allows namespaces to be named with userspace policy.
It requires additional support to make use of these file descriptors
and that will be coming in the following patches.
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/base.c | 20 | ||||
-rw-r--r-- | fs/proc/inode.c | 7 | ||||
-rw-r--r-- | fs/proc/internal.h | 18 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 188 |
5 files changed, 223 insertions, 11 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index df434c5f28fb..c1c729335924 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -20,6 +20,7 @@ proc-y += stat.o | |||
20 | proc-y += uptime.o | 20 | proc-y += uptime.o |
21 | proc-y += version.o | 21 | proc-y += version.o |
22 | proc-y += softirqs.o | 22 | proc-y += softirqs.o |
23 | proc-y += namespaces.o | ||
23 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | 24 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o |
24 | proc-$(CONFIG_NET) += proc_net.o | 25 | proc-$(CONFIG_NET) += proc_net.o |
25 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 26 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index dfa532730e55..dc8bca72b002 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode) | |||
600 | return allowed; | 600 | return allowed; |
601 | } | 601 | } |
602 | 602 | ||
603 | static int proc_setattr(struct dentry *dentry, struct iattr *attr) | 603 | int proc_setattr(struct dentry *dentry, struct iattr *attr) |
604 | { | 604 | { |
605 | int error; | 605 | int error; |
606 | struct inode *inode = dentry->d_inode; | 606 | struct inode *inode = dentry->d_inode; |
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task) | |||
1736 | return 0; | 1736 | return 0; |
1737 | } | 1737 | } |
1738 | 1738 | ||
1739 | 1739 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | |
1740 | static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | ||
1741 | { | 1740 | { |
1742 | struct inode * inode; | 1741 | struct inode * inode; |
1743 | struct proc_inode *ei; | 1742 | struct proc_inode *ei; |
@@ -1779,7 +1778,7 @@ out_unlock: | |||
1779 | return NULL; | 1778 | return NULL; |
1780 | } | 1779 | } |
1781 | 1780 | ||
1782 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 1781 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
1783 | { | 1782 | { |
1784 | struct inode *inode = dentry->d_inode; | 1783 | struct inode *inode = dentry->d_inode; |
1785 | struct task_struct *task; | 1784 | struct task_struct *task; |
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat | |||
1820 | * made this apply to all per process world readable and executable | 1819 | * made this apply to all per process world readable and executable |
1821 | * directories. | 1820 | * directories. |
1822 | */ | 1821 | */ |
1823 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1822 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1824 | { | 1823 | { |
1825 | struct inode *inode; | 1824 | struct inode *inode; |
1826 | struct task_struct *task; | 1825 | struct task_struct *task; |
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry) | |||
1862 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | 1861 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
1863 | } | 1862 | } |
1864 | 1863 | ||
1865 | static const struct dentry_operations pid_dentry_operations = | 1864 | const struct dentry_operations pid_dentry_operations = |
1866 | { | 1865 | { |
1867 | .d_revalidate = pid_revalidate, | 1866 | .d_revalidate = pid_revalidate, |
1868 | .d_delete = pid_delete_dentry, | 1867 | .d_delete = pid_delete_dentry, |
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations = | |||
1870 | 1869 | ||
1871 | /* Lookups */ | 1870 | /* Lookups */ |
1872 | 1871 | ||
1873 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
1874 | struct task_struct *, const void *); | ||
1875 | |||
1876 | /* | 1872 | /* |
1877 | * Fill a directory entry. | 1873 | * Fill a directory entry. |
1878 | * | 1874 | * |
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | |||
1885 | * reported by readdir in sync with the inode numbers reported | 1881 | * reported by readdir in sync with the inode numbers reported |
1886 | * by stat. | 1882 | * by stat. |
1887 | */ | 1883 | */ |
1888 | static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1884 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
1889 | char *name, int len, | 1885 | const char *name, int len, |
1890 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1886 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1891 | { | 1887 | { |
1892 | struct dentry *child, *dir = filp->f_path.dentry; | 1888 | struct dentry *child, *dir = filp->f_path.dentry; |
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2820 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2816 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2821 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2817 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2822 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2818 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2819 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | ||
2823 | #ifdef CONFIG_NET | 2820 | #ifdef CONFIG_NET |
2824 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), | 2821 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), |
2825 | #endif | 2822 | #endif |
@@ -3168,6 +3165,7 @@ out_no_task: | |||
3168 | static const struct pid_entry tid_base_stuff[] = { | 3165 | static const struct pid_entry tid_base_stuff[] = { |
3169 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 3166 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
3170 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 3167 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
3168 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | ||
3171 | REG("environ", S_IRUSR, proc_environ_operations), | 3169 | REG("environ", S_IRUSR, proc_environ_operations), |
3172 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3170 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3173 | ONE("status", S_IRUGO, proc_pid_status), | 3171 | ONE("status", S_IRUGO, proc_pid_status), |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d15aa1b1cc8f..74b48cfa1bb2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode) | |||
28 | { | 28 | { |
29 | struct proc_dir_entry *de; | 29 | struct proc_dir_entry *de; |
30 | struct ctl_table_header *head; | 30 | struct ctl_table_header *head; |
31 | const struct proc_ns_operations *ns_ops; | ||
31 | 32 | ||
32 | truncate_inode_pages(&inode->i_data, 0); | 33 | truncate_inode_pages(&inode->i_data, 0); |
33 | end_writeback(inode); | 34 | end_writeback(inode); |
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode) | |||
44 | rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); | 45 | rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); |
45 | sysctl_head_put(head); | 46 | sysctl_head_put(head); |
46 | } | 47 | } |
48 | /* Release any associated namespace */ | ||
49 | ns_ops = PROC_I(inode)->ns_ops; | ||
50 | if (ns_ops && ns_ops->put) | ||
51 | ns_ops->put(PROC_I(inode)->ns); | ||
47 | } | 52 | } |
48 | 53 | ||
49 | static struct kmem_cache * proc_inode_cachep; | 54 | static struct kmem_cache * proc_inode_cachep; |
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
62 | ei->pde = NULL; | 67 | ei->pde = NULL; |
63 | ei->sysctl = NULL; | 68 | ei->sysctl = NULL; |
64 | ei->sysctl_entry = NULL; | 69 | ei->sysctl_entry = NULL; |
70 | ei->ns = NULL; | ||
71 | ei->ns_ops = NULL; | ||
65 | inode = &ei->vfs_inode; | 72 | inode = &ei->vfs_inode; |
66 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 73 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
67 | return inode; | 74 | return inode; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..96245a1b1a7c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | |||
119 | */ | 119 | */ |
120 | int proc_readdir(struct file *, void *, filldir_t); | 120 | int proc_readdir(struct file *, void *, filldir_t); |
121 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); | 121 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); |
122 | |||
123 | |||
124 | |||
125 | /* Lookups */ | ||
126 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
127 | struct task_struct *, const void *); | ||
128 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
129 | const char *name, int len, | ||
130 | instantiate_t instantiate, struct task_struct *task, const void *ptr); | ||
131 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd); | ||
132 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); | ||
133 | extern const struct dentry_operations pid_dentry_operations; | ||
134 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
135 | int proc_setattr(struct dentry *dentry, struct iattr *attr); | ||
136 | |||
137 | extern const struct inode_operations proc_ns_dir_inode_operations; | ||
138 | extern const struct file_operations proc_ns_dir_operations; | ||
139 | |||
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..6ae9f07d59ee --- /dev/null +++ b/fs/proc/namespaces.c | |||
@@ -0,0 +1,188 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/nsproxy.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/ptrace.h> | ||
5 | #include <linux/fs_struct.h> | ||
6 | #include <linux/mount.h> | ||
7 | #include <linux/path.h> | ||
8 | #include <linux/namei.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/utsname.h> | ||
11 | #include <net/net_namespace.h> | ||
12 | #include <linux/mnt_namespace.h> | ||
13 | #include <linux/ipc_namespace.h> | ||
14 | #include <linux/pid_namespace.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | |||
18 | static const struct proc_ns_operations *ns_entries[] = { | ||
19 | }; | ||
20 | |||
21 | static const struct file_operations ns_file_operations = { | ||
22 | .llseek = no_llseek, | ||
23 | }; | ||
24 | |||
25 | static struct dentry *proc_ns_instantiate(struct inode *dir, | ||
26 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
27 | { | ||
28 | const struct proc_ns_operations *ns_ops = ptr; | ||
29 | struct inode *inode; | ||
30 | struct proc_inode *ei; | ||
31 | struct dentry *error = ERR_PTR(-ENOENT); | ||
32 | |||
33 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
34 | if (!inode) | ||
35 | goto out; | ||
36 | |||
37 | ei = PROC_I(inode); | ||
38 | inode->i_mode = S_IFREG|S_IRUSR; | ||
39 | inode->i_fop = &ns_file_operations; | ||
40 | ei->ns_ops = ns_ops; | ||
41 | ei->ns = ns_ops->get(task); | ||
42 | if (!ei->ns) | ||
43 | goto out_iput; | ||
44 | |||
45 | dentry->d_op = &pid_dentry_operations; | ||
46 | d_add(dentry, inode); | ||
47 | /* Close the race of the process dying before we return the dentry */ | ||
48 | if (pid_revalidate(dentry, NULL)) | ||
49 | error = NULL; | ||
50 | out: | ||
51 | return error; | ||
52 | out_iput: | ||
53 | iput(inode); | ||
54 | goto out; | ||
55 | } | ||
56 | |||
57 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | ||
58 | filldir_t filldir, struct task_struct *task, | ||
59 | const struct proc_ns_operations *ops) | ||
60 | { | ||
61 | return proc_fill_cache(filp, dirent, filldir, | ||
62 | ops->name, strlen(ops->name), | ||
63 | proc_ns_instantiate, task, ops); | ||
64 | } | ||
65 | |||
66 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | ||
67 | filldir_t filldir) | ||
68 | { | ||
69 | int i; | ||
70 | struct dentry *dentry = filp->f_path.dentry; | ||
71 | struct inode *inode = dentry->d_inode; | ||
72 | struct task_struct *task = get_proc_task(inode); | ||
73 | const struct proc_ns_operations **entry, **last; | ||
74 | ino_t ino; | ||
75 | int ret; | ||
76 | |||
77 | ret = -ENOENT; | ||
78 | if (!task) | ||
79 | goto out_no_task; | ||
80 | |||
81 | ret = -EPERM; | ||
82 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
83 | goto out; | ||
84 | |||
85 | ret = 0; | ||
86 | i = filp->f_pos; | ||
87 | switch (i) { | ||
88 | case 0: | ||
89 | ino = inode->i_ino; | ||
90 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
91 | goto out; | ||
92 | i++; | ||
93 | filp->f_pos++; | ||
94 | /* fall through */ | ||
95 | case 1: | ||
96 | ino = parent_ino(dentry); | ||
97 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
98 | goto out; | ||
99 | i++; | ||
100 | filp->f_pos++; | ||
101 | /* fall through */ | ||
102 | default: | ||
103 | i -= 2; | ||
104 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
105 | ret = 1; | ||
106 | goto out; | ||
107 | } | ||
108 | entry = ns_entries + i; | ||
109 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
110 | while (entry <= last) { | ||
111 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
112 | task, *entry) < 0) | ||
113 | goto out; | ||
114 | filp->f_pos++; | ||
115 | entry++; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | ret = 1; | ||
120 | out: | ||
121 | put_task_struct(task); | ||
122 | out_no_task: | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | const struct file_operations proc_ns_dir_operations = { | ||
127 | .read = generic_read_dir, | ||
128 | .readdir = proc_ns_dir_readdir, | ||
129 | }; | ||
130 | |||
131 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | ||
132 | struct dentry *dentry, struct nameidata *nd) | ||
133 | { | ||
134 | struct dentry *error; | ||
135 | struct task_struct *task = get_proc_task(dir); | ||
136 | const struct proc_ns_operations **entry, **last; | ||
137 | unsigned int len = dentry->d_name.len; | ||
138 | |||
139 | error = ERR_PTR(-ENOENT); | ||
140 | |||
141 | if (!task) | ||
142 | goto out_no_task; | ||
143 | |||
144 | error = ERR_PTR(-EPERM); | ||
145 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
146 | goto out; | ||
147 | |||
148 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
149 | for (entry = ns_entries; entry <= last; entry++) { | ||
150 | if (strlen((*entry)->name) != len) | ||
151 | continue; | ||
152 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | ||
153 | break; | ||
154 | } | ||
155 | if (entry > last) | ||
156 | goto out; | ||
157 | |||
158 | error = proc_ns_instantiate(dir, dentry, task, *entry); | ||
159 | out: | ||
160 | put_task_struct(task); | ||
161 | out_no_task: | ||
162 | return error; | ||
163 | } | ||
164 | |||
165 | const struct inode_operations proc_ns_dir_inode_operations = { | ||
166 | .lookup = proc_ns_dir_lookup, | ||
167 | .getattr = pid_getattr, | ||
168 | .setattr = proc_setattr, | ||
169 | }; | ||
170 | |||
171 | struct file *proc_ns_fget(int fd) | ||
172 | { | ||
173 | struct file *file; | ||
174 | |||
175 | file = fget(fd); | ||
176 | if (!file) | ||
177 | return ERR_PTR(-EBADF); | ||
178 | |||
179 | if (file->f_op != &ns_file_operations) | ||
180 | goto out_invalid; | ||
181 | |||
182 | return file; | ||
183 | |||
184 | out_invalid: | ||
185 | fput(file); | ||
186 | return ERR_PTR(-EINVAL); | ||
187 | } | ||
188 | |||