aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2010-03-07 19:41:34 -0500
committerEric W. Biederman <ebiederm@xmission.com>2011-05-10 17:31:44 -0400
commit6b4e306aa3dc94a0545eb9279475b1ab6209a31f (patch)
treeca8c6dec0805076f0b5ba7c547e3cb2004e3aea2
parent0ee5623f9a6e52df90a78bd21179f8ab370e102e (diff)
ns: proc files for namespace naming policy.
Create files under /proc/<pid>/ns/ to allow controlling the namespaces of a process.

This addresses three specific problems that can make namespaces hard to work with.
- Namespaces require a dedicated process to pin them in memory.
- It is not possible to use a namespace unless you are the child of the original creator.
- Namespaces don't have names that userspace can use to talk about them.

The namespace files under /proc/<pid>/ns/ can be opened and the file descriptor can be used to talk about a specific namespace, and to keep the specified namespace alive.

A namespace can be kept alive by either holding the file descriptor open or bind mounting the file someplace else. For example:
mount --bind /proc/self/ns/net /some/filesystem/path
mount --bind /proc/self/fd/<N> /some/filesystem/path

This allows namespaces to be named with userspace policy.

It requires additional support to make use of these file descriptors, and that will be coming in the following patches.

Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h18
-rw-r--r--fs/proc/namespaces.c188
-rw-r--r--include/linux/proc_fs.h18
6 files changed, 241 insertions, 11 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
20proc-y += uptime.o 20proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o
23proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
24proc-$(CONFIG_NET) += proc_net.o 25proc-$(CONFIG_NET) += proc_net.o
25proc-$(CONFIG_PROC_KCORE) += kcore.o 26proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
600 return allowed; 600 return allowed;
601} 601}
602 602
603static int proc_setattr(struct dentry *dentry, struct iattr *attr) 603int proc_setattr(struct dentry *dentry, struct iattr *attr)
604{ 604{
605 int error; 605 int error;
606 struct inode *inode = dentry->d_inode; 606 struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
1736 return 0; 1736 return 0;
1737} 1737}
1738 1738
1739 1739struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1740static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1741{ 1740{
1742 struct inode * inode; 1741 struct inode * inode;
1743 struct proc_inode *ei; 1742 struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
1779 return NULL; 1778 return NULL;
1780} 1779}
1781 1780
1782static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1781int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1783{ 1782{
1784 struct inode *inode = dentry->d_inode; 1783 struct inode *inode = dentry->d_inode;
1785 struct task_struct *task; 1784 struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1820 * made this apply to all per process world readable and executable 1819 * made this apply to all per process world readable and executable
1821 * directories. 1820 * directories.
1822 */ 1821 */
1823static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1822int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1824{ 1823{
1825 struct inode *inode; 1824 struct inode *inode;
1826 struct task_struct *task; 1825 struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
1862 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1861 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1863} 1862}
1864 1863
1865static const struct dentry_operations pid_dentry_operations = 1864const struct dentry_operations pid_dentry_operations =
1866{ 1865{
1867 .d_revalidate = pid_revalidate, 1866 .d_revalidate = pid_revalidate,
1868 .d_delete = pid_delete_dentry, 1867 .d_delete = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
1870 1869
1871/* Lookups */ 1870/* Lookups */
1872 1871
1873typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1874 struct task_struct *, const void *);
1875
1876/* 1872/*
1877 * Fill a directory entry. 1873 * Fill a directory entry.
1878 * 1874 *
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1885 * reported by readdir in sync with the inode numbers reported 1881 * reported by readdir in sync with the inode numbers reported
1886 * by stat. 1882 * by stat.
1887 */ 1883 */
1888static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1884int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1889 char *name, int len, 1885 const char *name, int len,
1890 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1886 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1891{ 1887{
1892 struct dentry *child, *dir = filp->f_path.dentry; 1888 struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2820 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2816 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2821 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2817 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2822 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2818 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2819 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2823#ifdef CONFIG_NET 2820#ifdef CONFIG_NET
2824 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2821 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2825#endif 2822#endif
@@ -3168,6 +3165,7 @@ out_no_task:
3168static const struct pid_entry tid_base_stuff[] = { 3165static const struct pid_entry tid_base_stuff[] = {
3169 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3166 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3170 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3167 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3168 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3171 REG("environ", S_IRUSR, proc_environ_operations), 3169 REG("environ", S_IRUSR, proc_environ_operations),
3172 INF("auxv", S_IRUSR, proc_pid_auxv), 3170 INF("auxv", S_IRUSR, proc_pid_auxv),
3173 ONE("status", S_IRUGO, proc_pid_status), 3171 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head; 30 struct ctl_table_header *head;
31 const struct proc_ns_operations *ns_ops;
31 32
32 truncate_inode_pages(&inode->i_data, 0); 33 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode); 34 end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 45 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head); 46 sysctl_head_put(head);
46 } 47 }
48 /* Release any associated namespace */
49 ns_ops = PROC_I(inode)->ns_ops;
50 if (ns_ops && ns_ops->put)
51 ns_ops->put(PROC_I(inode)->ns);
47} 52}
48 53
49static struct kmem_cache * proc_inode_cachep; 54static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
62 ei->pde = NULL; 67 ei->pde = NULL;
63 ei->sysctl = NULL; 68 ei->sysctl = NULL;
64 ei->sysctl_entry = NULL; 69 ei->sysctl_entry = NULL;
70 ei->ns = NULL;
71 ei->ns_ops = NULL;
65 inode = &ei->vfs_inode; 72 inode = &ei->vfs_inode;
66 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 73 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
67 return inode; 74 return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..96245a1b1a7c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -119,3 +119,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
119 */ 119 */
120int proc_readdir(struct file *, void *, filldir_t); 120int proc_readdir(struct file *, void *, filldir_t);
121struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 121struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
122
123
124
125/* Lookups */
126typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
127 struct task_struct *, const void *);
128int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
129 const char *name, int len,
130 instantiate_t instantiate, struct task_struct *task, const void *ptr);
131int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
132struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
133extern const struct dentry_operations pid_dentry_operations;
134int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
135int proc_setattr(struct dentry *dentry, struct iattr *attr);
136
137extern const struct inode_operations proc_ns_dir_inode_operations;
138extern const struct file_operations proc_ns_dir_operations;
139
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..6ae9f07d59ee
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,188 @@
1#include <linux/proc_fs.h>
2#include <linux/nsproxy.h>
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <linux/fs_struct.h>
6#include <linux/mount.h>
7#include <linux/path.h>
8#include <linux/namei.h>
9#include <linux/file.h>
10#include <linux/utsname.h>
11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h>
15#include "internal.h"
16
17
/*
 * Table of namespace descriptors exposed as files in the ns directory.
 * proc_ns_dir_readdir() and proc_ns_dir_lookup() walk this array; it has
 * no entries as shown here.
 */
static const struct proc_ns_operations *ns_entries[] = {
};
20
/*
 * File operations installed on every namespace file inode by
 * proc_ns_instantiate().  proc_ns_fget() also compares a file's f_op
 * against this object to recognise namespace files.
 */
static const struct file_operations ns_file_operations = {
	.llseek		= no_llseek,
};
24
25static struct dentry *proc_ns_instantiate(struct inode *dir,
26 struct dentry *dentry, struct task_struct *task, const void *ptr)
27{
28 const struct proc_ns_operations *ns_ops = ptr;
29 struct inode *inode;
30 struct proc_inode *ei;
31 struct dentry *error = ERR_PTR(-ENOENT);
32
33 inode = proc_pid_make_inode(dir->i_sb, task);
34 if (!inode)
35 goto out;
36
37 ei = PROC_I(inode);
38 inode->i_mode = S_IFREG|S_IRUSR;
39 inode->i_fop = &ns_file_operations;
40 ei->ns_ops = ns_ops;
41 ei->ns = ns_ops->get(task);
42 if (!ei->ns)
43 goto out_iput;
44
45 dentry->d_op = &pid_dentry_operations;
46 d_add(dentry, inode);
47 /* Close the race of the process dying before we return the dentry */
48 if (pid_revalidate(dentry, NULL))
49 error = NULL;
50out:
51 return error;
52out_iput:
53 iput(inode);
54 goto out;
55}
56
57static int proc_ns_fill_cache(struct file *filp, void *dirent,
58 filldir_t filldir, struct task_struct *task,
59 const struct proc_ns_operations *ops)
60{
61 return proc_fill_cache(filp, dirent, filldir,
62 ops->name, strlen(ops->name),
63 proc_ns_instantiate, task, ops);
64}
65
66static int proc_ns_dir_readdir(struct file *filp, void *dirent,
67 filldir_t filldir)
68{
69 int i;
70 struct dentry *dentry = filp->f_path.dentry;
71 struct inode *inode = dentry->d_inode;
72 struct task_struct *task = get_proc_task(inode);
73 const struct proc_ns_operations **entry, **last;
74 ino_t ino;
75 int ret;
76
77 ret = -ENOENT;
78 if (!task)
79 goto out_no_task;
80
81 ret = -EPERM;
82 if (!ptrace_may_access(task, PTRACE_MODE_READ))
83 goto out;
84
85 ret = 0;
86 i = filp->f_pos;
87 switch (i) {
88 case 0:
89 ino = inode->i_ino;
90 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
91 goto out;
92 i++;
93 filp->f_pos++;
94 /* fall through */
95 case 1:
96 ino = parent_ino(dentry);
97 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
98 goto out;
99 i++;
100 filp->f_pos++;
101 /* fall through */
102 default:
103 i -= 2;
104 if (i >= ARRAY_SIZE(ns_entries)) {
105 ret = 1;
106 goto out;
107 }
108 entry = ns_entries + i;
109 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
110 while (entry <= last) {
111 if (proc_ns_fill_cache(filp, dirent, filldir,
112 task, *entry) < 0)
113 goto out;
114 filp->f_pos++;
115 entry++;
116 }
117 }
118
119 ret = 1;
120out:
121 put_task_struct(task);
122out_no_task:
123 return ret;
124}
125
/*
 * File operations for the ns directory itself; referenced by the "ns"
 * DIR() entries in the per-tgid and per-tid tables in base.c.
 */
const struct file_operations proc_ns_dir_operations = {
	.read		= generic_read_dir,
	.readdir	= proc_ns_dir_readdir,
};
130
131static struct dentry *proc_ns_dir_lookup(struct inode *dir,
132 struct dentry *dentry, struct nameidata *nd)
133{
134 struct dentry *error;
135 struct task_struct *task = get_proc_task(dir);
136 const struct proc_ns_operations **entry, **last;
137 unsigned int len = dentry->d_name.len;
138
139 error = ERR_PTR(-ENOENT);
140
141 if (!task)
142 goto out_no_task;
143
144 error = ERR_PTR(-EPERM);
145 if (!ptrace_may_access(task, PTRACE_MODE_READ))
146 goto out;
147
148 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
149 for (entry = ns_entries; entry <= last; entry++) {
150 if (strlen((*entry)->name) != len)
151 continue;
152 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
153 break;
154 }
155 if (entry > last)
156 goto out;
157
158 error = proc_ns_instantiate(dir, dentry, task, *entry);
159out:
160 put_task_struct(task);
161out_no_task:
162 return error;
163}
164
/*
 * Inode operations for the ns directory: name lookup is handled here,
 * getattr/setattr reuse the helpers exported from base.c.
 */
const struct inode_operations proc_ns_dir_inode_operations = {
	.lookup		= proc_ns_dir_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
170
171struct file *proc_ns_fget(int fd)
172{
173 struct file *file;
174
175 file = fget(fd);
176 if (!file)
177 return ERR_PTR(-EBADF);
178
179 if (file->f_op != &ns_file_operations)
180 goto out_invalid;
181
182 return file;
183
184out_invalid:
185 fput(file);
186 return ERR_PTR(-EINVAL);
187}
188
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 838c1149251a..a6d2c6da5e5a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -179,6 +179,8 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
179extern struct file *get_mm_exe_file(struct mm_struct *mm); 179extern struct file *get_mm_exe_file(struct mm_struct *mm);
180extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); 180extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
181 181
182extern struct file *proc_ns_fget(int fd);
183
182#else 184#else
183 185
184#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) 186#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -239,6 +241,11 @@ static inline void dup_mm_exe_file(struct mm_struct *oldmm,
239 struct mm_struct *newmm) 241 struct mm_struct *newmm)
240{} 242{}
241 243
/*
 * Stub used when CONFIG_PROC_FS is not enabled: there are no namespace
 * files, so every descriptor is rejected with -EINVAL.
 */
static inline struct file *proc_ns_fget(int fd)
{
	return ERR_PTR(-EINVAL);
}
248
242#endif /* CONFIG_PROC_FS */ 249#endif /* CONFIG_PROC_FS */
243 250
244#if !defined(CONFIG_PROC_KCORE) 251#if !defined(CONFIG_PROC_KCORE)
@@ -250,6 +257,15 @@ kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
250extern void kclist_add(struct kcore_list *, void *, size_t, int type); 257extern void kclist_add(struct kcore_list *, void *, size_t, int type);
251#endif 258#endif
252 259
260struct nsproxy;
/*
 * Describes one kind of namespace that can be exposed as a file in the
 * per-task ns directory.
 */
struct proc_ns_operations {
	/* File name of the entry inside the ns directory. */
	const char *name;
	/* NOTE(review): not used by the code shown here; presumably identifies the namespace kind - confirm. */
	int type;
	/*
	 * Return the namespace of @task, or NULL on failure.  The result is
	 * stored in the proc inode and later handed to put().
	 */
	void *(*get)(struct task_struct *task);
	/* Release a namespace obtained from get(); called when the proc inode is evicted. */
	void (*put)(void *ns);
	/* NOTE(review): not called by the code shown here; presumably switches @nsproxy to @ns - confirm. */
	int (*install)(struct nsproxy *nsproxy, void *ns);
};
268
253union proc_op { 269union proc_op {
254 int (*proc_get_link)(struct inode *, struct path *); 270 int (*proc_get_link)(struct inode *, struct path *);
255 int (*proc_read)(struct task_struct *task, char *page); 271 int (*proc_read)(struct task_struct *task, char *page);
@@ -268,6 +284,8 @@ struct proc_inode {
268 struct proc_dir_entry *pde; 284 struct proc_dir_entry *pde;
269 struct ctl_table_header *sysctl; 285 struct ctl_table_header *sysctl;
270 struct ctl_table *sysctl_entry; 286 struct ctl_table *sysctl_entry;
287 void *ns;
288 const struct proc_ns_operations *ns_ops;
271 struct inode vfs_inode; 289 struct inode vfs_inode;
272}; 290};
273 291