aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Al Viro <viro@zeniv.linux.org.uk> 2014-11-01 10:57:28 -0400
committer Al Viro <viro@zeniv.linux.org.uk> 2014-12-10 21:30:20 -0500
commit e149ed2b805fefdccf7ccdfc19eca22fdd4514ac (patch)
tree 8c9cd88deff8c7309ca2acb8d4cb475aaca47b14 /fs
parent f77c80142e1afe6d5c16975ca5d7d1fc324b16f9 (diff)
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now. It's not mountable (not even registered, so it's not in /proc/filesystems, etc.). Files on it *are* bindable - we explicitly permit that in do_loopback(). This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well. get_proc_ns() is a macro now (it's simply returning ->i_private; would have been an inline, if not for header ordering headache). proc_ns_inode() is an ex-parrot. The interface used in procfs is ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops). Dentries and inodes are never hashed; a non-counting reference to dentry is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path() if present. See ns_get_path()/ns_prune_dentry()/nsfs_evict() for details of that mechanism. As a result, proc_ns_follow_link() has stopped poking in nd->path.mnt; it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets from ns_get_path(). Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namespace.c9
-rw-r--r--fs/nsfs.c161
-rw-r--r--fs/proc/inode.c5
-rw-r--r--fs/proc/namespaces.c152
6 files changed, 186 insertions, 148 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 34a1b9dea6dd..34393376eaa2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o splice.o sync.o utimes.o \ 13 pnode.o splice.o sync.o utimes.o \
14 stack.o fs_struct.o statfs.o fs_pin.o 14 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
15 15
16ifeq ($(CONFIG_BLOCK),y) 16ifeq ($(CONFIG_BLOCK),y)
17obj-y += buffer.o block_dev.o direct-io.o mpage.o 17obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/internal.h b/fs/internal.h
index 757ba2abf21e..e9a61fe67575 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -147,3 +147,8 @@ extern const struct file_operations pipefifo_fops;
147 */ 147 */
148extern void sb_pin_kill(struct super_block *sb); 148extern void sb_pin_kill(struct super_block *sb);
149extern void mnt_pin_kill(struct mount *m); 149extern void mnt_pin_kill(struct mount *m);
150
/*
 * fs/nsfs.c
 *
 * Dentry operations used by every nsfs dentry.  The object is defined
 * const in fs/nsfs.c, so the declaration must be const-qualified as well;
 * users only compare ->d_op against its address.
 */
extern const struct dentry_operations ns_dentry_operations;
diff --git a/fs/namespace.c b/fs/namespace.c
index 9dfb4cac0c41..30df6e7dd807 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1569,8 +1569,8 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1569static bool is_mnt_ns_file(struct dentry *dentry) 1569static bool is_mnt_ns_file(struct dentry *dentry)
1570{ 1570{
1571 /* Is this a proxy for a mount namespace? */ 1571 /* Is this a proxy for a mount namespace? */
1572 struct inode *inode = dentry->d_inode; 1572 return dentry->d_op == &ns_dentry_operations &&
1573 return proc_ns_inode(inode) && dentry->d_fsdata == &mntns_operations; 1573 dentry->d_fsdata == &mntns_operations;
1574} 1574}
1575 1575
1576struct mnt_namespace *to_mnt_ns(struct ns_common *ns) 1576struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
@@ -2016,7 +2016,10 @@ static int do_loopback(struct path *path, const char *old_name,
2016 if (IS_MNT_UNBINDABLE(old)) 2016 if (IS_MNT_UNBINDABLE(old))
2017 goto out2; 2017 goto out2;
2018 2018
2019 if (!check_mnt(parent) || !check_mnt(old)) 2019 if (!check_mnt(parent))
2020 goto out2;
2021
2022 if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
2020 goto out2; 2023 goto out2;
2021 2024
2022 if (!recurse && has_locked_children(old, old_path.dentry)) 2025 if (!recurse && has_locked_children(old, old_path.dentry))
diff --git a/fs/nsfs.c b/fs/nsfs.c
new file mode 100644
index 000000000000..af1b24fa899d
--- /dev/null
+++ b/fs/nsfs.c
@@ -0,0 +1,161 @@
1#include <linux/mount.h>
2#include <linux/file.h>
3#include <linux/fs.h>
4#include <linux/proc_ns.h>
5#include <linux/magic.h>
6#include <linux/ktime.h>
7
8static struct vfsmount *nsfs_mnt;
9
10static const struct file_operations ns_file_operations = {
11 .llseek = no_llseek,
12};
13
14static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
15{
16 struct inode *inode = dentry->d_inode;
17 const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
18
19 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
20 ns_ops->name, inode->i_ino);
21}
22
23static void ns_prune_dentry(struct dentry *dentry)
24{
25 struct inode *inode = dentry->d_inode;
26 if (inode) {
27 struct ns_common *ns = inode->i_private;
28 atomic_long_set(&ns->stashed, 0);
29 }
30}
31
32const struct dentry_operations ns_dentry_operations =
33{
34 .d_prune = ns_prune_dentry,
35 .d_delete = always_delete_dentry,
36 .d_dname = ns_dname,
37};
38
39static void nsfs_evict(struct inode *inode)
40{
41 struct ns_common *ns = inode->i_private;
42 clear_inode(inode);
43 ns->ops->put(ns);
44}
45
46void *ns_get_path(struct path *path, struct task_struct *task,
47 const struct proc_ns_operations *ns_ops)
48{
49 struct vfsmount *mnt = mntget(nsfs_mnt);
50 struct qstr qname = { .name = "", };
51 struct dentry *dentry;
52 struct inode *inode;
53 struct ns_common *ns;
54 unsigned long d;
55
56again:
57 ns = ns_ops->get(task);
58 if (!ns) {
59 mntput(mnt);
60 return ERR_PTR(-ENOENT);
61 }
62 rcu_read_lock();
63 d = atomic_long_read(&ns->stashed);
64 if (!d)
65 goto slow;
66 dentry = (struct dentry *)d;
67 if (!lockref_get_not_dead(&dentry->d_lockref))
68 goto slow;
69 rcu_read_unlock();
70 ns_ops->put(ns);
71got_it:
72 path->mnt = mnt;
73 path->dentry = dentry;
74 return NULL;
75slow:
76 rcu_read_unlock();
77 inode = new_inode_pseudo(mnt->mnt_sb);
78 if (!inode) {
79 ns_ops->put(ns);
80 mntput(mnt);
81 return ERR_PTR(-ENOMEM);
82 }
83 inode->i_ino = ns->inum;
84 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
85 inode->i_flags |= S_IMMUTABLE;
86 inode->i_mode = S_IFREG | S_IRUGO;
87 inode->i_fop = &ns_file_operations;
88 inode->i_private = ns;
89
90 dentry = d_alloc_pseudo(mnt->mnt_sb, &qname);
91 if (!dentry) {
92 iput(inode);
93 mntput(mnt);
94 return ERR_PTR(-ENOMEM);
95 }
96 d_instantiate(dentry, inode);
97 dentry->d_fsdata = (void *)ns_ops;
98 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
99 if (d) {
100 d_delete(dentry); /* make sure ->d_prune() does nothing */
101 dput(dentry);
102 cpu_relax();
103 goto again;
104 }
105 goto got_it;
106}
107
108int ns_get_name(char *buf, size_t size, struct task_struct *task,
109 const struct proc_ns_operations *ns_ops)
110{
111 struct ns_common *ns;
112 int res = -ENOENT;
113 ns = ns_ops->get(task);
114 if (ns) {
115 res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum);
116 ns_ops->put(ns);
117 }
118 return res;
119}
120
121struct file *proc_ns_fget(int fd)
122{
123 struct file *file;
124
125 file = fget(fd);
126 if (!file)
127 return ERR_PTR(-EBADF);
128
129 if (file->f_op != &ns_file_operations)
130 goto out_invalid;
131
132 return file;
133
134out_invalid:
135 fput(file);
136 return ERR_PTR(-EINVAL);
137}
138
139static const struct super_operations nsfs_ops = {
140 .statfs = simple_statfs,
141 .evict_inode = nsfs_evict,
142};
143static struct dentry *nsfs_mount(struct file_system_type *fs_type,
144 int flags, const char *dev_name, void *data)
145{
146 return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
147 &ns_dentry_operations, NSFS_MAGIC);
148}
149static struct file_system_type nsfs = {
150 .name = "nsfs",
151 .mount = nsfs_mount,
152 .kill_sb = kill_anon_super,
153};
154
155void __init nsfs_init(void)
156{
157 nsfs_mnt = kern_mount(&nsfs);
158 if (IS_ERR(nsfs_mnt))
159 panic("can't set nsfs up\n");
160 nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER;
161}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a212996e0987..57a9be9a6668 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -32,7 +32,6 @@ static void proc_evict_inode(struct inode *inode)
32{ 32{
33 struct proc_dir_entry *de; 33 struct proc_dir_entry *de;
34 struct ctl_table_header *head; 34 struct ctl_table_header *head;
35 struct ns_common *ns;
36 35
37 truncate_inode_pages_final(&inode->i_data); 36 truncate_inode_pages_final(&inode->i_data);
38 clear_inode(inode); 37 clear_inode(inode);
@@ -49,10 +48,6 @@ static void proc_evict_inode(struct inode *inode)
49 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); 48 RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
50 sysctl_head_put(head); 49 sysctl_head_put(head);
51 } 50 }
52 /* Release any associated namespace */
53 ns = PROC_I(inode)->ns.ns;
54 if (ns && ns->ops)
55 ns->ops->put(ns);
56} 51}
57 52
58static struct kmem_cache * proc_inode_cachep; 53static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 18fc1cf899de..aaaac77abad0 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -1,10 +1,6 @@
1#include <linux/proc_fs.h> 1#include <linux/proc_fs.h>
2#include <linux/nsproxy.h> 2#include <linux/nsproxy.h>
3#include <linux/sched.h>
4#include <linux/ptrace.h> 3#include <linux/ptrace.h>
5#include <linux/fs_struct.h>
6#include <linux/mount.h>
7#include <linux/path.h>
8#include <linux/namei.h> 4#include <linux/namei.h>
9#include <linux/file.h> 5#include <linux/file.h>
10#include <linux/utsname.h> 6#include <linux/utsname.h>
@@ -34,139 +30,45 @@ static const struct proc_ns_operations *ns_entries[] = {
34 &mntns_operations, 30 &mntns_operations,
35}; 31};
36 32
37static const struct file_operations ns_file_operations = {
38 .llseek = no_llseek,
39};
40
41static const struct inode_operations ns_inode_operations = {
42 .setattr = proc_setattr,
43};
44
45static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
46{
47 struct inode *inode = dentry->d_inode;
48 const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
49
50 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
51 ns_ops->name, inode->i_ino);
52}
53
54const struct dentry_operations ns_dentry_operations =
55{
56 .d_delete = always_delete_dentry,
57 .d_dname = ns_dname,
58};
59
60static struct dentry *proc_ns_get_dentry(struct super_block *sb,
61 struct task_struct *task, const struct proc_ns_operations *ns_ops)
62{
63 struct dentry *dentry, *result;
64 struct inode *inode;
65 struct proc_inode *ei;
66 struct qstr qname = { .name = "", };
67 struct ns_common *ns;
68
69 ns = ns_ops->get(task);
70 if (!ns)
71 return ERR_PTR(-ENOENT);
72
73 dentry = d_alloc_pseudo(sb, &qname);
74 if (!dentry) {
75 ns_ops->put(ns);
76 return ERR_PTR(-ENOMEM);
77 }
78 dentry->d_fsdata = (void *)ns_ops;
79
80 inode = iget_locked(sb, ns->inum);
81 if (!inode) {
82 dput(dentry);
83 ns_ops->put(ns);
84 return ERR_PTR(-ENOMEM);
85 }
86
87 ei = PROC_I(inode);
88 if (inode->i_state & I_NEW) {
89 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
90 inode->i_op = &ns_inode_operations;
91 inode->i_mode = S_IFREG | S_IRUGO;
92 inode->i_fop = &ns_file_operations;
93 ei->ns.ns_ops = ns_ops;
94 ei->ns.ns = ns;
95 unlock_new_inode(inode);
96 } else {
97 ns_ops->put(ns);
98 }
99
100 d_set_d_op(dentry, &ns_dentry_operations);
101 result = d_instantiate_unique(dentry, inode);
102 if (result) {
103 dput(dentry);
104 dentry = result;
105 }
106
107 return dentry;
108}
109
110static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) 33static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
111{ 34{
112 struct inode *inode = dentry->d_inode; 35 struct inode *inode = dentry->d_inode;
113 struct super_block *sb = inode->i_sb; 36 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
114 struct proc_inode *ei = PROC_I(inode);
115 struct task_struct *task; 37 struct task_struct *task;
116 struct path ns_path; 38 struct path ns_path;
117 void *error = ERR_PTR(-EACCES); 39 void *error = ERR_PTR(-EACCES);
118 40
119 task = get_proc_task(inode); 41 task = get_proc_task(inode);
120 if (!task) 42 if (!task)
121 goto out; 43 return error;
122 44
123 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 45 if (ptrace_may_access(task, PTRACE_MODE_READ)) {
124 goto out_put_task; 46 error = ns_get_path(&ns_path, task, ns_ops);
125 47 if (!error)
126 ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); 48 nd_jump_link(nd, &ns_path);
127 if (IS_ERR(ns_path.dentry)) {
128 error = ERR_CAST(ns_path.dentry);
129 goto out_put_task;
130 } 49 }
131
132 ns_path.mnt = mntget(nd->path.mnt);
133 nd_jump_link(nd, &ns_path);
134 error = NULL;
135
136out_put_task:
137 put_task_struct(task); 50 put_task_struct(task);
138out:
139 return error; 51 return error;
140} 52}
141 53
142static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) 54static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
143{ 55{
144 struct inode *inode = dentry->d_inode; 56 struct inode *inode = dentry->d_inode;
145 struct proc_inode *ei = PROC_I(inode); 57 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
146 const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
147 struct task_struct *task; 58 struct task_struct *task;
148 struct ns_common *ns;
149 char name[50]; 59 char name[50];
150 int res = -EACCES; 60 int res = -EACCES;
151 61
152 task = get_proc_task(inode); 62 task = get_proc_task(inode);
153 if (!task) 63 if (!task)
154 goto out; 64 return res;
155
156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out_put_task;
158 65
159 res = -ENOENT; 66 if (ptrace_may_access(task, PTRACE_MODE_READ)) {
160 ns = ns_ops->get(task); 67 res = ns_get_name(name, sizeof(name), task, ns_ops);
161 if (!ns) 68 if (res >= 0)
162 goto out_put_task; 69 res = readlink_copy(buffer, buflen, name);
163 70 }
164 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns->inum);
165 res = readlink_copy(buffer, buflen, name);
166 ns_ops->put(ns);
167out_put_task:
168 put_task_struct(task); 71 put_task_struct(task);
169out:
170 return res; 72 return res;
171} 73}
172 74
@@ -268,31 +170,3 @@ const struct inode_operations proc_ns_dir_inode_operations = {
268 .getattr = pid_getattr, 170 .getattr = pid_getattr,
269 .setattr = proc_setattr, 171 .setattr = proc_setattr,
270}; 172};
271
272struct file *proc_ns_fget(int fd)
273{
274 struct file *file;
275
276 file = fget(fd);
277 if (!file)
278 return ERR_PTR(-EBADF);
279
280 if (file->f_op != &ns_file_operations)
281 goto out_invalid;
282
283 return file;
284
285out_invalid:
286 fput(file);
287 return ERR_PTR(-EINVAL);
288}
289
290struct ns_common *get_proc_ns(struct inode *inode)
291{
292 return PROC_I(inode)->ns.ns;
293}
294
295bool proc_ns_inode(struct inode *inode)
296{
297 return inode->i_fop == &ns_file_operations;
298}