aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nsfs.c
diff options
context:
space:
mode:
author: Eric W. Biederman <ebiederm@xmission.com> 2016-09-22 21:00:36 -0400
committer: Eric W. Biederman <ebiederm@xmission.com> 2016-09-22 21:00:36 -0400
commit: 78725596644be0181c46f55c52aadfb8c70bcdb7 (patch)
tree: baaea28de07a45f932f7674cfcd6c83522940770 /fs/nsfs.c
parent: 93f0a88bd4ad99a515f500a09f4a489ff03073eb (diff)
parent: 6ad92bf63e45f97e306da48cd1cbce6e4fef1e5d (diff)
Merge branch 'nsfs-ioctls' into HEAD
From: Andrey Vagin <avagin@openvz.org> Each namespace has an owning user namespace, and currently there is no way to discover these relationships. Pid and user namespaces are hierarchical. There is no way to discover parent-child relationships either. Why might we want to know the relationships between namespaces? One use would be visualization, in order to understand the running system. Another would be to answer the question: what capability does process X have to perform operations on a resource governed by namespace Y? One more use-case (which is usually called abnormal) is checkpoint/restart. In CRIU we are going to dump and restore nested namespaces. There [1] was a discussion about which interface to choose to determine relationships between namespaces. Eric suggested to add two ioctl-s [2]: > Grumble, Grumble. I think this may actually a case for creating ioctls > for these two cases. Now that random nsfs file descriptors are bind > mountable the original reason for using proc files is not as pressing. > > One ioctl for the user namespace that owns a file descriptor. > One ioctl for the parent namespace of a namespace file descriptor. Here is an implementation of these ioctl-s. $ man man7/namespaces.7 ... Since Linux 4.X, the following ioctl(2) calls are supported for namespace file descriptors. The correct syntax is: fd = ioctl(ns_fd, ioctl_type); where ioctl_type is one of the following: NS_GET_USERNS Returns a file descriptor that refers to an owning user namespace. NS_GET_PARENT Returns a file descriptor that refers to a parent namespace. This ioctl(2) can be used for pid and user namespaces. For user namespaces, NS_GET_PARENT and NS_GET_USERNS have the same meaning. In addition to generic ioctl(2) errors, the following specific ones can occur: EINVAL NS_GET_PARENT was called for a nonhierarchical namespace. EPERM The requested namespace is outside of the current namespace scope.
[1] https://lkml.org/lkml/2016/7/6/158 [2] https://lkml.org/lkml/2016/7/9/101 Changes for v2: * don't return ENOENT for init_user_ns and init_pid_ns. There is nothing outside of the init namespace, so we can return EPERM in this case too. > The fewer special cases the easier the code is to get > correct, and the easier it is to read. // Eric Changes for v3: * rename ns->get_owner() to ns->owner(). get_* usually means that it grabs a reference. Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com> Cc: "W. Trevor King" <wking@tremily.us> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Serge Hallyn <serge.hallyn@canonical.com>
Diffstat (limited to 'fs/nsfs.c')
-rw-r--r-- fs/nsfs.c 100
1 files changed, 87 insertions, 13 deletions
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 8f20d6016e20..fb7b397a1297 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -5,11 +5,16 @@
5#include <linux/magic.h> 5#include <linux/magic.h>
6#include <linux/ktime.h> 6#include <linux/ktime.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/user_namespace.h>
9#include <linux/nsfs.h>
8 10
9static struct vfsmount *nsfs_mnt; 11static struct vfsmount *nsfs_mnt;
10 12
13static long ns_ioctl(struct file *filp, unsigned int ioctl,
14 unsigned long arg);
11static const struct file_operations ns_file_operations = { 15static const struct file_operations ns_file_operations = {
12 .llseek = no_llseek, 16 .llseek = no_llseek,
17 .unlocked_ioctl = ns_ioctl,
13}; 18};
14 19
15static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 20static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
@@ -44,22 +49,14 @@ static void nsfs_evict(struct inode *inode)
44 ns->ops->put(ns); 49 ns->ops->put(ns);
45} 50}
46 51
47void *ns_get_path(struct path *path, struct task_struct *task, 52static void *__ns_get_path(struct path *path, struct ns_common *ns)
48 const struct proc_ns_operations *ns_ops)
49{ 53{
50 struct vfsmount *mnt = mntget(nsfs_mnt); 54 struct vfsmount *mnt = mntget(nsfs_mnt);
51 struct qstr qname = { .name = "", }; 55 struct qstr qname = { .name = "", };
52 struct dentry *dentry; 56 struct dentry *dentry;
53 struct inode *inode; 57 struct inode *inode;
54 struct ns_common *ns;
55 unsigned long d; 58 unsigned long d;
56 59
57again:
58 ns = ns_ops->get(task);
59 if (!ns) {
60 mntput(mnt);
61 return ERR_PTR(-ENOENT);
62 }
63 rcu_read_lock(); 60 rcu_read_lock();
64 d = atomic_long_read(&ns->stashed); 61 d = atomic_long_read(&ns->stashed);
65 if (!d) 62 if (!d)
@@ -68,7 +65,7 @@ again:
68 if (!lockref_get_not_dead(&dentry->d_lockref)) 65 if (!lockref_get_not_dead(&dentry->d_lockref))
69 goto slow; 66 goto slow;
70 rcu_read_unlock(); 67 rcu_read_unlock();
71 ns_ops->put(ns); 68 ns->ops->put(ns);
72got_it: 69got_it:
73 path->mnt = mnt; 70 path->mnt = mnt;
74 path->dentry = dentry; 71 path->dentry = dentry;
@@ -77,7 +74,7 @@ slow:
77 rcu_read_unlock(); 74 rcu_read_unlock();
78 inode = new_inode_pseudo(mnt->mnt_sb); 75 inode = new_inode_pseudo(mnt->mnt_sb);
79 if (!inode) { 76 if (!inode) {
80 ns_ops->put(ns); 77 ns->ops->put(ns);
81 mntput(mnt); 78 mntput(mnt);
82 return ERR_PTR(-ENOMEM); 79 return ERR_PTR(-ENOMEM);
83 } 80 }
@@ -95,17 +92,94 @@ slow:
95 return ERR_PTR(-ENOMEM); 92 return ERR_PTR(-ENOMEM);
96 } 93 }
97 d_instantiate(dentry, inode); 94 d_instantiate(dentry, inode);
98 dentry->d_fsdata = (void *)ns_ops; 95 dentry->d_fsdata = (void *)ns->ops;
99 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); 96 d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
100 if (d) { 97 if (d) {
101 d_delete(dentry); /* make sure ->d_prune() does nothing */ 98 d_delete(dentry); /* make sure ->d_prune() does nothing */
102 dput(dentry); 99 dput(dentry);
100 mntput(mnt);
103 cpu_relax(); 101 cpu_relax();
104 goto again; 102 return ERR_PTR(-EAGAIN);
105 } 103 }
106 goto got_it; 104 goto got_it;
107} 105}
108 106
107void *ns_get_path(struct path *path, struct task_struct *task,
108 const struct proc_ns_operations *ns_ops)
109{
110 struct ns_common *ns;
111 void *ret;
112
113again:
114 ns = ns_ops->get(task);
115 if (!ns)
116 return ERR_PTR(-ENOENT);
117
118 ret = __ns_get_path(path, ns);
119 if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
120 goto again;
121 return ret;
122}
123
124static int open_related_ns(struct ns_common *ns,
125 struct ns_common *(*get_ns)(struct ns_common *ns))
126{
127 struct path path = {};
128 struct file *f;
129 void *err;
130 int fd;
131
132 fd = get_unused_fd_flags(O_CLOEXEC);
133 if (fd < 0)
134 return fd;
135
136 while (1) {
137 struct ns_common *relative;
138
139 relative = get_ns(ns);
140 if (IS_ERR(relative)) {
141 put_unused_fd(fd);
142 return PTR_ERR(relative);
143 }
144
145 err = __ns_get_path(&path, relative);
146 if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
147 continue;
148 break;
149 }
150 if (IS_ERR(err)) {
151 put_unused_fd(fd);
152 return PTR_ERR(err);
153 }
154
155 f = dentry_open(&path, O_RDONLY, current_cred());
156 path_put(&path);
157 if (IS_ERR(f)) {
158 put_unused_fd(fd);
159 fd = PTR_ERR(f);
160 } else
161 fd_install(fd, f);
162
163 return fd;
164}
165
166static long ns_ioctl(struct file *filp, unsigned int ioctl,
167 unsigned long arg)
168{
169 struct ns_common *ns = get_proc_ns(file_inode(filp));
170
171 switch (ioctl) {
172 case NS_GET_USERNS:
173 return open_related_ns(ns, ns_get_owner);
174 case NS_GET_PARENT:
175 if (!ns->ops->get_parent)
176 return -EINVAL;
177 return open_related_ns(ns, ns->ops->get_parent);
178 default:
179 return -ENOTTY;
180 }
181}
182
109int ns_get_name(char *buf, size_t size, struct task_struct *task, 183int ns_get_name(char *buf, size_t size, struct task_struct *task,
110 const struct proc_ns_operations *ns_ops) 184 const struct proc_ns_operations *ns_ops)
111{ 185{