aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 18:44:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 18:44:47 -0500
commit6a2b60b17b3e48a418695a94bd2420f6ab32e519 (patch)
tree54b7792fa68b8890f710fa6398b6ba8626a039a8
parent9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (diff)
parent98f842e675f96ffac96e6c50315790912b2812be (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace changes from Eric Biederman: "While small, this set of changes is very significant with respect to containers in general and user namespaces in particular. The user space interface is now complete. This set of changes adds support for unprivileged users to create user namespaces and as a user namespace root to create other namespaces. The tyranny of supporting suid root preventing unprivileged users from using cool new kernel features is broken. This set of changes completes the work on setns, adding support for the pid, user, and mount namespaces. This set of changes includes a bunch of basic pid namespace cleanups/simplifications. Of particular significance is the rework of the pid namespace cleanup so it no longer requires sending out tendrils into all kinds of unexpected cleanup paths for operation. At least one case of broken error handling is fixed by this cleanup. The files under /proc/<pid>/ns/ have been converted from regular files to magic symlinks which prevents incorrect caching by the VFS, ensuring the files always refer to the namespace the process is currently using and ensuring that the ptrace_may_access permission checks are always applied. The files under /proc/<pid>/ns/ have been given stable inode numbers so it is now possible to see if different processes share the same namespaces. Coming through David Miller's net tree are changes to relax many of the permission checks in the networking stack to allow the user namespace root to usefully use the networking stack. Similar changes for the mount namespace and the pid namespace are coming through my tree. Two small changes to add user namespace support were committed here and in David Miller's -net tree so that I could complete the work on the /proc/<pid>/ns/ files in this tree. 
Work remains to make it safe to build user namespaces and 9p, afs, ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs so the Kconfig guard remains in place preventing user namespaces from being built when any of those filesystems are enabled. Future design work remains to allow root users outside of the initial user namespace to mount more than just /proc and /sys." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits) proc: Usable inode numbers for the namespace file descriptors. proc: Fix the namespace inode permission checks. proc: Generalize proc inode allocation userns: Allow unprivilged mounts of proc and sysfs userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file procfs: Print task uids and gids in the userns that opened the proc file userns: Implement unshare of the user namespace userns: Implent proc namespace operations userns: Kill task_user_ns userns: Make create_new_namespaces take a user_ns parameter userns: Allow unprivileged use of setns. userns: Allow unprivileged users to create new namespaces userns: Allow setting a userns mapping to your current uid. userns: Allow chown and setgid preservation userns: Allow unprivileged users to create user namespaces. userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped userns: fix return value on mntns_install() failure vfs: Allow unprivileged manipulation of the mount namespace. vfs: Only support slave subtrees across different user namespaces vfs: Add a user namespace reference from struct mnt_namespace ...
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--drivers/staging/android/binder.c3
-rw-r--r--fs/attr.c11
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/inode.c24
-rw-r--r--fs/autofs4/waitq.c5
-rw-r--r--fs/exec.c9
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c211
-rw-r--r--fs/open.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c185
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/proc/self.c59
-rw-r--r--fs/sysfs/mount.c1
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/ipc_namespace.h9
-rw-r--r--include/linux/mnt_namespace.h3
-rw-r--r--include/linux/nsproxy.h2
-rw-r--r--include/linux/pid_namespace.h11
-rw-r--r--include/linux/proc_fs.h26
-rw-r--r--include/linux/user_namespace.h10
-rw-r--r--include/linux/utsname.h7
-rw-r--r--include/net/net_namespace.h2
-rw-r--r--init/Kconfig2
-rw-r--r--init/main.c1
-rw-r--r--init/version.c2
-rw-r--r--ipc/msgutil.c2
-rw-r--r--ipc/namespace.c32
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c69
-rw-r--r--kernel/nsproxy.c36
-rw-r--r--kernel/pid.c47
-rw-r--r--kernel/pid_namespace.c112
-rw-r--r--kernel/ptrace.c10
-rw-r--r--kernel/sched/core.c10
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/user_namespace.c147
-rw-r--r--kernel/utsname.c33
-rw-r--r--net/core/net_namespace.c31
-rw-r--r--security/yama/yama_lsm.c12
59 files changed, 996 insertions, 451 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 965d381abd7..25db92a8e1c 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
1094 LOAD_INT(c), LOAD_FRAC(c), 1094 LOAD_INT(c), LOAD_FRAC(c),
1095 count_active_contexts(), 1095 count_active_contexts(),
1096 atomic_read(&nr_spu_contexts), 1096 atomic_read(&nr_spu_contexts),
1097 current->nsproxy->pid_ns->last_pid); 1097 task_active_pid_ns(current)->last_pid);
1098 return 0; 1098 return 0;
1099} 1099}
1100 1100
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 49e3b49e552..4bd82ac0210 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -123,7 +123,7 @@ void mconsole_log(struct mc_request *req)
123 123
124void mconsole_proc(struct mc_request *req) 124void mconsole_proc(struct mc_request *req)
125{ 125{
126 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 126 struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
127 char *buf; 127 char *buf;
128 int len; 128 int len;
129 struct file *file; 129 struct file *file;
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 4a36e9ab8cf..2d12e8a1f82 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -35,6 +35,7 @@
35#include <linux/uaccess.h> 35#include <linux/uaccess.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/pid_namespace.h>
38 39
39#include "binder.h" 40#include "binder.h"
40#include "binder_trace.h" 41#include "binder_trace.h"
@@ -2320,7 +2321,7 @@ retry:
2320 if (t->from) { 2321 if (t->from) {
2321 struct task_struct *sender = t->from->proc->tsk; 2322 struct task_struct *sender = t->from->proc->tsk;
2322 tr.sender_pid = task_tgid_nr_ns(sender, 2323 tr.sender_pid = task_tgid_nr_ns(sender,
2323 current->nsproxy->pid_ns); 2324 task_active_pid_ns(current));
2324 } else { 2325 } else {
2325 tr.sender_pid = 0; 2326 tr.sender_pid = 0;
2326 } 2327 }
diff --git a/fs/attr.c b/fs/attr.c
index cce7df53b69..1449adb14ef 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
49 /* Make sure a caller can chown. */ 49 /* Make sure a caller can chown. */
50 if ((ia_valid & ATTR_UID) && 50 if ((ia_valid & ATTR_UID) &&
51 (!uid_eq(current_fsuid(), inode->i_uid) || 51 (!uid_eq(current_fsuid(), inode->i_uid) ||
52 !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) 52 !uid_eq(attr->ia_uid, inode->i_uid)) &&
53 !inode_capable(inode, CAP_CHOWN))
53 return -EPERM; 54 return -EPERM;
54 55
55 /* Make sure caller can chgrp. */ 56 /* Make sure caller can chgrp. */
56 if ((ia_valid & ATTR_GID) && 57 if ((ia_valid & ATTR_GID) &&
57 (!uid_eq(current_fsuid(), inode->i_uid) || 58 (!uid_eq(current_fsuid(), inode->i_uid) ||
58 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && 59 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
59 !capable(CAP_CHOWN)) 60 !inode_capable(inode, CAP_CHOWN))
60 return -EPERM; 61 return -EPERM;
61 62
62 /* Make sure a caller can chmod. */ 63 /* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
65 return -EPERM; 66 return -EPERM;
66 /* Also check the setgid bit! */ 67 /* Also check the setgid bit! */
67 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 68 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
68 inode->i_gid) && !capable(CAP_FSETID)) 69 inode->i_gid) &&
70 !inode_capable(inode, CAP_FSETID))
69 attr->ia_mode &= ~S_ISGID; 71 attr->ia_mode &= ~S_ISGID;
70 } 72 }
71 73
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
157 if (ia_valid & ATTR_MODE) { 159 if (ia_valid & ATTR_MODE) {
158 umode_t mode = attr->ia_mode; 160 umode_t mode = attr->ia_mode;
159 161
160 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 162 if (!in_group_p(inode->i_gid) &&
163 !inode_capable(inode, CAP_FSETID))
161 mode &= ~S_ISGID; 164 mode &= ~S_ISGID;
162 inode->i_mode = mode; 165 inode->i_mode = mode;
163 } 166 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 908e1845541..b785e770795 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_info {
74 unsigned long last_used; 74 unsigned long last_used;
75 atomic_t count; 75 atomic_t count;
76 76
77 uid_t uid; 77 kuid_t uid;
78 gid_t gid; 78 kgid_t gid;
79}; 79};
80 80
81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@ struct autofs_wait_queue {
89 struct qstr name; 89 struct qstr name;
90 u32 dev; 90 u32 dev;
91 u64 ino; 91 u64 ino;
92 uid_t uid; 92 kuid_t uid;
93 gid_t gid; 93 kgid_t gid;
94 pid_t pid; 94 pid_t pid;
95 pid_t tgid; 95 pid_t tgid;
96 /* This is for status reporting upon return */ 96 /* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index a16214109d3..9f68a37bb2b 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
437 err = 0; 437 err = 0;
438 autofs4_expire_wait(path.dentry); 438 autofs4_expire_wait(path.dentry);
439 spin_lock(&sbi->fs_lock); 439 spin_lock(&sbi->fs_lock);
440 param->requester.uid = ino->uid; 440 param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
441 param->requester.gid = ino->gid; 441 param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
442 spin_unlock(&sbi->fs_lock); 442 spin_unlock(&sbi->fs_lock);
443 } 443 }
444 path_put(&path); 444 path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8a4fed8ead3..b104726e2d0 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
36 36
37void autofs4_clean_ino(struct autofs_info *ino) 37void autofs4_clean_ino(struct autofs_info *ino)
38{ 38{
39 ino->uid = 0; 39 ino->uid = GLOBAL_ROOT_UID;
40 ino->gid = 0; 40 ino->gid = GLOBAL_ROOT_GID;
41 ino->last_used = jiffies; 41 ino->last_used = jiffies;
42} 42}
43 43
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
79 return 0; 79 return 0;
80 80
81 seq_printf(m, ",fd=%d", sbi->pipefd); 81 seq_printf(m, ",fd=%d", sbi->pipefd);
82 if (root_inode->i_uid != 0) 82 if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
83 seq_printf(m, ",uid=%u", root_inode->i_uid); 83 seq_printf(m, ",uid=%u",
84 if (root_inode->i_gid != 0) 84 from_kuid_munged(&init_user_ns, root_inode->i_uid));
85 seq_printf(m, ",gid=%u", root_inode->i_gid); 85 if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
86 seq_printf(m, ",gid=%u",
87 from_kgid_munged(&init_user_ns, root_inode->i_gid));
86 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); 88 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
87 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); 89 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
88 seq_printf(m, ",minproto=%d", sbi->min_proto); 90 seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@ static const match_table_t tokens = {
126 {Opt_err, NULL} 128 {Opt_err, NULL}
127}; 129};
128 130
129static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, 131static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
130 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) 132 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
131{ 133{
132 char *p; 134 char *p;
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
159 case Opt_uid: 161 case Opt_uid:
160 if (match_int(args, &option)) 162 if (match_int(args, &option))
161 return 1; 163 return 1;
162 *uid = option; 164 *uid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(*uid))
166 return 1;
163 break; 167 break;
164 case Opt_gid: 168 case Opt_gid:
165 if (match_int(args, &option)) 169 if (match_int(args, &option))
166 return 1; 170 return 1;
167 *gid = option; 171 *gid = make_kgid(current_user_ns(), option);
172 if (!gid_valid(*gid))
173 return 1;
168 break; 174 break;
169 case Opt_pgrp: 175 case Opt_pgrp:
170 if (match_int(args, &option)) 176 if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dce436e595c..03bc1d347d8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 case autofs_ptype_expire_direct: 154 case autofs_ptype_expire_direct:
155 { 155 {
156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; 156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
157 struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
157 158
158 pktsz = sizeof(*packet); 159 pktsz = sizeof(*packet);
159 160
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
163 packet->name[wq->name.len] = '\0'; 164 packet->name[wq->name.len] = '\0';
164 packet->dev = wq->dev; 165 packet->dev = wq->dev;
165 packet->ino = wq->ino; 166 packet->ino = wq->ino;
166 packet->uid = wq->uid; 167 packet->uid = from_kuid_munged(user_ns, wq->uid);
167 packet->gid = wq->gid; 168 packet->gid = from_kgid_munged(user_ns, wq->gid);
168 packet->pid = wq->pid; 169 packet->pid = wq->pid;
169 packet->tgid = wq->tgid; 170 packet->tgid = wq->tgid;
170 break; 171 break;
diff --git a/fs/exec.c b/fs/exec.c
index 721a2992951..b71b08ce712 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm)
1266 bprm->cred->egid = current_egid(); 1266 bprm->cred->egid = current_egid();
1267 1267
1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1269 !current->no_new_privs) { 1269 !current->no_new_privs &&
1270 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1271 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1270 /* Set-uid? */ 1272 /* Set-uid? */
1271 if (mode & S_ISUID) { 1273 if (mode & S_ISUID) {
1272 if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
1273 return -EPERM;
1274 bprm->per_clear |= PER_CLEAR_ON_SETID; 1274 bprm->per_clear |= PER_CLEAR_ON_SETID;
1275 bprm->cred->euid = inode->i_uid; 1275 bprm->cred->euid = inode->i_uid;
1276
1277 } 1276 }
1278 1277
1279 /* Set-gid? */ 1278 /* Set-gid? */
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm)
1283 * executable. 1282 * executable.
1284 */ 1283 */
1285 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1284 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1286 if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
1287 return -EPERM;
1288 bprm->per_clear |= PER_CLEAR_ON_SETID; 1285 bprm->per_clear |= PER_CLEAR_ON_SETID;
1289 bprm->cred->egid = inode->i_gid; 1286 bprm->cred->egid = inode->i_gid;
1290 } 1287 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c23fa7a91e..c16335315e5 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req)
92 92
93static void fuse_req_init_context(struct fuse_req *req) 93static void fuse_req_init_context(struct fuse_req *req)
94{ 94{
95 req->in.h.uid = current_fsuid(); 95 req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
96 req->in.h.gid = current_fsgid(); 96 req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
97 req->in.h.pid = current->pid; 97 req->in.h.pid = current->pid;
98} 98}
99 99
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 324bc085053..b7c09f9eb40 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
818 stat->ino = attr->ino; 818 stat->ino = attr->ino;
819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
820 stat->nlink = attr->nlink; 820 stat->nlink = attr->nlink;
821 stat->uid = attr->uid; 821 stat->uid = make_kuid(&init_user_ns, attr->uid);
822 stat->gid = attr->gid; 822 stat->gid = make_kgid(&init_user_ns, attr->gid);
823 stat->rdev = inode->i_rdev; 823 stat->rdev = inode->i_rdev;
824 stat->atime.tv_sec = attr->atime; 824 stat->atime.tv_sec = attr->atime;
825 stat->atime.tv_nsec = attr->atimensec; 825 stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
1007 rcu_read_lock(); 1007 rcu_read_lock();
1008 ret = 0; 1008 ret = 0;
1009 cred = __task_cred(task); 1009 cred = __task_cred(task);
1010 if (cred->euid == fc->user_id && 1010 if (uid_eq(cred->euid, fc->user_id) &&
1011 cred->suid == fc->user_id && 1011 uid_eq(cred->suid, fc->user_id) &&
1012 cred->uid == fc->user_id && 1012 uid_eq(cred->uid, fc->user_id) &&
1013 cred->egid == fc->group_id && 1013 gid_eq(cred->egid, fc->group_id) &&
1014 cred->sgid == fc->group_id && 1014 gid_eq(cred->sgid, fc->group_id) &&
1015 cred->gid == fc->group_id) 1015 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1016 ret = 1;
1017 rcu_read_unlock(); 1017 rcu_read_unlock();
1018 1018
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1306 if (ivalid & ATTR_MODE) 1306 if (ivalid & ATTR_MODE)
1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1308 if (ivalid & ATTR_UID) 1308 if (ivalid & ATTR_UID)
1309 arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; 1309 arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1310 if (ivalid & ATTR_GID) 1310 if (ivalid & ATTR_GID)
1311 arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; 1311 arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1312 if (ivalid & ATTR_SIZE) 1312 if (ivalid & ATTR_SIZE)
1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1314 if (ivalid & ATTR_ATIME) { 1314 if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e24dd74e306..e105a53fc72 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@ struct fuse_conn {
333 atomic_t count; 333 atomic_t count;
334 334
335 /** The user id for this mount */ 335 /** The user id for this mount */
336 uid_t user_id; 336 kuid_t user_id;
337 337
338 /** The group id for this mount */ 338 /** The group id for this mount */
339 gid_t group_id; 339 kgid_t group_id;
340 340
341 /** The fuse mount flags for this mount */ 341 /** The fuse mount flags for this mount */
342 unsigned flags; 342 unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0eda124cff..73ca6b72bea 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
60struct fuse_mount_data { 60struct fuse_mount_data {
61 int fd; 61 int fd;
62 unsigned rootmode; 62 unsigned rootmode;
63 unsigned user_id; 63 kuid_t user_id;
64 unsigned group_id; 64 kgid_t group_id;
65 unsigned fd_present:1; 65 unsigned fd_present:1;
66 unsigned rootmode_present:1; 66 unsigned rootmode_present:1;
67 unsigned user_id_present:1; 67 unsigned user_id_present:1;
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
164 inode->i_ino = fuse_squash_ino(attr->ino); 164 inode->i_ino = fuse_squash_ino(attr->ino);
165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
166 set_nlink(inode, attr->nlink); 166 set_nlink(inode, attr->nlink);
167 inode->i_uid = attr->uid; 167 inode->i_uid = make_kuid(&init_user_ns, attr->uid);
168 inode->i_gid = attr->gid; 168 inode->i_gid = make_kgid(&init_user_ns, attr->gid);
169 inode->i_blocks = attr->blocks; 169 inode->i_blocks = attr->blocks;
170 inode->i_atime.tv_sec = attr->atime; 170 inode->i_atime.tv_sec = attr->atime;
171 inode->i_atime.tv_nsec = attr->atimensec; 171 inode->i_atime.tv_nsec = attr->atimensec;
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
492 case OPT_USER_ID: 492 case OPT_USER_ID:
493 if (match_int(&args[0], &value)) 493 if (match_int(&args[0], &value))
494 return 0; 494 return 0;
495 d->user_id = value; 495 d->user_id = make_kuid(current_user_ns(), value);
496 if (!uid_valid(d->user_id))
497 return 0;
496 d->user_id_present = 1; 498 d->user_id_present = 1;
497 break; 499 break;
498 500
499 case OPT_GROUP_ID: 501 case OPT_GROUP_ID:
500 if (match_int(&args[0], &value)) 502 if (match_int(&args[0], &value))
501 return 0; 503 return 0;
502 d->group_id = value; 504 d->group_id = make_kgid(current_user_ns(), value);
505 if (!gid_valid(d->group_id))
506 return 0;
503 d->group_id_present = 1; 507 d->group_id_present = 1;
504 break; 508 break;
505 509
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
540 struct super_block *sb = root->d_sb; 544 struct super_block *sb = root->d_sb;
541 struct fuse_conn *fc = get_fuse_conn_super(sb); 545 struct fuse_conn *fc = get_fuse_conn_super(sb);
542 546
543 seq_printf(m, ",user_id=%u", fc->user_id); 547 seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
544 seq_printf(m, ",group_id=%u", fc->group_id); 548 seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
545 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) 549 if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
546 seq_puts(m, ",default_permissions"); 550 seq_puts(m, ",default_permissions");
547 if (fc->flags & FUSE_ALLOW_OTHER) 551 if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
989 if (!file) 993 if (!file)
990 goto err; 994 goto err;
991 995
992 if (file->f_op != &fuse_dev_operations) 996 if ((file->f_op != &fuse_dev_operations) ||
997 (file->f_cred->user_ns != &init_user_ns))
993 goto err_fput; 998 goto err_fput;
994 999
995 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1000 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 78f21f8dc2e..43b315f2002 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
710 struct vfsmount *proc_mnt; 710 struct vfsmount *proc_mnt;
711 int err = -ENOENT; 711 int err = -ENOENT;
712 712
713 proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); 713 proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
714 if (IS_ERR(proc_mnt)) 714 if (IS_ERR(proc_mnt))
715 goto out; 715 goto out;
716 716
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de64..cd500798040 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
4 4
5struct mnt_namespace { 5struct mnt_namespace {
6 atomic_t count; 6 atomic_t count;
7 unsigned int proc_inum;
7 struct mount * root; 8 struct mount * root;
8 struct list_head list; 9 struct list_head list;
10 struct user_namespace *user_ns;
11 u64 seq; /* Sequence number to prevent loops */
9 wait_queue_head_t poll; 12 wait_queue_head_t poll;
10 int event; 13 int event;
11}; 14};
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6..c1bbe86f492 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
12#include <linux/export.h> 12#include <linux/export.h>
13#include <linux/capability.h> 13#include <linux/capability.h>
14#include <linux/mnt_namespace.h> 14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
15#include <linux/namei.h> 16#include <linux/namei.h>
16#include <linux/security.h> 17#include <linux/security.h>
17#include <linux/idr.h> 18#include <linux/idr.h>
@@ -20,6 +21,7 @@
20#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/proc_fs.h>
23#include "pnode.h" 25#include "pnode.h"
24#include "internal.h" 26#include "internal.h"
25 27
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
784 if (!mnt) 786 if (!mnt)
785 return ERR_PTR(-ENOMEM); 787 return ERR_PTR(-ENOMEM);
786 788
787 if (flag & (CL_SLAVE | CL_PRIVATE)) 789 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
788 mnt->mnt_group_id = 0; /* not a peer of original */ 790 mnt->mnt_group_id = 0; /* not a peer of original */
789 else 791 else
790 mnt->mnt_group_id = old->mnt_group_id; 792 mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
805 list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 807 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
806 br_write_unlock(&vfsmount_lock); 808 br_write_unlock(&vfsmount_lock);
807 809
808 if (flag & CL_SLAVE) { 810 if ((flag & CL_SLAVE) ||
811 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
809 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 812 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
810 mnt->mnt_master = old; 813 mnt->mnt_master = old;
811 CLEAR_MNT_SHARED(mnt); 814 CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1266 goto dput_and_out; 1269 goto dput_and_out;
1267 1270
1268 retval = -EPERM; 1271 retval = -EPERM;
1269 if (!capable(CAP_SYS_ADMIN)) 1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1270 goto dput_and_out; 1273 goto dput_and_out;
1271 1274
1272 retval = do_umount(mnt, flags); 1275 retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1292 1295
1293static int mount_is_safe(struct path *path) 1296static int mount_is_safe(struct path *path)
1294{ 1297{
1295 if (capable(CAP_SYS_ADMIN)) 1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1296 return 0; 1299 return 0;
1297 return -EPERM; 1300 return -EPERM;
1298#ifdef notyet 1301#ifdef notyet
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)
1308#endif 1311#endif
1309} 1312}
1310 1313
1314static bool mnt_ns_loop(struct path *path)
1315{
1316 /* Could bind mounting the mount namespace inode cause a
1317 * mount namespace loop?
1318 */
1319 struct inode *inode = path->dentry->d_inode;
1320 struct proc_inode *ei;
1321 struct mnt_namespace *mnt_ns;
1322
1323 if (!proc_ns_inode(inode))
1324 return false;
1325
1326 ei = PROC_I(inode);
1327 if (ei->ns_ops != &mntns_operations)
1328 return false;
1329
1330 mnt_ns = ei->ns;
1331 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1332}
1333
1311struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1334struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1312 int flag) 1335 int flag)
1313{ 1336{
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
1610 int type; 1633 int type;
1611 int err = 0; 1634 int err = 0;
1612 1635
1613 if (!capable(CAP_SYS_ADMIN)) 1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1614 return -EPERM; 1637 return -EPERM;
1615 1638
1616 if (path->dentry != path->mnt->mnt_root) 1639 if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,
1655 if (err) 1678 if (err)
1656 return err; 1679 return err;
1657 1680
1681 err = -EINVAL;
1682 if (mnt_ns_loop(&old_path))
1683 goto out;
1684
1658 err = lock_mount(path); 1685 err = lock_mount(path);
1659 if (err) 1686 if (err)
1660 goto out; 1687 goto out;
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1770 struct mount *p; 1797 struct mount *p;
1771 struct mount *old; 1798 struct mount *old;
1772 int err = 0; 1799 int err = 0;
1773 if (!capable(CAP_SYS_ADMIN)) 1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1774 return -EPERM; 1801 return -EPERM;
1775 if (!old_name || !*old_name) 1802 if (!old_name || !*old_name)
1776 return -EINVAL; 1803 return -EINVAL;
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1857 return ERR_PTR(err); 1884 return ERR_PTR(err);
1858} 1885}
1859 1886
1860static struct vfsmount *
1861do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1862{
1863 struct file_system_type *type = get_fs_type(fstype);
1864 struct vfsmount *mnt;
1865 if (!type)
1866 return ERR_PTR(-ENODEV);
1867 mnt = vfs_kern_mount(type, flags, name, data);
1868 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1869 !mnt->mnt_sb->s_subtype)
1870 mnt = fs_set_subtype(mnt, fstype);
1871 put_filesystem(type);
1872 return mnt;
1873}
1874
1875/* 1887/*
1876 * add a mount into a namespace's mount tree 1888 * add a mount into a namespace's mount tree
1877 */ 1889 */
@@ -1917,20 +1929,46 @@ unlock:
1917 * create a new mount for userspace and request it to be added into the 1929 * create a new mount for userspace and request it to be added into the
1918 * namespace's tree 1930 * namespace's tree
1919 */ 1931 */
1920static int do_new_mount(struct path *path, const char *type, int flags, 1932static int do_new_mount(struct path *path, const char *fstype, int flags,
1921 int mnt_flags, const char *name, void *data) 1933 int mnt_flags, const char *name, void *data)
1922{ 1934{
1935 struct file_system_type *type;
1936 struct user_namespace *user_ns;
1923 struct vfsmount *mnt; 1937 struct vfsmount *mnt;
1924 int err; 1938 int err;
1925 1939
1926 if (!type) 1940 if (!fstype)
1927 return -EINVAL; 1941 return -EINVAL;
1928 1942
1929 /* we need capabilities... */ 1943 /* we need capabilities... */
1930 if (!capable(CAP_SYS_ADMIN)) 1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1931 return -EPERM; 1946 return -EPERM;
1932 1947
1933 mnt = do_kern_mount(type, flags, name, data); 1948 type = get_fs_type(fstype);
1949 if (!type)
1950 return -ENODEV;
1951
1952 if (user_ns != &init_user_ns) {
1953 if (!(type->fs_flags & FS_USERNS_MOUNT)) {
1954 put_filesystem(type);
1955 return -EPERM;
1956 }
1957 /* Only in special cases allow devices from mounts
1958 * created outside the initial user namespace.
1959 */
1960 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
1961 flags |= MS_NODEV;
1962 mnt_flags |= MNT_NODEV;
1963 }
1964 }
1965
1966 mnt = vfs_kern_mount(type, flags, name, data);
1967 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1968 !mnt->mnt_sb->s_subtype)
1969 mnt = fs_set_subtype(mnt, fstype);
1970
1971 put_filesystem(type);
1934 if (IS_ERR(mnt)) 1972 if (IS_ERR(mnt))
1935 return PTR_ERR(mnt); 1973 return PTR_ERR(mnt);
1936 1974
@@ -2261,18 +2299,42 @@ dput_out:
2261 return retval; 2299 return retval;
2262} 2300}
2263 2301
2264static struct mnt_namespace *alloc_mnt_ns(void) 2302static void free_mnt_ns(struct mnt_namespace *ns)
2303{
2304 proc_free_inum(ns->proc_inum);
2305 put_user_ns(ns->user_ns);
2306 kfree(ns);
2307}
2308
2309/*
2310 * Assign a sequence number so we can detect when we attempt to bind
2311 * mount a reference to an older mount namespace into the current
2312 * mount namespace, preventing reference counting loops. A 64bit
2313 * number incrementing at 10Ghz will take 12,427 years to wrap which
2314 * is effectively never, so we can ignore the possibility.
2315 */
2316static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2317
2318static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2265{ 2319{
2266 struct mnt_namespace *new_ns; 2320 struct mnt_namespace *new_ns;
2321 int ret;
2267 2322
2268 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2323 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2269 if (!new_ns) 2324 if (!new_ns)
2270 return ERR_PTR(-ENOMEM); 2325 return ERR_PTR(-ENOMEM);
2326 ret = proc_alloc_inum(&new_ns->proc_inum);
2327 if (ret) {
2328 kfree(new_ns);
2329 return ERR_PTR(ret);
2330 }
2331 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2271 atomic_set(&new_ns->count, 1); 2332 atomic_set(&new_ns->count, 1);
2272 new_ns->root = NULL; 2333 new_ns->root = NULL;
2273 INIT_LIST_HEAD(&new_ns->list); 2334 INIT_LIST_HEAD(&new_ns->list);
2274 init_waitqueue_head(&new_ns->poll); 2335 init_waitqueue_head(&new_ns->poll);
2275 new_ns->event = 0; 2336 new_ns->event = 0;
2337 new_ns->user_ns = get_user_ns(user_ns);
2276 return new_ns; 2338 return new_ns;
2277} 2339}
2278 2340
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2281 * copied from the namespace of the passed in task structure. 2343 * copied from the namespace of the passed in task structure.
2282 */ 2344 */
2283static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, 2345static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2284 struct fs_struct *fs) 2346 struct user_namespace *user_ns, struct fs_struct *fs)
2285{ 2347{
2286 struct mnt_namespace *new_ns; 2348 struct mnt_namespace *new_ns;
2287 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2349 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2288 struct mount *p, *q; 2350 struct mount *p, *q;
2289 struct mount *old = mnt_ns->root; 2351 struct mount *old = mnt_ns->root;
2290 struct mount *new; 2352 struct mount *new;
2353 int copy_flags;
2291 2354
2292 new_ns = alloc_mnt_ns(); 2355 new_ns = alloc_mnt_ns(user_ns);
2293 if (IS_ERR(new_ns)) 2356 if (IS_ERR(new_ns))
2294 return new_ns; 2357 return new_ns;
2295 2358
2296 down_write(&namespace_sem); 2359 down_write(&namespace_sem);
2297 /* First pass: copy the tree topology */ 2360 /* First pass: copy the tree topology */
2298 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); 2361 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2362 if (user_ns != mnt_ns->user_ns)
2363 copy_flags |= CL_SHARED_TO_SLAVE;
2364 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2299 if (IS_ERR(new)) { 2365 if (IS_ERR(new)) {
2300 up_write(&namespace_sem); 2366 up_write(&namespace_sem);
2301 kfree(new_ns); 2367 free_mnt_ns(new_ns);
2302 return ERR_CAST(new); 2368 return ERR_CAST(new);
2303 } 2369 }
2304 new_ns->root = new; 2370 new_ns->root = new;
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339} 2405}
2340 2406
2341struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, 2407struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2342 struct fs_struct *new_fs) 2408 struct user_namespace *user_ns, struct fs_struct *new_fs)
2343{ 2409{
2344 struct mnt_namespace *new_ns; 2410 struct mnt_namespace *new_ns;
2345 2411
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2349 if (!(flags & CLONE_NEWNS)) 2415 if (!(flags & CLONE_NEWNS))
2350 return ns; 2416 return ns;
2351 2417
2352 new_ns = dup_mnt_ns(ns, new_fs); 2418 new_ns = dup_mnt_ns(ns, user_ns, new_fs);
2353 2419
2354 put_mnt_ns(ns); 2420 put_mnt_ns(ns);
2355 return new_ns; 2421 return new_ns;
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2361 */ 2427 */
2362static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) 2428static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2363{ 2429{
2364 struct mnt_namespace *new_ns = alloc_mnt_ns(); 2430 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2365 if (!IS_ERR(new_ns)) { 2431 if (!IS_ERR(new_ns)) {
2366 struct mount *mnt = real_mount(m); 2432 struct mount *mnt = real_mount(m);
2367 mnt->mnt_ns = new_ns; 2433 mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2501 struct mount *new_mnt, *root_mnt; 2567 struct mount *new_mnt, *root_mnt;
2502 int error; 2568 int error;
2503 2569
2504 if (!capable(CAP_SYS_ADMIN)) 2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
2505 return -EPERM; 2571 return -EPERM;
2506 2572
2507 error = user_path_dir(new_root, &new); 2573 error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)
2583 struct vfsmount *mnt; 2649 struct vfsmount *mnt;
2584 struct mnt_namespace *ns; 2650 struct mnt_namespace *ns;
2585 struct path root; 2651 struct path root;
2652 struct file_system_type *type;
2586 2653
2587 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2654 type = get_fs_type("rootfs");
2655 if (!type)
2656 panic("Can't find rootfs type");
2657 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
2658 put_filesystem(type);
2588 if (IS_ERR(mnt)) 2659 if (IS_ERR(mnt))
2589 panic("Can't create rootfs"); 2660 panic("Can't create rootfs");
2590 2661
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
2647 br_write_unlock(&vfsmount_lock); 2718 br_write_unlock(&vfsmount_lock);
2648 up_write(&namespace_sem); 2719 up_write(&namespace_sem);
2649 release_mounts(&umount_list); 2720 release_mounts(&umount_list);
2650 kfree(ns); 2721 free_mnt_ns(ns);
2651} 2722}
2652 2723
2653struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 2724struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)
2681{ 2752{
2682 return check_mnt(real_mount(mnt)); 2753 return check_mnt(real_mount(mnt));
2683} 2754}
2755
2756static void *mntns_get(struct task_struct *task)
2757{
2758 struct mnt_namespace *ns = NULL;
2759 struct nsproxy *nsproxy;
2760
2761 rcu_read_lock();
2762 nsproxy = task_nsproxy(task);
2763 if (nsproxy) {
2764 ns = nsproxy->mnt_ns;
2765 get_mnt_ns(ns);
2766 }
2767 rcu_read_unlock();
2768
2769 return ns;
2770}
2771
2772static void mntns_put(void *ns)
2773{
2774 put_mnt_ns(ns);
2775}
2776
2777static int mntns_install(struct nsproxy *nsproxy, void *ns)
2778{
2779 struct fs_struct *fs = current->fs;
2780 struct mnt_namespace *mnt_ns = ns;
2781 struct path root;
2782
2783 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
2784 !nsown_capable(CAP_SYS_CHROOT))
2785 return -EPERM;
2786
2787 if (fs->users != 1)
2788 return -EINVAL;
2789
2790 get_mnt_ns(mnt_ns);
2791 put_mnt_ns(nsproxy->mnt_ns);
2792 nsproxy->mnt_ns = mnt_ns;
2793
2794 /* Find the root */
2795 root.mnt = &mnt_ns->root->mnt;
2796 root.dentry = mnt_ns->root->mnt.mnt_root;
2797 path_get(&root);
2798 while(d_mountpoint(root.dentry) && follow_down_one(&root))
2799 ;
2800
2801 /* Update the pwd and root */
2802 set_fs_pwd(fs, &root);
2803 set_fs_root(fs, &root);
2804
2805 path_put(&root);
2806 return 0;
2807}
2808
2809static unsigned int mntns_inum(void *ns)
2810{
2811 struct mnt_namespace *mnt_ns = ns;
2812 return mnt_ns->proc_inum;
2813}
2814
2815const struct proc_ns_operations mntns_operations = {
2816 .name = "mnt",
2817 .type = CLONE_NEWNS,
2818 .get = mntns_get,
2819 .put = mntns_put,
2820 .install = mntns_install,
2821 .inum = mntns_inum,
2822};
diff --git a/fs/open.c b/fs/open.c
index 59071f55bf7..182d8667b7b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
435 goto dput_and_out; 435 goto dput_and_out;
436 436
437 error = -EPERM; 437 error = -EPERM;
438 if (!capable(CAP_SYS_CHROOT)) 438 if (!nsown_capable(CAP_SYS_CHROOT))
439 goto dput_and_out; 439 goto dput_and_out;
440 error = security_path_chroot(&path); 440 error = security_path_chroot(&path);
441 if (error) 441 if (error)
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c60979d54..19b853a3445 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
22#define CL_COPY_ALL 0x04 22#define CL_COPY_ALL 0x04
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20
25 26
26static inline void set_mnt_shared(struct mount *mnt) 27static inline void set_mnt_shared(struct mount *mnt)
27{ 28{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349efbbc2..981b0560193 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o 23proc-y += namespaces.o
24proc-y += self.o
24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 25proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
25proc-$(CONFIG_NET) += proc_net.o 26proc-$(CONFIG_NET) += proc_net.o
26proc-$(CONFIG_PROC_KCORE) += kcore.o 27proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d3696708fc1..d66248a1919 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
162static inline void task_state(struct seq_file *m, struct pid_namespace *ns, 162static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
163 struct pid *pid, struct task_struct *p) 163 struct pid *pid, struct task_struct *p)
164{ 164{
165 struct user_namespace *user_ns = current_user_ns(); 165 struct user_namespace *user_ns = seq_user_ns(m);
166 struct group_info *group_info; 166 struct group_info *group_info;
167 int g; 167 int g;
168 struct fdtable *fdt = NULL; 168 struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25157b..5a5a0be40e4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
2345}; 2345};
2346#endif 2346#endif
2347 2347
2348/*
2349 * /proc/self:
2350 */
2351static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2352 int buflen)
2353{
2354 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2355 pid_t tgid = task_tgid_nr_ns(current, ns);
2356 char tmp[PROC_NUMBUF];
2357 if (!tgid)
2358 return -ENOENT;
2359 sprintf(tmp, "%d", tgid);
2360 return vfs_readlink(dentry,buffer,buflen,tmp);
2361}
2362
2363static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2364{
2365 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2366 pid_t tgid = task_tgid_nr_ns(current, ns);
2367 char *name = ERR_PTR(-ENOENT);
2368 if (tgid) {
2369 /* 11 for max length of signed int in decimal + NULL term */
2370 name = kmalloc(12, GFP_KERNEL);
2371 if (!name)
2372 name = ERR_PTR(-ENOMEM);
2373 else
2374 sprintf(name, "%d", tgid);
2375 }
2376 nd_set_link(nd, name);
2377 return NULL;
2378}
2379
2380static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
2381 void *cookie)
2382{
2383 char *s = nd_get_link(nd);
2384 if (!IS_ERR(s))
2385 kfree(s);
2386}
2387
2388static const struct inode_operations proc_self_inode_operations = {
2389 .readlink = proc_self_readlink,
2390 .follow_link = proc_self_follow_link,
2391 .put_link = proc_self_put_link,
2392};
2393
2394/*
2395 * proc base
2396 *
2397 * These are the directory entries in the root directory of /proc
2398 * that properly belong to the /proc filesystem, as they describe
2399 * describe something that is process related.
2400 */
2401static const struct pid_entry proc_base_stuff[] = {
2402 NOD("self", S_IFLNK|S_IRWXUGO,
2403 &proc_self_inode_operations, NULL, {}),
2404};
2405
2406static struct dentry *proc_base_instantiate(struct inode *dir,
2407 struct dentry *dentry, struct task_struct *task, const void *ptr)
2408{
2409 const struct pid_entry *p = ptr;
2410 struct inode *inode;
2411 struct proc_inode *ei;
2412 struct dentry *error;
2413
2414 /* Allocate the inode */
2415 error = ERR_PTR(-ENOMEM);
2416 inode = new_inode(dir->i_sb);
2417 if (!inode)
2418 goto out;
2419
2420 /* Initialize the inode */
2421 ei = PROC_I(inode);
2422 inode->i_ino = get_next_ino();
2423 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2424
2425 /*
2426 * grab the reference to the task.
2427 */
2428 ei->pid = get_task_pid(task, PIDTYPE_PID);
2429 if (!ei->pid)
2430 goto out_iput;
2431
2432 inode->i_mode = p->mode;
2433 if (S_ISDIR(inode->i_mode))
2434 set_nlink(inode, 2);
2435 if (S_ISLNK(inode->i_mode))
2436 inode->i_size = 64;
2437 if (p->iop)
2438 inode->i_op = p->iop;
2439 if (p->fop)
2440 inode->i_fop = p->fop;
2441 ei->op = p->op;
2442 d_add(dentry, inode);
2443 error = NULL;
2444out:
2445 return error;
2446out_iput:
2447 iput(inode);
2448 goto out;
2449}
2450
2451static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2452{
2453 struct dentry *error;
2454 struct task_struct *task = get_proc_task(dir);
2455 const struct pid_entry *p, *last;
2456
2457 error = ERR_PTR(-ENOENT);
2458
2459 if (!task)
2460 goto out_no_task;
2461
2462 /* Lookup the directory entry */
2463 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2464 for (p = proc_base_stuff; p <= last; p++) {
2465 if (p->len != dentry->d_name.len)
2466 continue;
2467 if (!memcmp(dentry->d_name.name, p->name, p->len))
2468 break;
2469 }
2470 if (p > last)
2471 goto out;
2472
2473 error = proc_base_instantiate(dir, dentry, task, p);
2474
2475out:
2476 put_task_struct(task);
2477out_no_task:
2478 return error;
2479}
2480
2481static int proc_base_fill_cache(struct file *filp, void *dirent,
2482 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2483{
2484 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2485 proc_base_instantiate, task, p);
2486}
2487
2488#ifdef CONFIG_TASK_IO_ACCOUNTING 2348#ifdef CONFIG_TASK_IO_ACCOUNTING
2489static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2349static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2490{ 2350{
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
2839 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2699 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2840 tgid->numbers[i].nr); 2700 tgid->numbers[i].nr);
2841 } 2701 }
2842
2843 upid = &pid->numbers[pid->level];
2844 if (upid->nr == 1)
2845 pid_ns_release_proc(upid->ns);
2846} 2702}
2847 2703
2848static struct dentry *proc_pid_instantiate(struct inode *dir, 2704static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
2876 2732
2877struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 2733struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
2878{ 2734{
2879 struct dentry *result; 2735 struct dentry *result = NULL;
2880 struct task_struct *task; 2736 struct task_struct *task;
2881 unsigned tgid; 2737 unsigned tgid;
2882 struct pid_namespace *ns; 2738 struct pid_namespace *ns;
2883 2739
2884 result = proc_base_lookup(dir, dentry);
2885 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2886 goto out;
2887
2888 tgid = name_to_int(dentry); 2740 tgid = name_to_int(dentry);
2889 if (tgid == ~0U) 2741 if (tgid == ~0U)
2890 goto out; 2742 goto out;
@@ -2947,7 +2799,7 @@ retry:
2947 return iter; 2799 return iter;
2948} 2800}
2949 2801
2950#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 2802#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
2951 2803
2952static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2804static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2953 struct tgid_iter iter) 2805 struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
2967/* for the /proc/ directory itself, after non-process stuff has been done */ 2819/* for the /proc/ directory itself, after non-process stuff has been done */
2968int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2820int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2969{ 2821{
2970 unsigned int nr;
2971 struct task_struct *reaper;
2972 struct tgid_iter iter; 2822 struct tgid_iter iter;
2973 struct pid_namespace *ns; 2823 struct pid_namespace *ns;
2974 filldir_t __filldir; 2824 filldir_t __filldir;
2975 2825
2976 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 2826 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
2977 goto out_no_task; 2827 goto out;
2978 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2979
2980 reaper = get_proc_task(filp->f_path.dentry->d_inode);
2981 if (!reaper)
2982 goto out_no_task;
2983
2984 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2985 const struct pid_entry *p = &proc_base_stuff[nr];
2986 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2987 goto out;
2988 }
2989 2828
2990 ns = filp->f_dentry->d_sb->s_fs_info; 2829 ns = filp->f_dentry->d_sb->s_fs_info;
2991 iter.task = NULL; 2830 iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3006 } 2845 }
3007 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 2846 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
3008out: 2847out:
3009 put_task_struct(reaper);
3010out_no_task:
3011 return 0; 2848 return 0;
3012} 2849}
3013 2850
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb..7b3ae3cc0ef 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
350 * Return an inode number between PROC_DYNAMIC_FIRST and 350 * Return an inode number between PROC_DYNAMIC_FIRST and
351 * 0xffffffff, or zero on failure. 351 * 0xffffffff, or zero on failure.
352 */ 352 */
353static unsigned int get_inode_number(void) 353int proc_alloc_inum(unsigned int *inum)
354{ 354{
355 unsigned int i; 355 unsigned int i;
356 int error; 356 int error;
357 357
358retry: 358retry:
359 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) 359 if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
360 return 0; 360 return -ENOMEM;
361 361
362 spin_lock(&proc_inum_lock); 362 spin_lock(&proc_inum_lock);
363 error = ida_get_new(&proc_inum_ida, &i); 363 error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
365 if (error == -EAGAIN) 365 if (error == -EAGAIN)
366 goto retry; 366 goto retry;
367 else if (error) 367 else if (error)
368 return 0; 368 return error;
369 369
370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { 370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
371 spin_lock(&proc_inum_lock); 371 spin_lock(&proc_inum_lock);
372 ida_remove(&proc_inum_ida, i); 372 ida_remove(&proc_inum_ida, i);
373 spin_unlock(&proc_inum_lock); 373 spin_unlock(&proc_inum_lock);
374 return 0; 374 return -ENOSPC;
375 } 375 }
376 return PROC_DYNAMIC_FIRST + i; 376 *inum = PROC_DYNAMIC_FIRST + i;
377 return 0;
377} 378}
378 379
379static void release_inode_number(unsigned int inum) 380void proc_free_inum(unsigned int inum)
380{ 381{
381 spin_lock(&proc_inum_lock); 382 spin_lock(&proc_inum_lock);
382 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 383 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
554 555
555static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 556static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
556{ 557{
557 unsigned int i;
558 struct proc_dir_entry *tmp; 558 struct proc_dir_entry *tmp;
559 int ret;
559 560
560 i = get_inode_number(); 561 ret = proc_alloc_inum(&dp->low_ino);
561 if (i == 0) 562 if (ret)
562 return -EAGAIN; 563 return ret;
563 dp->low_ino = i;
564 564
565 if (S_ISDIR(dp->mode)) { 565 if (S_ISDIR(dp->mode)) {
566 if (dp->proc_iops == NULL) { 566 if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
764 764
765static void free_proc_entry(struct proc_dir_entry *de) 765static void free_proc_entry(struct proc_dir_entry *de)
766{ 766{
767 release_inode_number(de->low_ino); 767 proc_free_inum(de->low_ino);
768 768
769 if (S_ISLNK(de->mode)) 769 if (S_ISLNK(de->mode))
770 kfree(de->data); 770 kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbdee9e..439ae688650 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
31 struct proc_dir_entry *de; 31 struct proc_dir_entry *de;
32 struct ctl_table_header *head; 32 struct ctl_table_header *head;
33 const struct proc_ns_operations *ns_ops; 33 const struct proc_ns_operations *ns_ops;
34 void *ns;
34 35
35 truncate_inode_pages(&inode->i_data, 0); 36 truncate_inode_pages(&inode->i_data, 0);
36 clear_inode(inode); 37 clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
49 } 50 }
50 /* Release any associated namespace */ 51 /* Release any associated namespace */
51 ns_ops = PROC_I(inode)->ns_ops; 52 ns_ops = PROC_I(inode)->ns_ops;
52 if (ns_ops && ns_ops->put) 53 ns = PROC_I(inode)->ns;
53 ns_ops->put(PROC_I(inode)->ns); 54 if (ns_ops && ns)
55 ns_ops->put(ns);
54} 56}
55 57
56static struct kmem_cache * proc_inode_cachep; 58static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b084ab..252544c0520 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct ctl_table_header;
15struct mempolicy; 15struct mempolicy;
16 16
17extern struct proc_dir_entry proc_root; 17extern struct proc_dir_entry proc_root;
18extern void proc_self_init(void);
18#ifdef CONFIG_PROC_SYSCTL 19#ifdef CONFIG_PROC_SYSCTL
19extern int proc_sys_init(void); 20extern int proc_sys_init(void);
20extern void sysctl_head_put(struct ctl_table_header *head); 21extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c3..b7a47196c8c 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
13#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
14#include <linux/user_namespace.h>
14#include "internal.h" 15#include "internal.h"
15 16
16 17
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
24#ifdef CONFIG_IPC_NS 25#ifdef CONFIG_IPC_NS
25 &ipcns_operations, 26 &ipcns_operations,
26#endif 27#endif
28#ifdef CONFIG_PID_NS
29 &pidns_operations,
30#endif
31#ifdef CONFIG_USER_NS
32 &userns_operations,
33#endif
34 &mntns_operations,
27}; 35};
28 36
29static const struct file_operations ns_file_operations = { 37static const struct file_operations ns_file_operations = {
30 .llseek = no_llseek, 38 .llseek = no_llseek,
31}; 39};
32 40
41static const struct inode_operations ns_inode_operations = {
42 .setattr = proc_setattr,
43};
44
45static int ns_delete_dentry(const struct dentry *dentry)
46{
47 /* Don't cache namespace inodes when not in use */
48 return 1;
49}
50
51static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
52{
53 struct inode *inode = dentry->d_inode;
54 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
55
56 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
57 ns_ops->name, inode->i_ino);
58}
59
60const struct dentry_operations ns_dentry_operations =
61{
62 .d_delete = ns_delete_dentry,
63 .d_dname = ns_dname,
64};
65
66static struct dentry *proc_ns_get_dentry(struct super_block *sb,
67 struct task_struct *task, const struct proc_ns_operations *ns_ops)
68{
69 struct dentry *dentry, *result;
70 struct inode *inode;
71 struct proc_inode *ei;
72 struct qstr qname = { .name = "", };
73 void *ns;
74
75 ns = ns_ops->get(task);
76 if (!ns)
77 return ERR_PTR(-ENOENT);
78
79 dentry = d_alloc_pseudo(sb, &qname);
80 if (!dentry) {
81 ns_ops->put(ns);
82 return ERR_PTR(-ENOMEM);
83 }
84
85 inode = iget_locked(sb, ns_ops->inum(ns));
86 if (!inode) {
87 dput(dentry);
88 ns_ops->put(ns);
89 return ERR_PTR(-ENOMEM);
90 }
91
92 ei = PROC_I(inode);
93 if (inode->i_state & I_NEW) {
94 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
95 inode->i_op = &ns_inode_operations;
96 inode->i_mode = S_IFREG | S_IRUGO;
97 inode->i_fop = &ns_file_operations;
98 ei->ns_ops = ns_ops;
99 ei->ns = ns;
100 unlock_new_inode(inode);
101 } else {
102 ns_ops->put(ns);
103 }
104
105 d_set_d_op(dentry, &ns_dentry_operations);
106 result = d_instantiate_unique(dentry, inode);
107 if (result) {
108 dput(dentry);
109 dentry = result;
110 }
111
112 return dentry;
113}
114
115static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
116{
117 struct inode *inode = dentry->d_inode;
118 struct super_block *sb = inode->i_sb;
119 struct proc_inode *ei = PROC_I(inode);
120 struct task_struct *task;
121 struct dentry *ns_dentry;
122 void *error = ERR_PTR(-EACCES);
123
124 task = get_proc_task(inode);
125 if (!task)
126 goto out;
127
128 if (!ptrace_may_access(task, PTRACE_MODE_READ))
129 goto out_put_task;
130
131 ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
132 if (IS_ERR(ns_dentry)) {
133 error = ERR_CAST(ns_dentry);
134 goto out_put_task;
135 }
136
137 dput(nd->path.dentry);
138 nd->path.dentry = ns_dentry;
139 error = NULL;
140
141out_put_task:
142 put_task_struct(task);
143out:
144 return error;
145}
146
147static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
148{
149 struct inode *inode = dentry->d_inode;
150 struct proc_inode *ei = PROC_I(inode);
151 const struct proc_ns_operations *ns_ops = ei->ns_ops;
152 struct task_struct *task;
153 void *ns;
154 char name[50];
155 int len = -EACCES;
156
157 task = get_proc_task(inode);
158 if (!task)
159 goto out;
160
161 if (!ptrace_may_access(task, PTRACE_MODE_READ))
162 goto out_put_task;
163
164 len = -ENOENT;
165 ns = ns_ops->get(task);
166 if (!ns)
167 goto out_put_task;
168
169 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
170 len = strlen(name);
171
172 if (len > buflen)
173 len = buflen;
174 if (copy_to_user(buffer, name, len))
175 len = -EFAULT;
176
177 ns_ops->put(ns);
178out_put_task:
179 put_task_struct(task);
180out:
181 return len;
182}
183
184static const struct inode_operations proc_ns_link_inode_operations = {
185 .readlink = proc_ns_readlink,
186 .follow_link = proc_ns_follow_link,
187 .setattr = proc_setattr,
188};
189
33static struct dentry *proc_ns_instantiate(struct inode *dir, 190static struct dentry *proc_ns_instantiate(struct inode *dir,
34 struct dentry *dentry, struct task_struct *task, const void *ptr) 191 struct dentry *dentry, struct task_struct *task, const void *ptr)
35{ 192{
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
37 struct inode *inode; 194 struct inode *inode;
38 struct proc_inode *ei; 195 struct proc_inode *ei;
39 struct dentry *error = ERR_PTR(-ENOENT); 196 struct dentry *error = ERR_PTR(-ENOENT);
40 void *ns;
41 197
42 inode = proc_pid_make_inode(dir->i_sb, task); 198 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode) 199 if (!inode)
44 goto out; 200 goto out;
45 201
46 ns = ns_ops->get(task);
47 if (!ns)
48 goto out_iput;
49
50 ei = PROC_I(inode); 202 ei = PROC_I(inode);
51 inode->i_mode = S_IFREG|S_IRUSR; 203 inode->i_mode = S_IFLNK|S_IRWXUGO;
52 inode->i_fop = &ns_file_operations; 204 inode->i_op = &proc_ns_link_inode_operations;
53 ei->ns_ops = ns_ops; 205 ei->ns_ops = ns_ops;
54 ei->ns = ns;
55 206
56 d_set_d_op(dentry, &pid_dentry_operations); 207 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 208 d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
60 error = NULL; 211 error = NULL;
61out: 212out:
62 return error; 213 return error;
63out_iput:
64 iput(inode);
65 goto out;
66} 214}
67 215
68static int proc_ns_fill_cache(struct file *filp, void *dirent, 216static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
89 if (!task) 237 if (!task)
90 goto out_no_task; 238 goto out_no_task;
91 239
92 ret = -EPERM;
93 if (!ptrace_may_access(task, PTRACE_MODE_READ))
94 goto out;
95
96 ret = 0; 240 ret = 0;
97 i = filp->f_pos; 241 i = filp->f_pos;
98 switch (i) { 242 switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
152 if (!task) 296 if (!task)
153 goto out_no_task; 297 goto out_no_task;
154 298
155 error = ERR_PTR(-EPERM);
156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out;
158
159 last = &ns_entries[ARRAY_SIZE(ns_entries)]; 299 last = &ns_entries[ARRAY_SIZE(ns_entries)];
160 for (entry = ns_entries; entry < last; entry++) { 300 for (entry = ns_entries; entry < last; entry++) {
161 if (strlen((*entry)->name) != len) 301 if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
163 if (!memcmp(dentry->d_name.name, (*entry)->name, len)) 303 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
164 break; 304 break;
165 } 305 }
166 error = ERR_PTR(-ENOENT);
167 if (entry == last) 306 if (entry == last)
168 goto out; 307 goto out;
169 308
@@ -198,3 +337,7 @@ out_invalid:
198 return ERR_PTR(-EINVAL); 337 return ERR_PTR(-EINVAL);
199} 338}
200 339
340bool proc_ns_inode(struct inode *inode)
341{
342 return inode->i_fop == &ns_file_operations;
343}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92d2e0..c6e9fac26ba 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
100 int err; 100 int err;
101 struct super_block *sb; 101 struct super_block *sb;
102 struct pid_namespace *ns; 102 struct pid_namespace *ns;
103 struct proc_inode *ei;
104 char *options; 103 char *options;
105 104
106 if (flags & MS_KERNMOUNT) { 105 if (flags & MS_KERNMOUNT) {
107 ns = (struct pid_namespace *)data; 106 ns = (struct pid_namespace *)data;
108 options = NULL; 107 options = NULL;
109 } else { 108 } else {
110 ns = current->nsproxy->pid_ns; 109 ns = task_active_pid_ns(current);
111 options = data; 110 options = data;
112 } 111 }
113 112
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
130 sb->s_flags |= MS_ACTIVE; 129 sb->s_flags |= MS_ACTIVE;
131 } 130 }
132 131
133 ei = PROC_I(sb->s_root->d_inode);
134 if (!ei->pid) {
135 rcu_read_lock();
136 ei->pid = get_pid(find_pid_ns(1, ns));
137 rcu_read_unlock();
138 }
139
140 return dget(sb->s_root); 132 return dget(sb->s_root);
141} 133}
142 134
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
153 .name = "proc", 145 .name = "proc",
154 .mount = proc_mount, 146 .mount = proc_mount,
155 .kill_sb = proc_kill_sb, 147 .kill_sb = proc_kill_sb,
148 .fs_flags = FS_USERNS_MOUNT,
156}; 149};
157 150
158void __init proc_root_init(void) 151void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
163 err = register_filesystem(&proc_fs_type); 156 err = register_filesystem(&proc_fs_type);
164 if (err) 157 if (err)
165 return; 158 return;
166 err = pid_ns_prepare_proc(&init_pid_ns);
167 if (err) {
168 unregister_filesystem(&proc_fs_type);
169 return;
170 }
171 159
160 proc_self_init();
172 proc_symlink("mounts", NULL, "self/mounts"); 161 proc_symlink("mounts", NULL, "self/mounts");
173 162
174 proc_net_init(); 163 proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 00000000000..aa5cc3bff14
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
1#include <linux/proc_fs.h>
2#include <linux/sched.h>
3#include <linux/namei.h>
4
5/*
6 * /proc/self:
7 */
8static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
9 int buflen)
10{
11 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
12 pid_t tgid = task_tgid_nr_ns(current, ns);
13 char tmp[PROC_NUMBUF];
14 if (!tgid)
15 return -ENOENT;
16 sprintf(tmp, "%d", tgid);
17 return vfs_readlink(dentry,buffer,buflen,tmp);
18}
19
20static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
21{
22 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
23 pid_t tgid = task_tgid_nr_ns(current, ns);
24 char *name = ERR_PTR(-ENOENT);
25 if (tgid) {
26 /* 11 for max length of signed int in decimal + NULL term */
27 name = kmalloc(12, GFP_KERNEL);
28 if (!name)
29 name = ERR_PTR(-ENOMEM);
30 else
31 sprintf(name, "%d", tgid);
32 }
33 nd_set_link(nd, name);
34 return NULL;
35}
36
37static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
38 void *cookie)
39{
40 char *s = nd_get_link(nd);
41 if (!IS_ERR(s))
42 kfree(s);
43}
44
45static const struct inode_operations proc_self_inode_operations = {
46 .readlink = proc_self_readlink,
47 .follow_link = proc_self_follow_link,
48 .put_link = proc_self_put_link,
49};
50
51void __init proc_self_init(void)
52{
53 struct proc_dir_entry *proc_self_symlink;
54 mode_t mode;
55
56 mode = S_IFLNK | S_IRWXUGO;
57 proc_self_symlink = proc_create("self", mode, NULL, NULL );
58 proc_self_symlink->proc_iops = &proc_self_inode_operations;
59}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 71eb7e25392..db940a9be04 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 149 .name = "sysfs",
150 .mount = sysfs_mount, 150 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 151 .kill_sb = sysfs_kill_sb,
152 .fs_flags = FS_USERNS_MOUNT,
152}; 153};
153 154
154int __init sysfs_init(void) 155int __init sysfs_init(void)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 0142aacb70b..abb2cd50f6b 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -344,10 +344,8 @@ static inline void put_cred(const struct cred *_cred)
344extern struct user_namespace init_user_ns; 344extern struct user_namespace init_user_ns;
345#ifdef CONFIG_USER_NS 345#ifdef CONFIG_USER_NS
346#define current_user_ns() (current_cred_xxx(user_ns)) 346#define current_user_ns() (current_cred_xxx(user_ns))
347#define task_user_ns(task) (task_cred_xxx((task), user_ns))
348#else 347#else
349#define current_user_ns() (&init_user_ns) 348#define current_user_ns() (&init_user_ns)
350#define task_user_ns(task) (&init_user_ns)
351#endif 349#endif
352 350
353 351
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408fb1e77a0..035521b4652 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1810,6 +1810,8 @@ struct file_system_type {
1810#define FS_REQUIRES_DEV 1 1810#define FS_REQUIRES_DEV 1
1811#define FS_BINARY_MOUNTDATA 2 1811#define FS_BINARY_MOUNTDATA 2
1812#define FS_HAS_SUBTYPE 4 1812#define FS_HAS_SUBTYPE 4
1813#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
1814#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
1813#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 1815#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
1814#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 1816#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
1815 struct dentry *(*mount) (struct file_system_type *, int, 1817 struct dentry *(*mount) (struct file_system_type *, int,
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92a915..fe771978e87 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
67 67
68 /* user_ns which owns the ipc ns */ 68 /* user_ns which owns the ipc ns */
69 struct user_namespace *user_ns; 69 struct user_namespace *user_ns;
70
71 unsigned int proc_inum;
70}; 72};
71 73
72extern struct ipc_namespace init_ipc_ns; 74extern struct ipc_namespace init_ipc_ns;
@@ -133,7 +135,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
133 135
134#if defined(CONFIG_IPC_NS) 136#if defined(CONFIG_IPC_NS)
135extern struct ipc_namespace *copy_ipcs(unsigned long flags, 137extern struct ipc_namespace *copy_ipcs(unsigned long flags,
136 struct task_struct *tsk); 138 struct user_namespace *user_ns, struct ipc_namespace *ns);
139
137static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) 140static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
138{ 141{
139 if (ns) 142 if (ns)
@@ -144,12 +147,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
144extern void put_ipc_ns(struct ipc_namespace *ns); 147extern void put_ipc_ns(struct ipc_namespace *ns);
145#else 148#else
146static inline struct ipc_namespace *copy_ipcs(unsigned long flags, 149static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
147 struct task_struct *tsk) 150 struct user_namespace *user_ns, struct ipc_namespace *ns)
148{ 151{
149 if (flags & CLONE_NEWIPC) 152 if (flags & CLONE_NEWIPC)
150 return ERR_PTR(-EINVAL); 153 return ERR_PTR(-EINVAL);
151 154
152 return tsk->nsproxy->ipc_ns; 155 return ns;
153} 156}
154 157
155static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) 158static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e3903d77..12b2ab51032 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
4 4
5struct mnt_namespace; 5struct mnt_namespace;
6struct fs_struct; 6struct fs_struct;
7struct user_namespace;
7 8
8extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, 9extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
9 struct fs_struct *); 10 struct user_namespace *, struct fs_struct *);
10extern void put_mnt_ns(struct mnt_namespace *ns); 11extern void put_mnt_ns(struct mnt_namespace *ns);
11 12
12extern const struct file_operations proc_mounts_operations; 13extern const struct file_operations proc_mounts_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55ad00..10e5947491c 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); 67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
68void free_nsproxy(struct nsproxy *ns); 68void free_nsproxy(struct nsproxy *ns);
69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, 69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
70 struct fs_struct *); 70 struct cred *, struct fs_struct *);
71int __init nsproxy_cache_init(void); 71int __init nsproxy_cache_init(void);
72 72
73static inline void put_nsproxy(struct nsproxy *ns) 73static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87eacc..bf285999273 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
21 struct kref kref; 21 struct kref kref;
22 struct pidmap pidmap[PIDMAP_ENTRIES]; 22 struct pidmap pidmap[PIDMAP_ENTRIES];
23 int last_pid; 23 int last_pid;
24 int nr_hashed;
24 struct task_struct *child_reaper; 25 struct task_struct *child_reaper;
25 struct kmem_cache *pid_cachep; 26 struct kmem_cache *pid_cachep;
26 unsigned int level; 27 unsigned int level;
@@ -31,9 +32,12 @@ struct pid_namespace {
31#ifdef CONFIG_BSD_PROCESS_ACCT 32#ifdef CONFIG_BSD_PROCESS_ACCT
32 struct bsd_acct_struct *bacct; 33 struct bsd_acct_struct *bacct;
33#endif 34#endif
35 struct user_namespace *user_ns;
36 struct work_struct proc_work;
34 kgid_t pid_gid; 37 kgid_t pid_gid;
35 int hide_pid; 38 int hide_pid;
36 int reboot; /* group exit code if this pidns was rebooted */ 39 int reboot; /* group exit code if this pidns was rebooted */
40 unsigned int proc_inum;
37}; 41};
38 42
39extern struct pid_namespace init_pid_ns; 43extern struct pid_namespace init_pid_ns;
@@ -46,7 +50,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
46 return ns; 50 return ns;
47} 51}
48 52
49extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 53extern struct pid_namespace *copy_pid_ns(unsigned long flags,
54 struct user_namespace *user_ns, struct pid_namespace *ns);
50extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); 55extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
51extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); 56extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
52extern void put_pid_ns(struct pid_namespace *ns); 57extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +64,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
59 return ns; 64 return ns;
60} 65}
61 66
62static inline struct pid_namespace * 67static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
63copy_pid_ns(unsigned long flags, struct pid_namespace *ns) 68 struct user_namespace *user_ns, struct pid_namespace *ns)
64{ 69{
65 if (flags & CLONE_NEWPID) 70 if (flags & CLONE_NEWPID)
66 ns = ERR_PTR(-EINVAL); 71 ns = ERR_PTR(-EINVAL);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1..2e24018b7ce 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
28 */ 28 */
29 29
30enum { 30enum {
31 PROC_ROOT_INO = 1, 31 PROC_ROOT_INO = 1,
32 PROC_IPC_INIT_INO = 0xEFFFFFFFU,
33 PROC_UTS_INIT_INO = 0xEFFFFFFEU,
34 PROC_USER_INIT_INO = 0xEFFFFFFDU,
35 PROC_PID_INIT_INO = 0xEFFFFFFCU,
32}; 36};
33 37
34/* 38/*
@@ -174,7 +178,10 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
174 struct proc_dir_entry *parent); 178 struct proc_dir_entry *parent);
175 179
176extern struct file *proc_ns_fget(int fd); 180extern struct file *proc_ns_fget(int fd);
181extern bool proc_ns_inode(struct inode *inode);
177 182
183extern int proc_alloc_inum(unsigned int *pino);
184extern void proc_free_inum(unsigned int inum);
178#else 185#else
179 186
180#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) 187#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -229,6 +236,19 @@ static inline struct file *proc_ns_fget(int fd)
229 return ERR_PTR(-EINVAL); 236 return ERR_PTR(-EINVAL);
230} 237}
231 238
239static inline bool proc_ns_inode(struct inode *inode)
240{
241 return false;
242}
243
244static inline int proc_alloc_inum(unsigned int *inum)
245{
246 *inum = 1;
247 return 0;
248}
249static inline void proc_free_inum(unsigned int inum)
250{
251}
232#endif /* CONFIG_PROC_FS */ 252#endif /* CONFIG_PROC_FS */
233 253
234#if !defined(CONFIG_PROC_KCORE) 254#if !defined(CONFIG_PROC_KCORE)
@@ -247,10 +267,14 @@ struct proc_ns_operations {
247 void *(*get)(struct task_struct *task); 267 void *(*get)(struct task_struct *task);
248 void (*put)(void *ns); 268 void (*put)(void *ns);
249 int (*install)(struct nsproxy *nsproxy, void *ns); 269 int (*install)(struct nsproxy *nsproxy, void *ns);
270 unsigned int (*inum)(void *ns);
250}; 271};
251extern const struct proc_ns_operations netns_operations; 272extern const struct proc_ns_operations netns_operations;
252extern const struct proc_ns_operations utsns_operations; 273extern const struct proc_ns_operations utsns_operations;
253extern const struct proc_ns_operations ipcns_operations; 274extern const struct proc_ns_operations ipcns_operations;
275extern const struct proc_ns_operations pidns_operations;
276extern const struct proc_ns_operations userns_operations;
277extern const struct proc_ns_operations mntns_operations;
254 278
255union proc_op { 279union proc_op {
256 int (*proc_get_link)(struct dentry *, struct path *); 280 int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142cae446..b9bd2e6c73c 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
25 struct user_namespace *parent; 25 struct user_namespace *parent;
26 kuid_t owner; 26 kuid_t owner;
27 kgid_t group; 27 kgid_t group;
28 unsigned int proc_inum;
28}; 29};
29 30
30extern struct user_namespace init_user_ns; 31extern struct user_namespace init_user_ns;
@@ -39,6 +40,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
39} 40}
40 41
41extern int create_user_ns(struct cred *new); 42extern int create_user_ns(struct cred *new);
43extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
42extern void free_user_ns(struct kref *kref); 44extern void free_user_ns(struct kref *kref);
43 45
44static inline void put_user_ns(struct user_namespace *ns) 46static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +68,14 @@ static inline int create_user_ns(struct cred *new)
66 return -EINVAL; 68 return -EINVAL;
67} 69}
68 70
71static inline int unshare_userns(unsigned long unshare_flags,
72 struct cred **new_cred)
73{
74 if (unshare_flags & CLONE_NEWUSER)
75 return -EINVAL;
76 return 0;
77}
78
69static inline void put_user_ns(struct user_namespace *ns) 79static inline void put_user_ns(struct user_namespace *ns)
70{ 80{
71} 81}
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b345206722..239e27733d6 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
23 struct kref kref; 23 struct kref kref;
24 struct new_utsname name; 24 struct new_utsname name;
25 struct user_namespace *user_ns; 25 struct user_namespace *user_ns;
26 unsigned int proc_inum;
26}; 27};
27extern struct uts_namespace init_uts_ns; 28extern struct uts_namespace init_uts_ns;
28 29
@@ -33,7 +34,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
33} 34}
34 35
35extern struct uts_namespace *copy_utsname(unsigned long flags, 36extern struct uts_namespace *copy_utsname(unsigned long flags,
36 struct task_struct *tsk); 37 struct user_namespace *user_ns, struct uts_namespace *old_ns);
37extern void free_uts_ns(struct kref *kref); 38extern void free_uts_ns(struct kref *kref);
38 39
39static inline void put_uts_ns(struct uts_namespace *ns) 40static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +51,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
50} 51}
51 52
52static inline struct uts_namespace *copy_utsname(unsigned long flags, 53static inline struct uts_namespace *copy_utsname(unsigned long flags,
53 struct task_struct *tsk) 54 struct user_namespace *user_ns, struct uts_namespace *old_ns)
54{ 55{
55 if (flags & CLONE_NEWUTS) 56 if (flags & CLONE_NEWUTS)
56 return ERR_PTR(-EINVAL); 57 return ERR_PTR(-EINVAL);
57 58
58 return tsk->nsproxy->uts_ns; 59 return old_ns;
59} 60}
60#endif 61#endif
61 62
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f56b79..de644bcd861 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
56 56
57 struct user_namespace *user_ns; /* Owning user namespace */ 57 struct user_namespace *user_ns; /* Owning user namespace */
58 58
59 unsigned int proc_inum;
60
59 struct proc_dir_entry *proc_net; 61 struct proc_dir_entry *proc_net;
60 struct proc_dir_entry *proc_net_stat; 62 struct proc_dir_entry *proc_net_stat;
61 63
diff --git a/init/Kconfig b/init/Kconfig
index 1a207efca59..675d8a2326c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1069,11 +1069,9 @@ config UIDGID_CONVERTED
1069 # Filesystems 1069 # Filesystems
1070 depends on 9P_FS = n 1070 depends on 9P_FS = n
1071 depends on AFS_FS = n 1071 depends on AFS_FS = n
1072 depends on AUTOFS4_FS = n
1073 depends on CEPH_FS = n 1072 depends on CEPH_FS = n
1074 depends on CIFS = n 1073 depends on CIFS = n
1075 depends on CODA_FS = n 1074 depends on CODA_FS = n
1076 depends on FUSE_FS = n
1077 depends on GFS2_FS = n 1075 depends on GFS2_FS = n
1078 depends on NCP_FS = n 1076 depends on NCP_FS = n
1079 depends on NFSD = n 1077 depends on NFSD = n
diff --git a/init/main.c b/init/main.c
index 63ae904a99a..baf1f0f5c46 100644
--- a/init/main.c
+++ b/init/main.c
@@ -812,7 +812,6 @@ static int __ref kernel_init(void *unused)
812 system_state = SYSTEM_RUNNING; 812 system_state = SYSTEM_RUNNING;
813 numa_default_policy(); 813 numa_default_policy();
814 814
815 current->signal->flags |= SIGNAL_UNKILLABLE;
816 flush_delayed_fput(); 815 flush_delayed_fput();
817 816
818 if (ramdisk_execute_command) { 817 if (ramdisk_execute_command) {
diff --git a/init/version.c b/init/version.c
index 86fe0ccb997..58170f18912 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
12#include <linux/utsname.h> 12#include <linux/utsname.h>
13#include <generated/utsrelease.h> 13#include <generated/utsrelease.h>
14#include <linux/version.h> 14#include <linux/version.h>
15#include <linux/proc_fs.h>
15 16
16#ifndef CONFIG_KALLSYMS 17#ifndef CONFIG_KALLSYMS
17#define version(a) Version_ ## a 18#define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
34 .domainname = UTS_DOMAINNAME, 35 .domainname = UTS_DOMAINNAME,
35 }, 36 },
36 .user_ns = &init_user_ns, 37 .user_ns = &init_user_ns,
38 .proc_inum = PROC_UTS_INIT_INO,
37}; 39};
38EXPORT_SYMBOL_GPL(init_uts_ns); 40EXPORT_SYMBOL_GPL(init_uts_ns);
39 41
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d377c9..6471f1bdae9 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
16#include <linux/msg.h> 16#include <linux/msg.h>
17#include <linux/ipc_namespace.h> 17#include <linux/ipc_namespace.h>
18#include <linux/utsname.h> 18#include <linux/utsname.h>
19#include <linux/proc_fs.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20 21
21#include "util.h" 22#include "util.h"
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock);
30struct ipc_namespace init_ipc_ns = { 31struct ipc_namespace init_ipc_ns = {
31 .count = ATOMIC_INIT(1), 32 .count = ATOMIC_INIT(1),
32 .user_ns = &init_user_ns, 33 .user_ns = &init_user_ns,
34 .proc_inum = PROC_IPC_INIT_INO,
33}; 35};
34 36
35atomic_t nr_ipc_ns = ATOMIC_INIT(1); 37atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f362298c5ce..cf3386a51de 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
16 16
17#include "util.h" 17#include "util.h"
18 18
19static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, 19static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
20 struct ipc_namespace *old_ns) 20 struct ipc_namespace *old_ns)
21{ 21{
22 struct ipc_namespace *ns; 22 struct ipc_namespace *ns;
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
26 if (ns == NULL) 26 if (ns == NULL)
27 return ERR_PTR(-ENOMEM); 27 return ERR_PTR(-ENOMEM);
28 28
29 err = proc_alloc_inum(&ns->proc_inum);
30 if (err) {
31 kfree(ns);
32 return ERR_PTR(err);
33 }
34
29 atomic_set(&ns->count, 1); 35 atomic_set(&ns->count, 1);
30 err = mq_init_ns(ns); 36 err = mq_init_ns(ns);
31 if (err) { 37 if (err) {
38 proc_free_inum(ns->proc_inum);
32 kfree(ns); 39 kfree(ns);
33 return ERR_PTR(err); 40 return ERR_PTR(err);
34 } 41 }
@@ -46,19 +53,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
46 ipcns_notify(IPCNS_CREATED); 53 ipcns_notify(IPCNS_CREATED);
47 register_ipcns_notifier(ns); 54 register_ipcns_notifier(ns);
48 55
49 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); 56 ns->user_ns = get_user_ns(user_ns);
50 57
51 return ns; 58 return ns;
52} 59}
53 60
54struct ipc_namespace *copy_ipcs(unsigned long flags, 61struct ipc_namespace *copy_ipcs(unsigned long flags,
55 struct task_struct *tsk) 62 struct user_namespace *user_ns, struct ipc_namespace *ns)
56{ 63{
57 struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
58
59 if (!(flags & CLONE_NEWIPC)) 64 if (!(flags & CLONE_NEWIPC))
60 return get_ipc_ns(ns); 65 return get_ipc_ns(ns);
61 return create_ipc_ns(tsk, ns); 66 return create_ipc_ns(user_ns, ns);
62} 67}
63 68
64/* 69/*
@@ -113,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
113 */ 118 */
114 ipcns_notify(IPCNS_REMOVED); 119 ipcns_notify(IPCNS_REMOVED);
115 put_user_ns(ns->user_ns); 120 put_user_ns(ns->user_ns);
121 proc_free_inum(ns->proc_inum);
116 kfree(ns); 122 kfree(ns);
117} 123}
118 124
@@ -161,8 +167,12 @@ static void ipcns_put(void *ns)
161 return put_ipc_ns(ns); 167 return put_ipc_ns(ns);
162} 168}
163 169
164static int ipcns_install(struct nsproxy *nsproxy, void *ns) 170static int ipcns_install(struct nsproxy *nsproxy, void *new)
165{ 171{
172 struct ipc_namespace *ns = new;
173 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
174 return -EPERM;
175
166 /* Ditch state from the old ipc namespace */ 176 /* Ditch state from the old ipc namespace */
167 exit_sem(current); 177 exit_sem(current);
168 put_ipc_ns(nsproxy->ipc_ns); 178 put_ipc_ns(nsproxy->ipc_ns);
@@ -170,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
170 return 0; 180 return 0;
171} 181}
172 182
183static unsigned int ipcns_inum(void *vp)
184{
185 struct ipc_namespace *ns = vp;
186
187 return ns->proc_inum;
188}
189
173const struct proc_ns_operations ipcns_operations = { 190const struct proc_ns_operations ipcns_operations = {
174 .name = "ipc", 191 .name = "ipc",
175 .type = CLONE_NEWIPC, 192 .type = CLONE_NEWIPC,
176 .get = ipcns_get, 193 .get = ipcns_get,
177 .put = ipcns_put, 194 .put = ipcns_put,
178 .install = ipcns_install, 195 .install = ipcns_install,
196 .inum = ipcns_inum,
179}; 197};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f34c41bfaa3..9915ffe0137 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3409,7 +3409,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3409{ 3409{
3410 struct cgroup_pidlist *l; 3410 struct cgroup_pidlist *l;
3411 /* don't need task_nsproxy() if we're looking at ourself */ 3411 /* don't need task_nsproxy() if we're looking at ourself */
3412 struct pid_namespace *ns = current->nsproxy->pid_ns; 3412 struct pid_namespace *ns = task_active_pid_ns(current);
3413 3413
3414 /* 3414 /*
3415 * We can't drop the pidlist_mutex before taking the l->mutex in case 3415 * We can't drop the pidlist_mutex before taking the l->mutex in case
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f9ff5493171..301079d06f2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6155,7 +6155,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6155 6155
6156 event->parent = parent_event; 6156 event->parent = parent_event;
6157 6157
6158 event->ns = get_pid_ns(current->nsproxy->pid_ns); 6158 event->ns = get_pid_ns(task_active_pid_ns(current));
6159 event->id = atomic64_inc_return(&perf_event_id); 6159 event->id = atomic64_inc_return(&perf_event_id);
6160 6160
6161 event->state = PERF_EVENT_STATE_INACTIVE; 6161 event->state = PERF_EVENT_STATE_INACTIVE;
diff --git a/kernel/exit.c b/kernel/exit.c
index 50d2e93c36e..b4df2193721 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,18 +72,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
72 list_del_rcu(&p->tasks); 72 list_del_rcu(&p->tasks);
73 list_del_init(&p->sibling); 73 list_del_init(&p->sibling);
74 __this_cpu_dec(process_counts); 74 __this_cpu_dec(process_counts);
75 /*
76 * If we are the last child process in a pid namespace to be
77 * reaped, notify the reaper sleeping zap_pid_ns_processes().
78 */
79 if (IS_ENABLED(CONFIG_PID_NS)) {
80 struct task_struct *parent = p->real_parent;
81
82 if ((task_active_pid_ns(parent)->child_reaper == parent) &&
83 list_empty(&parent->children) &&
84 (parent->flags & PF_EXITING))
85 wake_up_process(parent);
86 }
87 } 75 }
88 list_del_rcu(&p->thread_group); 76 list_del_rcu(&p->thread_group);
89} 77}
diff --git a/kernel/fork.c b/kernel/fork.c
index 115d6c2e4cc..c36c4e301ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1044 atomic_set(&sig->live, 1); 1044 atomic_set(&sig->live, 1);
1045 atomic_set(&sig->sigcnt, 1); 1045 atomic_set(&sig->sigcnt, 1);
1046 init_waitqueue_head(&sig->wait_chldexit); 1046 init_waitqueue_head(&sig->wait_chldexit);
1047 if (clone_flags & CLONE_NEWPID)
1048 sig->flags |= SIGNAL_UNKILLABLE;
1049 sig->curr_target = tsk; 1047 sig->curr_target = tsk;
1050 init_sigpending(&sig->shared_pending); 1048 init_sigpending(&sig->shared_pending);
1051 INIT_LIST_HEAD(&sig->posix_timers); 1049 INIT_LIST_HEAD(&sig->posix_timers);
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1438 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 1436 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1439 1437
1440 if (thread_group_leader(p)) { 1438 if (thread_group_leader(p)) {
1441 if (is_child_reaper(pid)) 1439 if (is_child_reaper(pid)) {
1442 p->nsproxy->pid_ns->child_reaper = p; 1440 ns_of_pid(pid)->child_reaper = p;
1441 p->signal->flags |= SIGNAL_UNKILLABLE;
1442 }
1443 1443
1444 p->signal->leader_pid = pid; 1444 p->signal->leader_pid = pid;
1445 p->signal->tty = tty_kref_get(current->signal->tty); 1445 p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io:
1473 if (p->io_context) 1473 if (p->io_context)
1474 exit_io_context(p); 1474 exit_io_context(p);
1475bad_fork_cleanup_namespaces: 1475bad_fork_cleanup_namespaces:
1476 if (unlikely(clone_flags & CLONE_NEWPID))
1477 pid_ns_release_proc(p->nsproxy->pid_ns);
1478 exit_task_namespaces(p); 1476 exit_task_namespaces(p);
1479bad_fork_cleanup_mm: 1477bad_fork_cleanup_mm:
1480 if (p->mm) 1478 if (p->mm)
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags,
1554 * Do some preliminary argument and permissions checking before we 1552 * Do some preliminary argument and permissions checking before we
1555 * actually start allocating stuff 1553 * actually start allocating stuff
1556 */ 1554 */
1557 if (clone_flags & CLONE_NEWUSER) { 1555 if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
1558 if (clone_flags & CLONE_THREAD) 1556 if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
1559 return -EINVAL; 1557 return -EINVAL;
1560 /* hopefully this check will go away when userns support is
1561 * complete
1562 */
1563 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
1564 !capable(CAP_SETGID))
1565 return -EPERM;
1566 } 1558 }
1567 1559
1568 /* 1560 /*
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
1724{ 1716{
1725 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1717 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1726 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1718 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1727 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) 1719 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
1720 CLONE_NEWUSER|CLONE_NEWPID))
1728 return -EINVAL; 1721 return -EINVAL;
1729 /* 1722 /*
1730 * Not implemented, but pretend it works if there is nothing to 1723 * Not implemented, but pretend it works if there is nothing to
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1791{ 1784{
1792 struct fs_struct *fs, *new_fs = NULL; 1785 struct fs_struct *fs, *new_fs = NULL;
1793 struct files_struct *fd, *new_fd = NULL; 1786 struct files_struct *fd, *new_fd = NULL;
1787 struct cred *new_cred = NULL;
1794 struct nsproxy *new_nsproxy = NULL; 1788 struct nsproxy *new_nsproxy = NULL;
1795 int do_sysvsem = 0; 1789 int do_sysvsem = 0;
1796 int err; 1790 int err;
1797 1791
1798 err = check_unshare_flags(unshare_flags); 1792 /*
1799 if (err) 1793 * If unsharing a user namespace must also unshare the thread.
1800 goto bad_unshare_out; 1794 */
1801 1795 if (unshare_flags & CLONE_NEWUSER)
1796 unshare_flags |= CLONE_THREAD;
1797 /*
1798 * If unsharing a pid namespace must also unshare the thread.
1799 */
1800 if (unshare_flags & CLONE_NEWPID)
1801 unshare_flags |= CLONE_THREAD;
1802 /*
1803 * If unsharing a thread from a thread group, must also unshare vm.
1804 */
1805 if (unshare_flags & CLONE_THREAD)
1806 unshare_flags |= CLONE_VM;
1807 /*
1808 * If unsharing vm, must also unshare signal handlers.
1809 */
1810 if (unshare_flags & CLONE_VM)
1811 unshare_flags |= CLONE_SIGHAND;
1802 /* 1812 /*
1803 * If unsharing namespace, must also unshare filesystem information. 1813 * If unsharing namespace, must also unshare filesystem information.
1804 */ 1814 */
1805 if (unshare_flags & CLONE_NEWNS) 1815 if (unshare_flags & CLONE_NEWNS)
1806 unshare_flags |= CLONE_FS; 1816 unshare_flags |= CLONE_FS;
1817
1818 err = check_unshare_flags(unshare_flags);
1819 if (err)
1820 goto bad_unshare_out;
1807 /* 1821 /*
1808 * CLONE_NEWIPC must also detach from the undolist: after switching 1822 * CLONE_NEWIPC must also detach from the undolist: after switching
1809 * to a new ipc namespace, the semaphore arrays from the old 1823 * to a new ipc namespace, the semaphore arrays from the old
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1817 err = unshare_fd(unshare_flags, &new_fd); 1831 err = unshare_fd(unshare_flags, &new_fd);
1818 if (err) 1832 if (err)
1819 goto bad_unshare_cleanup_fs; 1833 goto bad_unshare_cleanup_fs;
1820 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); 1834 err = unshare_userns(unshare_flags, &new_cred);
1821 if (err) 1835 if (err)
1822 goto bad_unshare_cleanup_fd; 1836 goto bad_unshare_cleanup_fd;
1837 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1838 new_cred, new_fs);
1839 if (err)
1840 goto bad_unshare_cleanup_cred;
1823 1841
1824 if (new_fs || new_fd || do_sysvsem || new_nsproxy) { 1842 if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
1825 if (do_sysvsem) { 1843 if (do_sysvsem) {
1826 /* 1844 /*
1827 * CLONE_SYSVSEM is equivalent to sys_exit(). 1845 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1854 } 1872 }
1855 1873
1856 task_unlock(current); 1874 task_unlock(current);
1875
1876 if (new_cred) {
1877 /* Install the new user namespace */
1878 commit_creds(new_cred);
1879 new_cred = NULL;
1880 }
1857 } 1881 }
1858 1882
1859 if (new_nsproxy) 1883 if (new_nsproxy)
1860 put_nsproxy(new_nsproxy); 1884 put_nsproxy(new_nsproxy);
1861 1885
1886bad_unshare_cleanup_cred:
1887 if (new_cred)
1888 put_cred(new_cred);
1862bad_unshare_cleanup_fd: 1889bad_unshare_cleanup_fd:
1863 if (new_fd) 1890 if (new_fd)
1864 put_files_struct(new_fd); 1891 put_files_struct(new_fd);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 7e1c3de1ce4..78e2ecb2016 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void)
57 * leave it to the caller to do proper locking and attach it to task. 57 * leave it to the caller to do proper locking and attach it to task.
58 */ 58 */
59static struct nsproxy *create_new_namespaces(unsigned long flags, 59static struct nsproxy *create_new_namespaces(unsigned long flags,
60 struct task_struct *tsk, struct fs_struct *new_fs) 60 struct task_struct *tsk, struct user_namespace *user_ns,
61 struct fs_struct *new_fs)
61{ 62{
62 struct nsproxy *new_nsp; 63 struct nsproxy *new_nsp;
63 int err; 64 int err;
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
66 if (!new_nsp) 67 if (!new_nsp)
67 return ERR_PTR(-ENOMEM); 68 return ERR_PTR(-ENOMEM);
68 69
69 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 70 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
70 if (IS_ERR(new_nsp->mnt_ns)) { 71 if (IS_ERR(new_nsp->mnt_ns)) {
71 err = PTR_ERR(new_nsp->mnt_ns); 72 err = PTR_ERR(new_nsp->mnt_ns);
72 goto out_ns; 73 goto out_ns;
73 } 74 }
74 75
75 new_nsp->uts_ns = copy_utsname(flags, tsk); 76 new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
76 if (IS_ERR(new_nsp->uts_ns)) { 77 if (IS_ERR(new_nsp->uts_ns)) {
77 err = PTR_ERR(new_nsp->uts_ns); 78 err = PTR_ERR(new_nsp->uts_ns);
78 goto out_uts; 79 goto out_uts;
79 } 80 }
80 81
81 new_nsp->ipc_ns = copy_ipcs(flags, tsk); 82 new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
82 if (IS_ERR(new_nsp->ipc_ns)) { 83 if (IS_ERR(new_nsp->ipc_ns)) {
83 err = PTR_ERR(new_nsp->ipc_ns); 84 err = PTR_ERR(new_nsp->ipc_ns);
84 goto out_ipc; 85 goto out_ipc;
85 } 86 }
86 87
87 new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); 88 new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
88 if (IS_ERR(new_nsp->pid_ns)) { 89 if (IS_ERR(new_nsp->pid_ns)) {
89 err = PTR_ERR(new_nsp->pid_ns); 90 err = PTR_ERR(new_nsp->pid_ns);
90 goto out_pid; 91 goto out_pid;
91 } 92 }
92 93
93 new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); 94 new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
94 if (IS_ERR(new_nsp->net_ns)) { 95 if (IS_ERR(new_nsp->net_ns)) {
95 err = PTR_ERR(new_nsp->net_ns); 96 err = PTR_ERR(new_nsp->net_ns);
96 goto out_net; 97 goto out_net;
@@ -122,6 +123,7 @@ out_ns:
122int copy_namespaces(unsigned long flags, struct task_struct *tsk) 123int copy_namespaces(unsigned long flags, struct task_struct *tsk)
123{ 124{
124 struct nsproxy *old_ns = tsk->nsproxy; 125 struct nsproxy *old_ns = tsk->nsproxy;
126 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
125 struct nsproxy *new_ns; 127 struct nsproxy *new_ns;
126 int err = 0; 128 int err = 0;
127 129
@@ -134,7 +136,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
134 CLONE_NEWPID | CLONE_NEWNET))) 136 CLONE_NEWPID | CLONE_NEWNET)))
135 return 0; 137 return 0;
136 138
137 if (!capable(CAP_SYS_ADMIN)) { 139 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
138 err = -EPERM; 140 err = -EPERM;
139 goto out; 141 goto out;
140 } 142 }
@@ -151,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
151 goto out; 153 goto out;
152 } 154 }
153 155
154 new_ns = create_new_namespaces(flags, tsk, tsk->fs); 156 new_ns = create_new_namespaces(flags, tsk,
157 task_cred_xxx(tsk, user_ns), tsk->fs);
155 if (IS_ERR(new_ns)) { 158 if (IS_ERR(new_ns)) {
156 err = PTR_ERR(new_ns); 159 err = PTR_ERR(new_ns);
157 goto out; 160 goto out;
@@ -183,19 +186,21 @@ void free_nsproxy(struct nsproxy *ns)
183 * On success, returns the new nsproxy. 186 * On success, returns the new nsproxy.
184 */ 187 */
185int unshare_nsproxy_namespaces(unsigned long unshare_flags, 188int unshare_nsproxy_namespaces(unsigned long unshare_flags,
186 struct nsproxy **new_nsp, struct fs_struct *new_fs) 189 struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
187{ 190{
191 struct user_namespace *user_ns;
188 int err = 0; 192 int err = 0;
189 193
190 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 194 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
191 CLONE_NEWNET))) 195 CLONE_NEWNET | CLONE_NEWPID)))
192 return 0; 196 return 0;
193 197
194 if (!capable(CAP_SYS_ADMIN)) 198 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
199 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
195 return -EPERM; 200 return -EPERM;
196 201
197 *new_nsp = create_new_namespaces(unshare_flags, current, 202 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
198 new_fs ? new_fs : current->fs); 203 new_fs ? new_fs : current->fs);
199 if (IS_ERR(*new_nsp)) { 204 if (IS_ERR(*new_nsp)) {
200 err = PTR_ERR(*new_nsp); 205 err = PTR_ERR(*new_nsp);
201 goto out; 206 goto out;
@@ -241,9 +246,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
241 struct file *file; 246 struct file *file;
242 int err; 247 int err;
243 248
244 if (!capable(CAP_SYS_ADMIN))
245 return -EPERM;
246
247 file = proc_ns_fget(fd); 249 file = proc_ns_fget(fd);
248 if (IS_ERR(file)) 250 if (IS_ERR(file))
249 return PTR_ERR(file); 251 return PTR_ERR(file);
@@ -254,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
254 if (nstype && (ops->type != nstype)) 256 if (nstype && (ops->type != nstype))
255 goto out; 257 goto out;
256 258
257 new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); 259 new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
258 if (IS_ERR(new_nsproxy)) { 260 if (IS_ERR(new_nsproxy)) {
259 err = PTR_ERR(new_nsproxy); 261 err = PTR_ERR(new_nsproxy);
260 goto out; 262 goto out;
diff --git a/kernel/pid.c b/kernel/pid.c
index fd996c1ed9f..3e2cf8100ac 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
36#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/proc_fs.h>
39 40
40#define pid_hashfn(nr, ns) \ 41#define pid_hashfn(nr, ns) \
41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 42 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -78,6 +79,8 @@ struct pid_namespace init_pid_ns = {
78 .last_pid = 0, 79 .last_pid = 0,
79 .level = 0, 80 .level = 0,
80 .child_reaper = &init_task, 81 .child_reaper = &init_task,
82 .user_ns = &init_user_ns,
83 .proc_inum = PROC_PID_INIT_INO,
81}; 84};
82EXPORT_SYMBOL_GPL(init_pid_ns); 85EXPORT_SYMBOL_GPL(init_pid_ns);
83 86
@@ -269,8 +272,24 @@ void free_pid(struct pid *pid)
269 unsigned long flags; 272 unsigned long flags;
270 273
271 spin_lock_irqsave(&pidmap_lock, flags); 274 spin_lock_irqsave(&pidmap_lock, flags);
272 for (i = 0; i <= pid->level; i++) 275 for (i = 0; i <= pid->level; i++) {
273 hlist_del_rcu(&pid->numbers[i].pid_chain); 276 struct upid *upid = pid->numbers + i;
277 struct pid_namespace *ns = upid->ns;
278 hlist_del_rcu(&upid->pid_chain);
279 switch(--ns->nr_hashed) {
280 case 1:
281 /* When all that is left in the pid namespace
282 * is the reaper wake up the reaper. The reaper
283 * may be sleeping in zap_pid_ns_processes().
284 */
285 wake_up_process(ns->child_reaper);
286 break;
287 case 0:
288 ns->nr_hashed = -1;
289 schedule_work(&ns->proc_work);
290 break;
291 }
292 }
274 spin_unlock_irqrestore(&pidmap_lock, flags); 293 spin_unlock_irqrestore(&pidmap_lock, flags);
275 294
276 for (i = 0; i <= pid->level; i++) 295 for (i = 0; i <= pid->level; i++)
@@ -292,6 +311,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
292 goto out; 311 goto out;
293 312
294 tmp = ns; 313 tmp = ns;
314 pid->level = ns->level;
295 for (i = ns->level; i >= 0; i--) { 315 for (i = ns->level; i >= 0; i--) {
296 nr = alloc_pidmap(tmp); 316 nr = alloc_pidmap(tmp);
297 if (nr < 0) 317 if (nr < 0)
@@ -302,22 +322,32 @@ struct pid *alloc_pid(struct pid_namespace *ns)
302 tmp = tmp->parent; 322 tmp = tmp->parent;
303 } 323 }
304 324
325 if (unlikely(is_child_reaper(pid))) {
326 if (pid_ns_prepare_proc(ns))
327 goto out_free;
328 }
329
305 get_pid_ns(ns); 330 get_pid_ns(ns);
306 pid->level = ns->level;
307 atomic_set(&pid->count, 1); 331 atomic_set(&pid->count, 1);
308 for (type = 0; type < PIDTYPE_MAX; ++type) 332 for (type = 0; type < PIDTYPE_MAX; ++type)
309 INIT_HLIST_HEAD(&pid->tasks[type]); 333 INIT_HLIST_HEAD(&pid->tasks[type]);
310 334
311 upid = pid->numbers + ns->level; 335 upid = pid->numbers + ns->level;
312 spin_lock_irq(&pidmap_lock); 336 spin_lock_irq(&pidmap_lock);
313 for ( ; upid >= pid->numbers; --upid) 337 if (ns->nr_hashed < 0)
338 goto out_unlock;
339 for ( ; upid >= pid->numbers; --upid) {
314 hlist_add_head_rcu(&upid->pid_chain, 340 hlist_add_head_rcu(&upid->pid_chain,
315 &pid_hash[pid_hashfn(upid->nr, upid->ns)]); 341 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
342 upid->ns->nr_hashed++;
343 }
316 spin_unlock_irq(&pidmap_lock); 344 spin_unlock_irq(&pidmap_lock);
317 345
318out: 346out:
319 return pid; 347 return pid;
320 348
349out_unlock:
350 spin_unlock(&pidmap_lock);
321out_free: 351out_free:
322 while (++i <= ns->level) 352 while (++i <= ns->level)
323 free_pidmap(pid->numbers + i); 353 free_pidmap(pid->numbers + i);
@@ -344,7 +374,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
344 374
345struct pid *find_vpid(int nr) 375struct pid *find_vpid(int nr)
346{ 376{
347 return find_pid_ns(nr, current->nsproxy->pid_ns); 377 return find_pid_ns(nr, task_active_pid_ns(current));
348} 378}
349EXPORT_SYMBOL_GPL(find_vpid); 379EXPORT_SYMBOL_GPL(find_vpid);
350 380
@@ -428,7 +458,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
428 458
429struct task_struct *find_task_by_vpid(pid_t vnr) 459struct task_struct *find_task_by_vpid(pid_t vnr)
430{ 460{
431 return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); 461 return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
432} 462}
433 463
434struct pid *get_task_pid(struct task_struct *task, enum pid_type type) 464struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
@@ -483,7 +513,7 @@ EXPORT_SYMBOL_GPL(pid_nr_ns);
483 513
484pid_t pid_vnr(struct pid *pid) 514pid_t pid_vnr(struct pid *pid)
485{ 515{
486 return pid_nr_ns(pid, current->nsproxy->pid_ns); 516 return pid_nr_ns(pid, task_active_pid_ns(current));
487} 517}
488EXPORT_SYMBOL_GPL(pid_vnr); 518EXPORT_SYMBOL_GPL(pid_vnr);
489 519
@@ -494,7 +524,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
494 524
495 rcu_read_lock(); 525 rcu_read_lock();
496 if (!ns) 526 if (!ns)
497 ns = current->nsproxy->pid_ns; 527 ns = task_active_pid_ns(current);
498 if (likely(pid_alive(task))) { 528 if (likely(pid_alive(task))) {
499 if (type != PIDTYPE_PID) 529 if (type != PIDTYPE_PID)
500 task = task->group_leader; 530 task = task->group_leader;
@@ -569,6 +599,7 @@ void __init pidmap_init(void)
569 /* Reserve PID 0. We never call free_pidmap(0) */ 599 /* Reserve PID 0. We never call free_pidmap(0) */
570 set_bit(0, init_pid_ns.pidmap[0].page); 600 set_bit(0, init_pid_ns.pidmap[0].page);
571 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 601 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
602 init_pid_ns.nr_hashed = 1;
572 603
573 init_pid_ns.pid_cachep = KMEM_CACHE(pid, 604 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
574 SLAB_HWCACHE_ALIGN | SLAB_PANIC); 605 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7b07cc0dfb7..560da0dab23 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/pid.h> 11#include <linux/pid.h>
12#include <linux/pid_namespace.h> 12#include <linux/pid_namespace.h>
13#include <linux/user_namespace.h>
13#include <linux/syscalls.h> 14#include <linux/syscalls.h>
14#include <linux/err.h> 15#include <linux/err.h>
15#include <linux/acct.h> 16#include <linux/acct.h>
@@ -71,10 +72,17 @@ err_alloc:
71 return NULL; 72 return NULL;
72} 73}
73 74
75static void proc_cleanup_work(struct work_struct *work)
76{
77 struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
78 pid_ns_release_proc(ns);
79}
80
74/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ 81/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
75#define MAX_PID_NS_LEVEL 32 82#define MAX_PID_NS_LEVEL 32
76 83
77static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) 84static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
85 struct pid_namespace *parent_pid_ns)
78{ 86{
79 struct pid_namespace *ns; 87 struct pid_namespace *ns;
80 unsigned int level = parent_pid_ns->level + 1; 88 unsigned int level = parent_pid_ns->level + 1;
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
99 if (ns->pid_cachep == NULL) 107 if (ns->pid_cachep == NULL)
100 goto out_free_map; 108 goto out_free_map;
101 109
110 err = proc_alloc_inum(&ns->proc_inum);
111 if (err)
112 goto out_free_map;
113
102 kref_init(&ns->kref); 114 kref_init(&ns->kref);
103 ns->level = level; 115 ns->level = level;
104 ns->parent = get_pid_ns(parent_pid_ns); 116 ns->parent = get_pid_ns(parent_pid_ns);
117 ns->user_ns = get_user_ns(user_ns);
118 INIT_WORK(&ns->proc_work, proc_cleanup_work);
105 119
106 set_bit(0, ns->pidmap[0].page); 120 set_bit(0, ns->pidmap[0].page);
107 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); 121 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
109 for (i = 1; i < PIDMAP_ENTRIES; i++) 123 for (i = 1; i < PIDMAP_ENTRIES; i++)
110 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 124 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
111 125
112 err = pid_ns_prepare_proc(ns);
113 if (err)
114 goto out_put_parent_pid_ns;
115
116 return ns; 126 return ns;
117 127
118out_put_parent_pid_ns:
119 put_pid_ns(parent_pid_ns);
120out_free_map: 128out_free_map:
121 kfree(ns->pidmap[0].page); 129 kfree(ns->pidmap[0].page);
122out_free: 130out_free:
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
129{ 137{
130 int i; 138 int i;
131 139
140 proc_free_inum(ns->proc_inum);
132 for (i = 0; i < PIDMAP_ENTRIES; i++) 141 for (i = 0; i < PIDMAP_ENTRIES; i++)
133 kfree(ns->pidmap[i].page); 142 kfree(ns->pidmap[i].page);
143 put_user_ns(ns->user_ns);
134 kmem_cache_free(pid_ns_cachep, ns); 144 kmem_cache_free(pid_ns_cachep, ns);
135} 145}
136 146
137struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 147struct pid_namespace *copy_pid_ns(unsigned long flags,
148 struct user_namespace *user_ns, struct pid_namespace *old_ns)
138{ 149{
139 if (!(flags & CLONE_NEWPID)) 150 if (!(flags & CLONE_NEWPID))
140 return get_pid_ns(old_ns); 151 return get_pid_ns(old_ns);
141 if (flags & (CLONE_THREAD|CLONE_PARENT)) 152 if (task_active_pid_ns(current) != old_ns)
142 return ERR_PTR(-EINVAL); 153 return ERR_PTR(-EINVAL);
143 return create_pid_namespace(old_ns); 154 return create_pid_namespace(user_ns, old_ns);
144} 155}
145 156
146static void free_pid_ns(struct kref *kref) 157static void free_pid_ns(struct kref *kref)
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
211 222
212 /* 223 /*
213 * sys_wait4() above can't reap the TASK_DEAD children. 224 * sys_wait4() above can't reap the TASK_DEAD children.
214 * Make sure they all go away, see __unhash_process(). 225 * Make sure they all go away, see free_pid().
215 */ 226 */
216 for (;;) { 227 for (;;) {
217 bool need_wait = false; 228 set_current_state(TASK_UNINTERRUPTIBLE);
218 229 if (pid_ns->nr_hashed == 1)
219 read_lock(&tasklist_lock);
220 if (!list_empty(&current->children)) {
221 __set_current_state(TASK_UNINTERRUPTIBLE);
222 need_wait = true;
223 }
224 read_unlock(&tasklist_lock);
225
226 if (!need_wait)
227 break; 230 break;
228 schedule(); 231 schedule();
229 } 232 }
233 __set_current_state(TASK_RUNNING);
230 234
231 if (pid_ns->reboot) 235 if (pid_ns->reboot)
232 current->signal->group_exit_code = pid_ns->reboot; 236 current->signal->group_exit_code = pid_ns->reboot;
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
239static int pid_ns_ctl_handler(struct ctl_table *table, int write, 243static int pid_ns_ctl_handler(struct ctl_table *table, int write,
240 void __user *buffer, size_t *lenp, loff_t *ppos) 244 void __user *buffer, size_t *lenp, loff_t *ppos)
241{ 245{
246 struct pid_namespace *pid_ns = task_active_pid_ns(current);
242 struct ctl_table tmp = *table; 247 struct ctl_table tmp = *table;
243 248
244 if (write && !capable(CAP_SYS_ADMIN)) 249 if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
245 return -EPERM; 250 return -EPERM;
246 251
247 /* 252 /*
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
250 * it should synchronize its usage with external means. 255 * it should synchronize its usage with external means.
251 */ 256 */
252 257
253 tmp.data = &current->nsproxy->pid_ns->last_pid; 258 tmp.data = &pid_ns->last_pid;
254 return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 259 return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
255} 260}
256 261
@@ -299,6 +304,67 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
299 return 0; 304 return 0;
300} 305}
301 306
/*
 * proc_ns_operations ->get hook for pid namespaces.
 *
 * @task: task whose active pid namespace is wanted.
 *
 * Looks up the task's active pid namespace under rcu_read_lock() and takes
 * a reference on it.
 *
 * Returns the referenced namespace, or NULL when the task has no active
 * pid namespace.  A non-NULL result must be released with pidns_put().
 */
static void *pidns_get(struct task_struct *task)
{
	struct pid_namespace *ns;

	rcu_read_lock();
	ns = task_active_pid_ns(task);
	/*
	 * NOTE(review): task_active_pid_ns() is presumed to yield NULL once
	 * the task's pid has been detached (e.g. the task was already
	 * reaped) -- confirm against its definition.  get_pid_ns() would
	 * dereference the pointer, so only take the reference when there is
	 * a namespace to pin.
	 */
	if (ns)
		get_pid_ns(ns);
	rcu_read_unlock();

	return ns;
}
317
/*
 * proc_ns_operations ->put hook for pid namespaces.
 *
 * @ns: opaque pointer to a struct pid_namespace.
 *
 * Drops one reference on the namespace via put_pid_ns().
 */
static void pidns_put(void *ns)
{
	struct pid_namespace *pid_ns = ns;

	put_pid_ns(pid_ns);
}
322
323static int pidns_install(struct nsproxy *nsproxy, void *ns)
324{
325 struct pid_namespace *active = task_active_pid_ns(current);
326 struct pid_namespace *ancestor, *new = ns;
327
328 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
329 return -EPERM;
330
331 /*
332 * Only allow entering the current active pid namespace
333 * or a child of the current active pid namespace.
334 *
335 * This is required for fork to return a usable pid value and
336 * this maintains the property that processes and their
337 * children can not escape their current pid namespace.
338 */
339 if (new->level < active->level)
340 return -EINVAL;
341
342 ancestor = new;
343 while (ancestor->level > active->level)
344 ancestor = ancestor->parent;
345 if (ancestor != active)
346 return -EINVAL;
347
348 put_pid_ns(nsproxy->pid_ns);
349 nsproxy->pid_ns = get_pid_ns(new);
350 return 0;
351}
352
353static unsigned int pidns_inum(void *ns)
354{
355 struct pid_namespace *pid_ns = ns;
356 return pid_ns->proc_inum;
357}
358
359const struct proc_ns_operations pidns_operations = {
360 .name = "pid",
361 .type = CLONE_NEWPID,
362 .get = pidns_get,
363 .put = pidns_put,
364 .install = pidns_install,
365 .inum = pidns_inum,
366};
367
302static __init int pid_namespaces_init(void) 368static __init int pid_namespaces_init(void)
303{ 369{
304 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 370 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda95..7b09b88862c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@ ok:
215 smp_rmb(); 215 smp_rmb();
216 if (task->mm) 216 if (task->mm)
217 dumpable = get_dumpable(task->mm); 217 dumpable = get_dumpable(task->mm);
218 if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) 218 rcu_read_lock();
219 if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
220 rcu_read_unlock();
219 return -EPERM; 221 return -EPERM;
222 }
223 rcu_read_unlock();
220 224
221 return security_ptrace_access_check(task, mode); 225 return security_ptrace_access_check(task, mode);
222} 226}
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request,
280 284
281 if (seize) 285 if (seize)
282 flags |= PT_SEIZED; 286 flags |= PT_SEIZED;
283 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) 287 rcu_read_lock();
288 if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
284 flags |= PT_PTRACE_CAP; 289 flags |= PT_PTRACE_CAP;
290 rcu_read_unlock();
285 task->ptrace = flags; 291 task->ptrace = flags;
286 292
287 __ptrace_link(task, current); 293 __ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1fb82104bf..257002c13bb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4097,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
4097 goto out_free_cpus_allowed; 4097 goto out_free_cpus_allowed;
4098 } 4098 }
4099 retval = -EPERM; 4099 retval = -EPERM;
4100 if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) 4100 if (!check_same_owner(p)) {
4101 goto out_unlock; 4101 rcu_read_lock();
4102 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
4103 rcu_read_unlock();
4104 goto out_unlock;
4105 }
4106 rcu_read_unlock();
4107 }
4102 4108
4103 retval = security_task_setscheduler(p); 4109 retval = security_task_setscheduler(p);
4104 if (retval) 4110 if (retval)
diff --git a/kernel/signal.c b/kernel/signal.c
index a49c7f36ceb..580a91e6347 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1753,7 +1753,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
1753 * see comment in do_notify_parent() about the following 4 lines 1753 * see comment in do_notify_parent() about the following 4 lines
1754 */ 1754 */
1755 rcu_read_lock(); 1755 rcu_read_lock();
1756 info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); 1756 info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent));
1757 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); 1757 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
1758 rcu_read_unlock(); 1758 rcu_read_unlock();
1759 1759
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 65bdcf198d4..5a638445050 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1344,7 +1344,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1344 goto out_putname; 1344 goto out_putname;
1345 } 1345 }
1346 1346
1347 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = task_active_pid_ns(current)->proc_mnt;
1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1349 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1350 if (IS_ERR(file)) 1350 if (IS_ERR(file))
diff --git a/kernel/user.c b/kernel/user.c
index 750acffbe9e..33acb5e53a5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/export.h> 17#include <linux/export.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19#include <linux/proc_fs.h>
19 20
20/* 21/*
21 * userns count is 1 for root user, 1 for init_uts_ns, 22 * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = {
51 }, 52 },
52 .owner = GLOBAL_ROOT_UID, 53 .owner = GLOBAL_ROOT_UID,
53 .group = GLOBAL_ROOT_GID, 54 .group = GLOBAL_ROOT_GID,
55 .proc_inum = PROC_USER_INIT_INO,
54}; 56};
55EXPORT_SYMBOL_GPL(init_user_ns); 57EXPORT_SYMBOL_GPL(init_user_ns);
56 58
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 456a6b9fba3..f5975ccf934 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
9#include <linux/nsproxy.h> 9#include <linux/nsproxy.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/proc_fs.h>
12#include <linux/highuid.h> 13#include <linux/highuid.h>
13#include <linux/cred.h> 14#include <linux/cred.h>
14#include <linux/securebits.h> 15#include <linux/securebits.h>
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly;
26static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 27static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
27 struct uid_gid_map *map); 28 struct uid_gid_map *map);
28 29
30static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
31{
32 /* Start with the same capabilities as init but useless for doing
33 * anything as the capabilities are bound to the new user namespace.
34 */
35 cred->securebits = SECUREBITS_DEFAULT;
36 cred->cap_inheritable = CAP_EMPTY_SET;
37 cred->cap_permitted = CAP_FULL_SET;
38 cred->cap_effective = CAP_FULL_SET;
39 cred->cap_bset = CAP_FULL_SET;
40#ifdef CONFIG_KEYS
41 key_put(cred->request_key_auth);
42 cred->request_key_auth = NULL;
43#endif
44 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
45 cred->user_ns = user_ns;
46}
47
29/* 48/*
30 * Create a new user namespace, deriving the creator from the user in the 49 * Create a new user namespace, deriving the creator from the user in the
31 * passed credentials, and replacing that user with the new root user for the 50 * passed credentials, and replacing that user with the new root user for the
@@ -39,6 +58,7 @@ int create_user_ns(struct cred *new)
39 struct user_namespace *ns, *parent_ns = new->user_ns; 58 struct user_namespace *ns, *parent_ns = new->user_ns;
40 kuid_t owner = new->euid; 59 kuid_t owner = new->euid;
41 kgid_t group = new->egid; 60 kgid_t group = new->egid;
61 int ret;
42 62
43 /* The creator needs a mapping in the parent user namespace 63 /* The creator needs a mapping in the parent user namespace
44 * or else we won't be able to reasonably tell userspace who 64 * or else we won't be able to reasonably tell userspace who
@@ -52,38 +72,45 @@ int create_user_ns(struct cred *new)
52 if (!ns) 72 if (!ns)
53 return -ENOMEM; 73 return -ENOMEM;
54 74
75 ret = proc_alloc_inum(&ns->proc_inum);
76 if (ret) {
77 kmem_cache_free(user_ns_cachep, ns);
78 return ret;
79 }
80
55 kref_init(&ns->kref); 81 kref_init(&ns->kref);
82 /* Leave the new->user_ns reference with the new user namespace. */
56 ns->parent = parent_ns; 83 ns->parent = parent_ns;
57 ns->owner = owner; 84 ns->owner = owner;
58 ns->group = group; 85 ns->group = group;
59 86
60 /* Start with the same capabilities as init but useless for doing 87 set_cred_user_ns(new, ns);
61 * anything as the capabilities are bound to the new user namespace.
62 */
63 new->securebits = SECUREBITS_DEFAULT;
64 new->cap_inheritable = CAP_EMPTY_SET;
65 new->cap_permitted = CAP_FULL_SET;
66 new->cap_effective = CAP_FULL_SET;
67 new->cap_bset = CAP_FULL_SET;
68#ifdef CONFIG_KEYS
69 key_put(new->request_key_auth);
70 new->request_key_auth = NULL;
71#endif
72 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
73
74 /* Leave the new->user_ns reference with the new user namespace. */
75 /* Leave the reference to our user_ns with the new cred. */
76 new->user_ns = ns;
77 88
78 return 0; 89 return 0;
79} 90}
80 91
92int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
93{
94 struct cred *cred;
95
96 if (!(unshare_flags & CLONE_NEWUSER))
97 return 0;
98
99 cred = prepare_creds();
100 if (!cred)
101 return -ENOMEM;
102
103 *new_cred = cred;
104 return create_user_ns(cred);
105}
106
81void free_user_ns(struct kref *kref) 107void free_user_ns(struct kref *kref)
82{ 108{
83 struct user_namespace *parent, *ns = 109 struct user_namespace *parent, *ns =
84 container_of(kref, struct user_namespace, kref); 110 container_of(kref, struct user_namespace, kref);
85 111
86 parent = ns->parent; 112 parent = ns->parent;
113 proc_free_inum(ns->proc_inum);
87 kmem_cache_free(user_ns_cachep, ns); 114 kmem_cache_free(user_ns_cachep, ns);
88 put_user_ns(parent); 115 put_user_ns(parent);
89} 116}
@@ -372,7 +399,7 @@ static int uid_m_show(struct seq_file *seq, void *v)
372 struct user_namespace *lower_ns; 399 struct user_namespace *lower_ns;
373 uid_t lower; 400 uid_t lower;
374 401
375 lower_ns = current_user_ns(); 402 lower_ns = seq_user_ns(seq);
376 if ((lower_ns == ns) && lower_ns->parent) 403 if ((lower_ns == ns) && lower_ns->parent)
377 lower_ns = lower_ns->parent; 404 lower_ns = lower_ns->parent;
378 405
@@ -393,7 +420,7 @@ static int gid_m_show(struct seq_file *seq, void *v)
393 struct user_namespace *lower_ns; 420 struct user_namespace *lower_ns;
394 gid_t lower; 421 gid_t lower;
395 422
396 lower_ns = current_user_ns(); 423 lower_ns = seq_user_ns(seq);
397 if ((lower_ns == ns) && lower_ns->parent) 424 if ((lower_ns == ns) && lower_ns->parent)
398 lower_ns = lower_ns->parent; 425 lower_ns = lower_ns->parent;
399 426
@@ -669,10 +696,14 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz
669{ 696{
670 struct seq_file *seq = file->private_data; 697 struct seq_file *seq = file->private_data;
671 struct user_namespace *ns = seq->private; 698 struct user_namespace *ns = seq->private;
699 struct user_namespace *seq_ns = seq_user_ns(seq);
672 700
673 if (!ns->parent) 701 if (!ns->parent)
674 return -EPERM; 702 return -EPERM;
675 703
704 if ((seq_ns != ns) && (seq_ns != ns->parent))
705 return -EPERM;
706
676 return map_write(file, buf, size, ppos, CAP_SETUID, 707 return map_write(file, buf, size, ppos, CAP_SETUID,
677 &ns->uid_map, &ns->parent->uid_map); 708 &ns->uid_map, &ns->parent->uid_map);
678} 709}
@@ -681,10 +712,14 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz
681{ 712{
682 struct seq_file *seq = file->private_data; 713 struct seq_file *seq = file->private_data;
683 struct user_namespace *ns = seq->private; 714 struct user_namespace *ns = seq->private;
715 struct user_namespace *seq_ns = seq_user_ns(seq);
684 716
685 if (!ns->parent) 717 if (!ns->parent)
686 return -EPERM; 718 return -EPERM;
687 719
720 if ((seq_ns != ns) && (seq_ns != ns->parent))
721 return -EPERM;
722
688 return map_write(file, buf, size, ppos, CAP_SETGID, 723 return map_write(file, buf, size, ppos, CAP_SETGID,
689 &ns->gid_map, &ns->parent->gid_map); 724 &ns->gid_map, &ns->parent->gid_map);
690} 725}
@@ -709,6 +744,21 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
709static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 744static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
710 struct uid_gid_map *new_map) 745 struct uid_gid_map *new_map)
711{ 746{
747 /* Allow mapping to your own filesystem ids */
748 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
749 u32 id = new_map->extent[0].lower_first;
750 if (cap_setid == CAP_SETUID) {
751 kuid_t uid = make_kuid(ns->parent, id);
752 if (uid_eq(uid, current_fsuid()))
753 return true;
754 }
755 else if (cap_setid == CAP_SETGID) {
756 kgid_t gid = make_kgid(ns->parent, id);
757 if (gid_eq(gid, current_fsgid()))
758 return true;
759 }
760 }
761
712 /* Allow anyone to set a mapping that doesn't require privilege */ 762 /* Allow anyone to set a mapping that doesn't require privilege */
713 if (!cap_valid(cap_setid)) 763 if (!cap_valid(cap_setid))
714 return true; 764 return true;
@@ -722,6 +772,65 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
722 return false; 772 return false;
723} 773}
724 774
775static void *userns_get(struct task_struct *task)
776{
777 struct user_namespace *user_ns;
778
779 rcu_read_lock();
780 user_ns = get_user_ns(__task_cred(task)->user_ns);
781 rcu_read_unlock();
782
783 return user_ns;
784}
785
786static void userns_put(void *ns)
787{
788 put_user_ns(ns);
789}
790
791static int userns_install(struct nsproxy *nsproxy, void *ns)
792{
793 struct user_namespace *user_ns = ns;
794 struct cred *cred;
795
796 /* Don't allow gaining capabilities by reentering
797 * the same user namespace.
798 */
799 if (user_ns == current_user_ns())
800 return -EINVAL;
801
802 /* Threaded many not enter a different user namespace */
803 if (atomic_read(&current->mm->mm_users) > 1)
804 return -EINVAL;
805
806 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
807 return -EPERM;
808
809 cred = prepare_creds();
810 if (!cred)
811 return -ENOMEM;
812
813 put_user_ns(cred->user_ns);
814 set_cred_user_ns(cred, get_user_ns(user_ns));
815
816 return commit_creds(cred);
817}
818
819static unsigned int userns_inum(void *ns)
820{
821 struct user_namespace *user_ns = ns;
822 return user_ns->proc_inum;
823}
824
825const struct proc_ns_operations userns_operations = {
826 .name = "user",
827 .type = CLONE_NEWUSER,
828 .get = userns_get,
829 .put = userns_put,
830 .install = userns_install,
831 .inum = userns_inum,
832};
833
725static __init int user_namespaces_init(void) 834static __init int user_namespaces_init(void)
726{ 835{
727 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); 836 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 679d97a5d3f..f6336d51d64 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,18 +32,25 @@ static struct uts_namespace *create_uts_ns(void)
32 * @old_ns: namespace to clone 32 * @old_ns: namespace to clone
33 * Return NULL on error (failure to kmalloc), new ns otherwise 33 * Return NULL on error (failure to kmalloc), new ns otherwise
34 */ 34 */
35static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, 35static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
36 struct uts_namespace *old_ns) 36 struct uts_namespace *old_ns)
37{ 37{
38 struct uts_namespace *ns; 38 struct uts_namespace *ns;
39 int err;
39 40
40 ns = create_uts_ns(); 41 ns = create_uts_ns();
41 if (!ns) 42 if (!ns)
42 return ERR_PTR(-ENOMEM); 43 return ERR_PTR(-ENOMEM);
43 44
45 err = proc_alloc_inum(&ns->proc_inum);
46 if (err) {
47 kfree(ns);
48 return ERR_PTR(err);
49 }
50
44 down_read(&uts_sem); 51 down_read(&uts_sem);
45 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 52 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
46 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); 53 ns->user_ns = get_user_ns(user_ns);
47 up_read(&uts_sem); 54 up_read(&uts_sem);
48 return ns; 55 return ns;
49} 56}
@@ -55,9 +62,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
55 * versa. 62 * versa.
56 */ 63 */
57struct uts_namespace *copy_utsname(unsigned long flags, 64struct uts_namespace *copy_utsname(unsigned long flags,
58 struct task_struct *tsk) 65 struct user_namespace *user_ns, struct uts_namespace *old_ns)
59{ 66{
60 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
61 struct uts_namespace *new_ns; 67 struct uts_namespace *new_ns;
62 68
63 BUG_ON(!old_ns); 69 BUG_ON(!old_ns);
@@ -66,7 +72,7 @@ struct uts_namespace *copy_utsname(unsigned long flags,
66 if (!(flags & CLONE_NEWUTS)) 72 if (!(flags & CLONE_NEWUTS))
67 return old_ns; 73 return old_ns;
68 74
69 new_ns = clone_uts_ns(tsk, old_ns); 75 new_ns = clone_uts_ns(user_ns, old_ns);
70 76
71 put_uts_ns(old_ns); 77 put_uts_ns(old_ns);
72 return new_ns; 78 return new_ns;
@@ -78,6 +84,7 @@ void free_uts_ns(struct kref *kref)
78 84
79 ns = container_of(kref, struct uts_namespace, kref); 85 ns = container_of(kref, struct uts_namespace, kref);
80 put_user_ns(ns->user_ns); 86 put_user_ns(ns->user_ns);
87 proc_free_inum(ns->proc_inum);
81 kfree(ns); 88 kfree(ns);
82} 89}
83 90
@@ -102,19 +109,31 @@ static void utsns_put(void *ns)
102 put_uts_ns(ns); 109 put_uts_ns(ns);
103} 110}
104 111
105static int utsns_install(struct nsproxy *nsproxy, void *ns) 112static int utsns_install(struct nsproxy *nsproxy, void *new)
106{ 113{
114 struct uts_namespace *ns = new;
115
116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
117 return -EPERM;
118
107 get_uts_ns(ns); 119 get_uts_ns(ns);
108 put_uts_ns(nsproxy->uts_ns); 120 put_uts_ns(nsproxy->uts_ns);
109 nsproxy->uts_ns = ns; 121 nsproxy->uts_ns = ns;
110 return 0; 122 return 0;
111} 123}
112 124
125static unsigned int utsns_inum(void *vp)
126{
127 struct uts_namespace *ns = vp;
128
129 return ns->proc_inum;
130}
131
113const struct proc_ns_operations utsns_operations = { 132const struct proc_ns_operations utsns_operations = {
114 .name = "uts", 133 .name = "uts",
115 .type = CLONE_NEWUTS, 134 .type = CLONE_NEWUTS,
116 .get = utsns_get, 135 .get = utsns_get,
117 .put = utsns_put, 136 .put = utsns_put,
118 .install = utsns_install, 137 .install = utsns_install,
138 .inum = utsns_inum,
119}; 139};
120
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6456439cbbd..2e9a3132b8d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
381} 381}
382EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 382EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
383 383
384static __net_init int net_ns_net_init(struct net *net)
385{
386 return proc_alloc_inum(&net->proc_inum);
387}
388
389static __net_exit void net_ns_net_exit(struct net *net)
390{
391 proc_free_inum(net->proc_inum);
392}
393
394static struct pernet_operations __net_initdata net_ns_ops = {
395 .init = net_ns_net_init,
396 .exit = net_ns_net_exit,
397};
398
384static int __init net_ns_init(void) 399static int __init net_ns_init(void)
385{ 400{
386 struct net_generic *ng; 401 struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
412 427
413 mutex_unlock(&net_mutex); 428 mutex_unlock(&net_mutex);
414 429
430 register_pernet_subsys(&net_ns_ops);
431
415 return 0; 432 return 0;
416} 433}
417 434
@@ -630,16 +647,28 @@ static void netns_put(void *ns)
630 647
631static int netns_install(struct nsproxy *nsproxy, void *ns) 648static int netns_install(struct nsproxy *nsproxy, void *ns)
632{ 649{
650 struct net *net = ns;
651
652 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN))
653 return -EPERM;
654
633 put_net(nsproxy->net_ns); 655 put_net(nsproxy->net_ns);
634 nsproxy->net_ns = get_net(ns); 656 nsproxy->net_ns = get_net(net);
635 return 0; 657 return 0;
636} 658}
637 659
660static unsigned int netns_inum(void *ns)
661{
662 struct net *net = ns;
663 return net->proc_inum;
664}
665
638const struct proc_ns_operations netns_operations = { 666const struct proc_ns_operations netns_operations = {
639 .name = "net", 667 .name = "net",
640 .type = CLONE_NEWNET, 668 .type = CLONE_NEWNET,
641 .get = netns_get, 669 .get = netns_get,
642 .put = netns_put, 670 .put = netns_put,
643 .install = netns_install, 671 .install = netns_install,
672 .inum = netns_inum,
644}; 673};
645#endif 674#endif
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 2663145d119..23414b93771 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -298,14 +298,18 @@ int yama_ptrace_access_check(struct task_struct *child,
298 /* No additional restrictions. */ 298 /* No additional restrictions. */
299 break; 299 break;
300 case YAMA_SCOPE_RELATIONAL: 300 case YAMA_SCOPE_RELATIONAL:
301 rcu_read_lock();
301 if (!task_is_descendant(current, child) && 302 if (!task_is_descendant(current, child) &&
302 !ptracer_exception_found(current, child) && 303 !ptracer_exception_found(current, child) &&
303 !ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) 304 !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
304 rc = -EPERM; 305 rc = -EPERM;
306 rcu_read_unlock();
305 break; 307 break;
306 case YAMA_SCOPE_CAPABILITY: 308 case YAMA_SCOPE_CAPABILITY:
307 if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) 309 rcu_read_lock();
310 if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
308 rc = -EPERM; 311 rc = -EPERM;
312 rcu_read_unlock();
309 break; 313 break;
310 case YAMA_SCOPE_NO_ATTACH: 314 case YAMA_SCOPE_NO_ATTACH:
311 default: 315 default:
@@ -343,8 +347,10 @@ int yama_ptrace_traceme(struct task_struct *parent)
343 /* Only disallow PTRACE_TRACEME on more aggressive settings. */ 347 /* Only disallow PTRACE_TRACEME on more aggressive settings. */
344 switch (ptrace_scope) { 348 switch (ptrace_scope) {
345 case YAMA_SCOPE_CAPABILITY: 349 case YAMA_SCOPE_CAPABILITY:
346 if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE)) 350 rcu_read_lock();
351 if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
347 rc = -EPERM; 352 rc = -EPERM;
353 rcu_read_unlock();
348 break; 354 break;
349 case YAMA_SCOPE_NO_ATTACH: 355 case YAMA_SCOPE_NO_ATTACH:
350 rc = -EPERM; 356 rc = -EPERM;