aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--drivers/staging/android/binder.c3
-rw-r--r--fs/attr.c11
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/inode.c24
-rw-r--r--fs/autofs4/waitq.c5
-rw-r--r--fs/exec.c9
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c211
-rw-r--r--fs/open.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c185
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/proc/self.c59
-rw-r--r--fs/sysfs/mount.c1
-rw-r--r--include/linux/cred.h2
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/ipc_namespace.h9
-rw-r--r--include/linux/mnt_namespace.h3
-rw-r--r--include/linux/nsproxy.h2
-rw-r--r--include/linux/pid_namespace.h11
-rw-r--r--include/linux/proc_fs.h26
-rw-r--r--include/linux/user_namespace.h10
-rw-r--r--include/linux/utsname.h7
-rw-r--r--include/net/net_namespace.h2
-rw-r--r--init/Kconfig2
-rw-r--r--init/main.c1
-rw-r--r--init/version.c2
-rw-r--r--ipc/msgutil.c2
-rw-r--r--ipc/namespace.c32
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c69
-rw-r--r--kernel/nsproxy.c36
-rw-r--r--kernel/pid.c47
-rw-r--r--kernel/pid_namespace.c112
-rw-r--r--kernel/ptrace.c10
-rw-r--r--kernel/sched/core.c10
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/user_namespace.c147
-rw-r--r--kernel/utsname.c33
-rw-r--r--net/core/net_namespace.c31
-rw-r--r--security/yama/yama_lsm.c12
59 files changed, 996 insertions, 451 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 965d381abd75..25db92a8e1cf 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
1094 LOAD_INT(c), LOAD_FRAC(c), 1094 LOAD_INT(c), LOAD_FRAC(c),
1095 count_active_contexts(), 1095 count_active_contexts(),
1096 atomic_read(&nr_spu_contexts), 1096 atomic_read(&nr_spu_contexts),
1097 current->nsproxy->pid_ns->last_pid); 1097 task_active_pid_ns(current)->last_pid);
1098 return 0; 1098 return 0;
1099} 1099}
1100 1100
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 49e3b49e552f..4bd82ac0210f 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -123,7 +123,7 @@ void mconsole_log(struct mc_request *req)
123 123
124void mconsole_proc(struct mc_request *req) 124void mconsole_proc(struct mc_request *req)
125{ 125{
126 struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; 126 struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
127 char *buf; 127 char *buf;
128 int len; 128 int len;
129 struct file *file; 129 struct file *file;
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 4a36e9ab8cf7..2d12e8a1f82e 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -35,6 +35,7 @@
35#include <linux/uaccess.h> 35#include <linux/uaccess.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/pid_namespace.h>
38 39
39#include "binder.h" 40#include "binder.h"
40#include "binder_trace.h" 41#include "binder_trace.h"
@@ -2320,7 +2321,7 @@ retry:
2320 if (t->from) { 2321 if (t->from) {
2321 struct task_struct *sender = t->from->proc->tsk; 2322 struct task_struct *sender = t->from->proc->tsk;
2322 tr.sender_pid = task_tgid_nr_ns(sender, 2323 tr.sender_pid = task_tgid_nr_ns(sender,
2323 current->nsproxy->pid_ns); 2324 task_active_pid_ns(current));
2324 } else { 2325 } else {
2325 tr.sender_pid = 0; 2326 tr.sender_pid = 0;
2326 } 2327 }
diff --git a/fs/attr.c b/fs/attr.c
index cce7df53b694..1449adb14ef6 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
49 /* Make sure a caller can chown. */ 49 /* Make sure a caller can chown. */
50 if ((ia_valid & ATTR_UID) && 50 if ((ia_valid & ATTR_UID) &&
51 (!uid_eq(current_fsuid(), inode->i_uid) || 51 (!uid_eq(current_fsuid(), inode->i_uid) ||
52 !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) 52 !uid_eq(attr->ia_uid, inode->i_uid)) &&
53 !inode_capable(inode, CAP_CHOWN))
53 return -EPERM; 54 return -EPERM;
54 55
55 /* Make sure caller can chgrp. */ 56 /* Make sure caller can chgrp. */
56 if ((ia_valid & ATTR_GID) && 57 if ((ia_valid & ATTR_GID) &&
57 (!uid_eq(current_fsuid(), inode->i_uid) || 58 (!uid_eq(current_fsuid(), inode->i_uid) ||
58 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && 59 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
59 !capable(CAP_CHOWN)) 60 !inode_capable(inode, CAP_CHOWN))
60 return -EPERM; 61 return -EPERM;
61 62
62 /* Make sure a caller can chmod. */ 63 /* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
65 return -EPERM; 66 return -EPERM;
66 /* Also check the setgid bit! */ 67 /* Also check the setgid bit! */
67 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 68 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
68 inode->i_gid) && !capable(CAP_FSETID)) 69 inode->i_gid) &&
70 !inode_capable(inode, CAP_FSETID))
69 attr->ia_mode &= ~S_ISGID; 71 attr->ia_mode &= ~S_ISGID;
70 } 72 }
71 73
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
157 if (ia_valid & ATTR_MODE) { 159 if (ia_valid & ATTR_MODE) {
158 umode_t mode = attr->ia_mode; 160 umode_t mode = attr->ia_mode;
159 161
160 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 162 if (!in_group_p(inode->i_gid) &&
163 !inode_capable(inode, CAP_FSETID))
161 mode &= ~S_ISGID; 164 mode &= ~S_ISGID;
162 inode->i_mode = mode; 165 inode->i_mode = mode;
163 } 166 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 908e18455413..b785e7707959 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_info {
74 unsigned long last_used; 74 unsigned long last_used;
75 atomic_t count; 75 atomic_t count;
76 76
77 uid_t uid; 77 kuid_t uid;
78 gid_t gid; 78 kgid_t gid;
79}; 79};
80 80
81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@ struct autofs_wait_queue {
89 struct qstr name; 89 struct qstr name;
90 u32 dev; 90 u32 dev;
91 u64 ino; 91 u64 ino;
92 uid_t uid; 92 kuid_t uid;
93 gid_t gid; 93 kgid_t gid;
94 pid_t pid; 94 pid_t pid;
95 pid_t tgid; 95 pid_t tgid;
96 /* This is for status reporting upon return */ 96 /* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index a16214109d31..9f68a37bb2b2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
437 err = 0; 437 err = 0;
438 autofs4_expire_wait(path.dentry); 438 autofs4_expire_wait(path.dentry);
439 spin_lock(&sbi->fs_lock); 439 spin_lock(&sbi->fs_lock);
440 param->requester.uid = ino->uid; 440 param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
441 param->requester.gid = ino->gid; 441 param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
442 spin_unlock(&sbi->fs_lock); 442 spin_unlock(&sbi->fs_lock);
443 } 443 }
444 path_put(&path); 444 path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8a4fed8ead30..b104726e2d0a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
36 36
37void autofs4_clean_ino(struct autofs_info *ino) 37void autofs4_clean_ino(struct autofs_info *ino)
38{ 38{
39 ino->uid = 0; 39 ino->uid = GLOBAL_ROOT_UID;
40 ino->gid = 0; 40 ino->gid = GLOBAL_ROOT_GID;
41 ino->last_used = jiffies; 41 ino->last_used = jiffies;
42} 42}
43 43
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
79 return 0; 79 return 0;
80 80
81 seq_printf(m, ",fd=%d", sbi->pipefd); 81 seq_printf(m, ",fd=%d", sbi->pipefd);
82 if (root_inode->i_uid != 0) 82 if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
83 seq_printf(m, ",uid=%u", root_inode->i_uid); 83 seq_printf(m, ",uid=%u",
84 if (root_inode->i_gid != 0) 84 from_kuid_munged(&init_user_ns, root_inode->i_uid));
85 seq_printf(m, ",gid=%u", root_inode->i_gid); 85 if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
86 seq_printf(m, ",gid=%u",
87 from_kgid_munged(&init_user_ns, root_inode->i_gid));
86 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); 88 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
87 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); 89 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
88 seq_printf(m, ",minproto=%d", sbi->min_proto); 90 seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@ static const match_table_t tokens = {
126 {Opt_err, NULL} 128 {Opt_err, NULL}
127}; 129};
128 130
129static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, 131static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
130 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) 132 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
131{ 133{
132 char *p; 134 char *p;
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
159 case Opt_uid: 161 case Opt_uid:
160 if (match_int(args, &option)) 162 if (match_int(args, &option))
161 return 1; 163 return 1;
162 *uid = option; 164 *uid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(*uid))
166 return 1;
163 break; 167 break;
164 case Opt_gid: 168 case Opt_gid:
165 if (match_int(args, &option)) 169 if (match_int(args, &option))
166 return 1; 170 return 1;
167 *gid = option; 171 *gid = make_kgid(current_user_ns(), option);
172 if (!gid_valid(*gid))
173 return 1;
168 break; 174 break;
169 case Opt_pgrp: 175 case Opt_pgrp:
170 if (match_int(args, &option)) 176 if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dce436e595c1..03bc1d347d8e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 case autofs_ptype_expire_direct: 154 case autofs_ptype_expire_direct:
155 { 155 {
156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; 156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
157 struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
157 158
158 pktsz = sizeof(*packet); 159 pktsz = sizeof(*packet);
159 160
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
163 packet->name[wq->name.len] = '\0'; 164 packet->name[wq->name.len] = '\0';
164 packet->dev = wq->dev; 165 packet->dev = wq->dev;
165 packet->ino = wq->ino; 166 packet->ino = wq->ino;
166 packet->uid = wq->uid; 167 packet->uid = from_kuid_munged(user_ns, wq->uid);
167 packet->gid = wq->gid; 168 packet->gid = from_kgid_munged(user_ns, wq->gid);
168 packet->pid = wq->pid; 169 packet->pid = wq->pid;
169 packet->tgid = wq->tgid; 170 packet->tgid = wq->tgid;
170 break; 171 break;
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..b71b08ce7120 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm)
1266 bprm->cred->egid = current_egid(); 1266 bprm->cred->egid = current_egid();
1267 1267
1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1269 !current->no_new_privs) { 1269 !current->no_new_privs &&
1270 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1271 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1270 /* Set-uid? */ 1272 /* Set-uid? */
1271 if (mode & S_ISUID) { 1273 if (mode & S_ISUID) {
1272 if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
1273 return -EPERM;
1274 bprm->per_clear |= PER_CLEAR_ON_SETID; 1274 bprm->per_clear |= PER_CLEAR_ON_SETID;
1275 bprm->cred->euid = inode->i_uid; 1275 bprm->cred->euid = inode->i_uid;
1276
1277 } 1276 }
1278 1277
1279 /* Set-gid? */ 1278 /* Set-gid? */
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm)
1283 * executable. 1282 * executable.
1284 */ 1283 */
1285 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1284 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1286 if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
1287 return -EPERM;
1288 bprm->per_clear |= PER_CLEAR_ON_SETID; 1285 bprm->per_clear |= PER_CLEAR_ON_SETID;
1289 bprm->cred->egid = inode->i_gid; 1286 bprm->cred->egid = inode->i_gid;
1290 } 1287 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c23fa7a91e6..c16335315e5d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req)
92 92
93static void fuse_req_init_context(struct fuse_req *req) 93static void fuse_req_init_context(struct fuse_req *req)
94{ 94{
95 req->in.h.uid = current_fsuid(); 95 req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
96 req->in.h.gid = current_fsgid(); 96 req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
97 req->in.h.pid = current->pid; 97 req->in.h.pid = current->pid;
98} 98}
99 99
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 324bc0850534..b7c09f9eb40c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
818 stat->ino = attr->ino; 818 stat->ino = attr->ino;
819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
820 stat->nlink = attr->nlink; 820 stat->nlink = attr->nlink;
821 stat->uid = attr->uid; 821 stat->uid = make_kuid(&init_user_ns, attr->uid);
822 stat->gid = attr->gid; 822 stat->gid = make_kgid(&init_user_ns, attr->gid);
823 stat->rdev = inode->i_rdev; 823 stat->rdev = inode->i_rdev;
824 stat->atime.tv_sec = attr->atime; 824 stat->atime.tv_sec = attr->atime;
825 stat->atime.tv_nsec = attr->atimensec; 825 stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
1007 rcu_read_lock(); 1007 rcu_read_lock();
1008 ret = 0; 1008 ret = 0;
1009 cred = __task_cred(task); 1009 cred = __task_cred(task);
1010 if (cred->euid == fc->user_id && 1010 if (uid_eq(cred->euid, fc->user_id) &&
1011 cred->suid == fc->user_id && 1011 uid_eq(cred->suid, fc->user_id) &&
1012 cred->uid == fc->user_id && 1012 uid_eq(cred->uid, fc->user_id) &&
1013 cred->egid == fc->group_id && 1013 gid_eq(cred->egid, fc->group_id) &&
1014 cred->sgid == fc->group_id && 1014 gid_eq(cred->sgid, fc->group_id) &&
1015 cred->gid == fc->group_id) 1015 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1016 ret = 1;
1017 rcu_read_unlock(); 1017 rcu_read_unlock();
1018 1018
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1306 if (ivalid & ATTR_MODE) 1306 if (ivalid & ATTR_MODE)
1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1308 if (ivalid & ATTR_UID) 1308 if (ivalid & ATTR_UID)
1309 arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; 1309 arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1310 if (ivalid & ATTR_GID) 1310 if (ivalid & ATTR_GID)
1311 arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; 1311 arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1312 if (ivalid & ATTR_SIZE) 1312 if (ivalid & ATTR_SIZE)
1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1314 if (ivalid & ATTR_ATIME) { 1314 if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e24dd74e3068..e105a53fc72d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@ struct fuse_conn {
333 atomic_t count; 333 atomic_t count;
334 334
335 /** The user id for this mount */ 335 /** The user id for this mount */
336 uid_t user_id; 336 kuid_t user_id;
337 337
338 /** The group id for this mount */ 338 /** The group id for this mount */
339 gid_t group_id; 339 kgid_t group_id;
340 340
341 /** The fuse mount flags for this mount */ 341 /** The fuse mount flags for this mount */
342 unsigned flags; 342 unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0eda124cffb..73ca6b72beaf 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
60struct fuse_mount_data { 60struct fuse_mount_data {
61 int fd; 61 int fd;
62 unsigned rootmode; 62 unsigned rootmode;
63 unsigned user_id; 63 kuid_t user_id;
64 unsigned group_id; 64 kgid_t group_id;
65 unsigned fd_present:1; 65 unsigned fd_present:1;
66 unsigned rootmode_present:1; 66 unsigned rootmode_present:1;
67 unsigned user_id_present:1; 67 unsigned user_id_present:1;
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
164 inode->i_ino = fuse_squash_ino(attr->ino); 164 inode->i_ino = fuse_squash_ino(attr->ino);
165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
166 set_nlink(inode, attr->nlink); 166 set_nlink(inode, attr->nlink);
167 inode->i_uid = attr->uid; 167 inode->i_uid = make_kuid(&init_user_ns, attr->uid);
168 inode->i_gid = attr->gid; 168 inode->i_gid = make_kgid(&init_user_ns, attr->gid);
169 inode->i_blocks = attr->blocks; 169 inode->i_blocks = attr->blocks;
170 inode->i_atime.tv_sec = attr->atime; 170 inode->i_atime.tv_sec = attr->atime;
171 inode->i_atime.tv_nsec = attr->atimensec; 171 inode->i_atime.tv_nsec = attr->atimensec;
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
492 case OPT_USER_ID: 492 case OPT_USER_ID:
493 if (match_int(&args[0], &value)) 493 if (match_int(&args[0], &value))
494 return 0; 494 return 0;
495 d->user_id = value; 495 d->user_id = make_kuid(current_user_ns(), value);
496 if (!uid_valid(d->user_id))
497 return 0;
496 d->user_id_present = 1; 498 d->user_id_present = 1;
497 break; 499 break;
498 500
499 case OPT_GROUP_ID: 501 case OPT_GROUP_ID:
500 if (match_int(&args[0], &value)) 502 if (match_int(&args[0], &value))
501 return 0; 503 return 0;
502 d->group_id = value; 504 d->group_id = make_kgid(current_user_ns(), value);
505 if (!gid_valid(d->group_id))
506 return 0;
503 d->group_id_present = 1; 507 d->group_id_present = 1;
504 break; 508 break;
505 509
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
540 struct super_block *sb = root->d_sb; 544 struct super_block *sb = root->d_sb;
541 struct fuse_conn *fc = get_fuse_conn_super(sb); 545 struct fuse_conn *fc = get_fuse_conn_super(sb);
542 546
543 seq_printf(m, ",user_id=%u", fc->user_id); 547 seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
544 seq_printf(m, ",group_id=%u", fc->group_id); 548 seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
545 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) 549 if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
546 seq_puts(m, ",default_permissions"); 550 seq_puts(m, ",default_permissions");
547 if (fc->flags & FUSE_ALLOW_OTHER) 551 if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
989 if (!file) 993 if (!file)
990 goto err; 994 goto err;
991 995
992 if (file->f_op != &fuse_dev_operations) 996 if ((file->f_op != &fuse_dev_operations) ||
997 (file->f_cred->user_ns != &init_user_ns))
993 goto err_fput; 998 goto err_fput;
994 999
995 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1000 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 78f21f8dc2ec..43b315f2002b 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
710 struct vfsmount *proc_mnt; 710 struct vfsmount *proc_mnt;
711 int err = -ENOENT; 711 int err = -ENOENT;
712 712
713 proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); 713 proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
714 if (IS_ERR(proc_mnt)) 714 if (IS_ERR(proc_mnt))
715 goto out; 715 goto out;
716 716
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de641..cd5007980400 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
4 4
5struct mnt_namespace { 5struct mnt_namespace {
6 atomic_t count; 6 atomic_t count;
7 unsigned int proc_inum;
7 struct mount * root; 8 struct mount * root;
8 struct list_head list; 9 struct list_head list;
10 struct user_namespace *user_ns;
11 u64 seq; /* Sequence number to prevent loops */
9 wait_queue_head_t poll; 12 wait_queue_head_t poll;
10 int event; 13 int event;
11}; 14};
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6b..c1bbe86f4920 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
12#include <linux/export.h> 12#include <linux/export.h>
13#include <linux/capability.h> 13#include <linux/capability.h>
14#include <linux/mnt_namespace.h> 14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
15#include <linux/namei.h> 16#include <linux/namei.h>
16#include <linux/security.h> 17#include <linux/security.h>
17#include <linux/idr.h> 18#include <linux/idr.h>
@@ -20,6 +21,7 @@
20#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/proc_fs.h>
23#include "pnode.h" 25#include "pnode.h"
24#include "internal.h" 26#include "internal.h"
25 27
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
784 if (!mnt) 786 if (!mnt)
785 return ERR_PTR(-ENOMEM); 787 return ERR_PTR(-ENOMEM);
786 788
787 if (flag & (CL_SLAVE | CL_PRIVATE)) 789 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
788 mnt->mnt_group_id = 0; /* not a peer of original */ 790 mnt->mnt_group_id = 0; /* not a peer of original */
789 else 791 else
790 mnt->mnt_group_id = old->mnt_group_id; 792 mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
805 list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 807 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
806 br_write_unlock(&vfsmount_lock); 808 br_write_unlock(&vfsmount_lock);
807 809
808 if (flag & CL_SLAVE) { 810 if ((flag & CL_SLAVE) ||
811 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
809 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 812 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
810 mnt->mnt_master = old; 813 mnt->mnt_master = old;
811 CLEAR_MNT_SHARED(mnt); 814 CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1266 goto dput_and_out; 1269 goto dput_and_out;
1267 1270
1268 retval = -EPERM; 1271 retval = -EPERM;
1269 if (!capable(CAP_SYS_ADMIN)) 1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1270 goto dput_and_out; 1273 goto dput_and_out;
1271 1274
1272 retval = do_umount(mnt, flags); 1275 retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1292 1295
1293static int mount_is_safe(struct path *path) 1296static int mount_is_safe(struct path *path)
1294{ 1297{
1295 if (capable(CAP_SYS_ADMIN)) 1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1296 return 0; 1299 return 0;
1297 return -EPERM; 1300 return -EPERM;
1298#ifdef notyet 1301#ifdef notyet
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)
1308#endif 1311#endif
1309} 1312}
1310 1313
1314static bool mnt_ns_loop(struct path *path)
1315{
1316 /* Could bind mounting the mount namespace inode cause a
1317 * mount namespace loop?
1318 */
1319 struct inode *inode = path->dentry->d_inode;
1320 struct proc_inode *ei;
1321 struct mnt_namespace *mnt_ns;
1322
1323 if (!proc_ns_inode(inode))
1324 return false;
1325
1326 ei = PROC_I(inode);
1327 if (ei->ns_ops != &mntns_operations)
1328 return false;
1329
1330 mnt_ns = ei->ns;
1331 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1332}
1333
1311struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1334struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1312 int flag) 1335 int flag)
1313{ 1336{
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
1610 int type; 1633 int type;
1611 int err = 0; 1634 int err = 0;
1612 1635
1613 if (!capable(CAP_SYS_ADMIN)) 1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1614 return -EPERM; 1637 return -EPERM;
1615 1638
1616 if (path->dentry != path->mnt->mnt_root) 1639 if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,
1655 if (err) 1678 if (err)
1656 return err; 1679 return err;
1657 1680
1681 err = -EINVAL;
1682 if (mnt_ns_loop(&old_path))
1683 goto out;
1684
1658 err = lock_mount(path); 1685 err = lock_mount(path);
1659 if (err) 1686 if (err)
1660 goto out; 1687 goto out;
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1770 struct mount *p; 1797 struct mount *p;
1771 struct mount *old; 1798 struct mount *old;
1772 int err = 0; 1799 int err = 0;
1773 if (!capable(CAP_SYS_ADMIN)) 1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1774 return -EPERM; 1801 return -EPERM;
1775 if (!old_name || !*old_name) 1802 if (!old_name || !*old_name)
1776 return -EINVAL; 1803 return -EINVAL;
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1857 return ERR_PTR(err); 1884 return ERR_PTR(err);
1858} 1885}
1859 1886
1860static struct vfsmount *
1861do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1862{
1863 struct file_system_type *type = get_fs_type(fstype);
1864 struct vfsmount *mnt;
1865 if (!type)
1866 return ERR_PTR(-ENODEV);
1867 mnt = vfs_kern_mount(type, flags, name, data);
1868 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1869 !mnt->mnt_sb->s_subtype)
1870 mnt = fs_set_subtype(mnt, fstype);
1871 put_filesystem(type);
1872 return mnt;
1873}
1874
1875/* 1887/*
1876 * add a mount into a namespace's mount tree 1888 * add a mount into a namespace's mount tree
1877 */ 1889 */
@@ -1917,20 +1929,46 @@ unlock:
1917 * create a new mount for userspace and request it to be added into the 1929 * create a new mount for userspace and request it to be added into the
1918 * namespace's tree 1930 * namespace's tree
1919 */ 1931 */
1920static int do_new_mount(struct path *path, const char *type, int flags, 1932static int do_new_mount(struct path *path, const char *fstype, int flags,
1921 int mnt_flags, const char *name, void *data) 1933 int mnt_flags, const char *name, void *data)
1922{ 1934{
1935 struct file_system_type *type;
1936 struct user_namespace *user_ns;
1923 struct vfsmount *mnt; 1937 struct vfsmount *mnt;
1924 int err; 1938 int err;
1925 1939
1926 if (!type) 1940 if (!fstype)
1927 return -EINVAL; 1941 return -EINVAL;
1928 1942
1929 /* we need capabilities... */ 1943 /* we need capabilities... */
1930 if (!capable(CAP_SYS_ADMIN)) 1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1931 return -EPERM; 1946 return -EPERM;
1932 1947
1933 mnt = do_kern_mount(type, flags, name, data); 1948 type = get_fs_type(fstype);
1949 if (!type)
1950 return -ENODEV;
1951
1952 if (user_ns != &init_user_ns) {
1953 if (!(type->fs_flags & FS_USERNS_MOUNT)) {
1954 put_filesystem(type);
1955 return -EPERM;
1956 }
1957 /* Only in special cases allow devices from mounts
1958 * created outside the initial user namespace.
1959 */
1960 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
1961 flags |= MS_NODEV;
1962 mnt_flags |= MNT_NODEV;
1963 }
1964 }
1965
1966 mnt = vfs_kern_mount(type, flags, name, data);
1967 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1968 !mnt->mnt_sb->s_subtype)
1969 mnt = fs_set_subtype(mnt, fstype);
1970
1971 put_filesystem(type);
1934 if (IS_ERR(mnt)) 1972 if (IS_ERR(mnt))
1935 return PTR_ERR(mnt); 1973 return PTR_ERR(mnt);
1936 1974
@@ -2261,18 +2299,42 @@ dput_out:
2261 return retval; 2299 return retval;
2262} 2300}
2263 2301
2264static struct mnt_namespace *alloc_mnt_ns(void) 2302static void free_mnt_ns(struct mnt_namespace *ns)
2303{
2304 proc_free_inum(ns->proc_inum);
2305 put_user_ns(ns->user_ns);
2306 kfree(ns);
2307}
2308
2309/*
2310 * Assign a sequence number so we can detect when we attempt to bind
2311 * mount a reference to an older mount namespace into the current
2312 * mount namespace, preventing reference counting loops. A 64bit
2313 * number incrementing at 10Ghz will take 12,427 years to wrap which
2314 * is effectively never, so we can ignore the possibility.
2315 */
2316static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2317
2318static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2265{ 2319{
2266 struct mnt_namespace *new_ns; 2320 struct mnt_namespace *new_ns;
2321 int ret;
2267 2322
2268 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2323 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2269 if (!new_ns) 2324 if (!new_ns)
2270 return ERR_PTR(-ENOMEM); 2325 return ERR_PTR(-ENOMEM);
2326 ret = proc_alloc_inum(&new_ns->proc_inum);
2327 if (ret) {
2328 kfree(new_ns);
2329 return ERR_PTR(ret);
2330 }
2331 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2271 atomic_set(&new_ns->count, 1); 2332 atomic_set(&new_ns->count, 1);
2272 new_ns->root = NULL; 2333 new_ns->root = NULL;
2273 INIT_LIST_HEAD(&new_ns->list); 2334 INIT_LIST_HEAD(&new_ns->list);
2274 init_waitqueue_head(&new_ns->poll); 2335 init_waitqueue_head(&new_ns->poll);
2275 new_ns->event = 0; 2336 new_ns->event = 0;
2337 new_ns->user_ns = get_user_ns(user_ns);
2276 return new_ns; 2338 return new_ns;
2277} 2339}
2278 2340
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2281 * copied from the namespace of the passed in task structure. 2343 * copied from the namespace of the passed in task structure.
2282 */ 2344 */
2283static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, 2345static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2284 struct fs_struct *fs) 2346 struct user_namespace *user_ns, struct fs_struct *fs)
2285{ 2347{
2286 struct mnt_namespace *new_ns; 2348 struct mnt_namespace *new_ns;
2287 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2349 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2288 struct mount *p, *q; 2350 struct mount *p, *q;
2289 struct mount *old = mnt_ns->root; 2351 struct mount *old = mnt_ns->root;
2290 struct mount *new; 2352 struct mount *new;
2353 int copy_flags;
2291 2354
2292 new_ns = alloc_mnt_ns(); 2355 new_ns = alloc_mnt_ns(user_ns);
2293 if (IS_ERR(new_ns)) 2356 if (IS_ERR(new_ns))
2294 return new_ns; 2357 return new_ns;
2295 2358
2296 down_write(&namespace_sem); 2359 down_write(&namespace_sem);
2297 /* First pass: copy the tree topology */ 2360 /* First pass: copy the tree topology */
2298 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); 2361 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2362 if (user_ns != mnt_ns->user_ns)
2363 copy_flags |= CL_SHARED_TO_SLAVE;
2364 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2299 if (IS_ERR(new)) { 2365 if (IS_ERR(new)) {
2300 up_write(&namespace_sem); 2366 up_write(&namespace_sem);
2301 kfree(new_ns); 2367 free_mnt_ns(new_ns);
2302 return ERR_CAST(new); 2368 return ERR_CAST(new);
2303 } 2369 }
2304 new_ns->root = new; 2370 new_ns->root = new;
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339} 2405}
2340 2406
2341struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, 2407struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2342 struct fs_struct *new_fs) 2408 struct user_namespace *user_ns, struct fs_struct *new_fs)
2343{ 2409{
2344 struct mnt_namespace *new_ns; 2410 struct mnt_namespace *new_ns;
2345 2411
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2349 if (!(flags & CLONE_NEWNS)) 2415 if (!(flags & CLONE_NEWNS))
2350 return ns; 2416 return ns;
2351 2417
2352 new_ns = dup_mnt_ns(ns, new_fs); 2418 new_ns = dup_mnt_ns(ns, user_ns, new_fs);
2353 2419
2354 put_mnt_ns(ns); 2420 put_mnt_ns(ns);
2355 return new_ns; 2421 return new_ns;
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2361 */ 2427 */
2362static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) 2428static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2363{ 2429{
2364 struct mnt_namespace *new_ns = alloc_mnt_ns(); 2430 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2365 if (!IS_ERR(new_ns)) { 2431 if (!IS_ERR(new_ns)) {
2366 struct mount *mnt = real_mount(m); 2432 struct mount *mnt = real_mount(m);
2367 mnt->mnt_ns = new_ns; 2433 mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2501 struct mount *new_mnt, *root_mnt; 2567 struct mount *new_mnt, *root_mnt;
2502 int error; 2568 int error;
2503 2569
2504 if (!capable(CAP_SYS_ADMIN)) 2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
2505 return -EPERM; 2571 return -EPERM;
2506 2572
2507 error = user_path_dir(new_root, &new); 2573 error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)
2583 struct vfsmount *mnt; 2649 struct vfsmount *mnt;
2584 struct mnt_namespace *ns; 2650 struct mnt_namespace *ns;
2585 struct path root; 2651 struct path root;
2652 struct file_system_type *type;
2586 2653
2587 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2654 type = get_fs_type("rootfs");
2655 if (!type)
2656 panic("Can't find rootfs type");
2657 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
2658 put_filesystem(type);
2588 if (IS_ERR(mnt)) 2659 if (IS_ERR(mnt))
2589 panic("Can't create rootfs"); 2660 panic("Can't create rootfs");
2590 2661
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
2647 br_write_unlock(&vfsmount_lock); 2718 br_write_unlock(&vfsmount_lock);
2648 up_write(&namespace_sem); 2719 up_write(&namespace_sem);
2649 release_mounts(&umount_list); 2720 release_mounts(&umount_list);
2650 kfree(ns); 2721 free_mnt_ns(ns);
2651} 2722}
2652 2723
2653struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 2724struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)
2681{ 2752{
2682 return check_mnt(real_mount(mnt)); 2753 return check_mnt(real_mount(mnt));
2683} 2754}
2755
2756static void *mntns_get(struct task_struct *task)
2757{
2758 struct mnt_namespace *ns = NULL;
2759 struct nsproxy *nsproxy;
2760
2761 rcu_read_lock();
2762 nsproxy = task_nsproxy(task);
2763 if (nsproxy) {
2764 ns = nsproxy->mnt_ns;
2765 get_mnt_ns(ns);
2766 }
2767 rcu_read_unlock();
2768
2769 return ns;
2770}
2771
2772static void mntns_put(void *ns)
2773{
2774 put_mnt_ns(ns);
2775}
2776
2777static int mntns_install(struct nsproxy *nsproxy, void *ns)
2778{
2779 struct fs_struct *fs = current->fs;
2780 struct mnt_namespace *mnt_ns = ns;
2781 struct path root;
2782
2783 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
2784 !nsown_capable(CAP_SYS_CHROOT))
2785 return -EPERM;
2786
2787 if (fs->users != 1)
2788 return -EINVAL;
2789
2790 get_mnt_ns(mnt_ns);
2791 put_mnt_ns(nsproxy->mnt_ns);
2792 nsproxy->mnt_ns = mnt_ns;
2793
2794 /* Find the root */
2795 root.mnt = &mnt_ns->root->mnt;
2796 root.dentry = mnt_ns->root->mnt.mnt_root;
2797 path_get(&root);
2798 while(d_mountpoint(root.dentry) && follow_down_one(&root))
2799 ;
2800
2801 /* Update the pwd and root */
2802 set_fs_pwd(fs, &root);
2803 set_fs_root(fs, &root);
2804
2805 path_put(&root);
2806 return 0;
2807}
2808
2809static unsigned int mntns_inum(void *ns)
2810{
2811 struct mnt_namespace *mnt_ns = ns;
2812 return mnt_ns->proc_inum;
2813}
2814
2815const struct proc_ns_operations mntns_operations = {
2816 .name = "mnt",
2817 .type = CLONE_NEWNS,
2818 .get = mntns_get,
2819 .put = mntns_put,
2820 .install = mntns_install,
2821 .inum = mntns_inum,
2822};
diff --git a/fs/open.c b/fs/open.c
index 59071f55bf7f..182d8667b7bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
435 goto dput_and_out; 435 goto dput_and_out;
436 436
437 error = -EPERM; 437 error = -EPERM;
438 if (!capable(CAP_SYS_CHROOT)) 438 if (!nsown_capable(CAP_SYS_CHROOT))
439 goto dput_and_out; 439 goto dput_and_out;
440 error = security_path_chroot(&path); 440 error = security_path_chroot(&path);
441 if (error) 441 if (error)
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c60979d541..19b853a3445c 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
22#define CL_COPY_ALL 0x04 22#define CL_COPY_ALL 0x04
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20
25 26
26static inline void set_mnt_shared(struct mount *mnt) 27static inline void set_mnt_shared(struct mount *mnt)
27{ 28{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349efbbc2b..981b05601931 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o 23proc-y += namespaces.o
24proc-y += self.o
24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 25proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
25proc-$(CONFIG_NET) += proc_net.o 26proc-$(CONFIG_NET) += proc_net.o
26proc-$(CONFIG_PROC_KCORE) += kcore.o 27proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d3696708fc1a..d66248a1919b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
162static inline void task_state(struct seq_file *m, struct pid_namespace *ns, 162static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
163 struct pid *pid, struct task_struct *p) 163 struct pid *pid, struct task_struct *p)
164{ 164{
165 struct user_namespace *user_ns = current_user_ns(); 165 struct user_namespace *user_ns = seq_user_ns(m);
166 struct group_info *group_info; 166 struct group_info *group_info;
167 int g; 167 int g;
168 struct fdtable *fdt = NULL; 168 struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25157b8..5a5a0be40e40 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
2345}; 2345};
2346#endif 2346#endif
2347 2347
2348/*
2349 * /proc/self:
2350 */
2351static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2352 int buflen)
2353{
2354 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2355 pid_t tgid = task_tgid_nr_ns(current, ns);
2356 char tmp[PROC_NUMBUF];
2357 if (!tgid)
2358 return -ENOENT;
2359 sprintf(tmp, "%d", tgid);
2360 return vfs_readlink(dentry,buffer,buflen,tmp);
2361}
2362
2363static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2364{
2365 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2366 pid_t tgid = task_tgid_nr_ns(current, ns);
2367 char *name = ERR_PTR(-ENOENT);
2368 if (tgid) {
2369 /* 11 for max length of signed int in decimal + NULL term */
2370 name = kmalloc(12, GFP_KERNEL);
2371 if (!name)
2372 name = ERR_PTR(-ENOMEM);
2373 else
2374 sprintf(name, "%d", tgid);
2375 }
2376 nd_set_link(nd, name);
2377 return NULL;
2378}
2379
2380static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
2381 void *cookie)
2382{
2383 char *s = nd_get_link(nd);
2384 if (!IS_ERR(s))
2385 kfree(s);
2386}
2387
2388static const struct inode_operations proc_self_inode_operations = {
2389 .readlink = proc_self_readlink,
2390 .follow_link = proc_self_follow_link,
2391 .put_link = proc_self_put_link,
2392};
2393
2394/*
2395 * proc base
2396 *
2397 * These are the directory entries in the root directory of /proc
2398 * that properly belong to the /proc filesystem, as they describe
2399 * describe something that is process related.
2400 */
2401static const struct pid_entry proc_base_stuff[] = {
2402 NOD("self", S_IFLNK|S_IRWXUGO,
2403 &proc_self_inode_operations, NULL, {}),
2404};
2405
2406static struct dentry *proc_base_instantiate(struct inode *dir,
2407 struct dentry *dentry, struct task_struct *task, const void *ptr)
2408{
2409 const struct pid_entry *p = ptr;
2410 struct inode *inode;
2411 struct proc_inode *ei;
2412 struct dentry *error;
2413
2414 /* Allocate the inode */
2415 error = ERR_PTR(-ENOMEM);
2416 inode = new_inode(dir->i_sb);
2417 if (!inode)
2418 goto out;
2419
2420 /* Initialize the inode */
2421 ei = PROC_I(inode);
2422 inode->i_ino = get_next_ino();
2423 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2424
2425 /*
2426 * grab the reference to the task.
2427 */
2428 ei->pid = get_task_pid(task, PIDTYPE_PID);
2429 if (!ei->pid)
2430 goto out_iput;
2431
2432 inode->i_mode = p->mode;
2433 if (S_ISDIR(inode->i_mode))
2434 set_nlink(inode, 2);
2435 if (S_ISLNK(inode->i_mode))
2436 inode->i_size = 64;
2437 if (p->iop)
2438 inode->i_op = p->iop;
2439 if (p->fop)
2440 inode->i_fop = p->fop;
2441 ei->op = p->op;
2442 d_add(dentry, inode);
2443 error = NULL;
2444out:
2445 return error;
2446out_iput:
2447 iput(inode);
2448 goto out;
2449}
2450
2451static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2452{
2453 struct dentry *error;
2454 struct task_struct *task = get_proc_task(dir);
2455 const struct pid_entry *p, *last;
2456
2457 error = ERR_PTR(-ENOENT);
2458
2459 if (!task)
2460 goto out_no_task;
2461
2462 /* Lookup the directory entry */
2463 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2464 for (p = proc_base_stuff; p <= last; p++) {
2465 if (p->len != dentry->d_name.len)
2466 continue;
2467 if (!memcmp(dentry->d_name.name, p->name, p->len))
2468 break;
2469 }
2470 if (p > last)
2471 goto out;
2472
2473 error = proc_base_instantiate(dir, dentry, task, p);
2474
2475out:
2476 put_task_struct(task);
2477out_no_task:
2478 return error;
2479}
2480
2481static int proc_base_fill_cache(struct file *filp, void *dirent,
2482 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2483{
2484 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2485 proc_base_instantiate, task, p);
2486}
2487
2488#ifdef CONFIG_TASK_IO_ACCOUNTING 2348#ifdef CONFIG_TASK_IO_ACCOUNTING
2489static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2349static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2490{ 2350{
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
2839 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2699 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2840 tgid->numbers[i].nr); 2700 tgid->numbers[i].nr);
2841 } 2701 }
2842
2843 upid = &pid->numbers[pid->level];
2844 if (upid->nr == 1)
2845 pid_ns_release_proc(upid->ns);
2846} 2702}
2847 2703
2848static struct dentry *proc_pid_instantiate(struct inode *dir, 2704static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
2876 2732
2877struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 2733struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
2878{ 2734{
2879 struct dentry *result; 2735 struct dentry *result = NULL;
2880 struct task_struct *task; 2736 struct task_struct *task;
2881 unsigned tgid; 2737 unsigned tgid;
2882 struct pid_namespace *ns; 2738 struct pid_namespace *ns;
2883 2739
2884 result = proc_base_lookup(dir, dentry);
2885 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2886 goto out;
2887
2888 tgid = name_to_int(dentry); 2740 tgid = name_to_int(dentry);
2889 if (tgid == ~0U) 2741 if (tgid == ~0U)
2890 goto out; 2742 goto out;
@@ -2947,7 +2799,7 @@ retry:
2947 return iter; 2799 return iter;
2948} 2800}
2949 2801
2950#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 2802#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
2951 2803
2952static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2804static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2953 struct tgid_iter iter) 2805 struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
2967/* for the /proc/ directory itself, after non-process stuff has been done */ 2819/* for the /proc/ directory itself, after non-process stuff has been done */
2968int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2820int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2969{ 2821{
2970 unsigned int nr;
2971 struct task_struct *reaper;
2972 struct tgid_iter iter; 2822 struct tgid_iter iter;
2973 struct pid_namespace *ns; 2823 struct pid_namespace *ns;
2974 filldir_t __filldir; 2824 filldir_t __filldir;
2975 2825
2976 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 2826 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
2977 goto out_no_task; 2827 goto out;
2978 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2979
2980 reaper = get_proc_task(filp->f_path.dentry->d_inode);
2981 if (!reaper)
2982 goto out_no_task;
2983
2984 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2985 const struct pid_entry *p = &proc_base_stuff[nr];
2986 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2987 goto out;
2988 }
2989 2828
2990 ns = filp->f_dentry->d_sb->s_fs_info; 2829 ns = filp->f_dentry->d_sb->s_fs_info;
2991 iter.task = NULL; 2830 iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3006 } 2845 }
3007 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 2846 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
3008out: 2847out:
3009 put_task_struct(reaper);
3010out_no_task:
3011 return 0; 2848 return 0;
3012} 2849}
3013 2850
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
350 * Return an inode number between PROC_DYNAMIC_FIRST and 350 * Return an inode number between PROC_DYNAMIC_FIRST and
351 * 0xffffffff, or zero on failure. 351 * 0xffffffff, or zero on failure.
352 */ 352 */
353static unsigned int get_inode_number(void) 353int proc_alloc_inum(unsigned int *inum)
354{ 354{
355 unsigned int i; 355 unsigned int i;
356 int error; 356 int error;
357 357
358retry: 358retry:
359 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) 359 if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
360 return 0; 360 return -ENOMEM;
361 361
362 spin_lock(&proc_inum_lock); 362 spin_lock(&proc_inum_lock);
363 error = ida_get_new(&proc_inum_ida, &i); 363 error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
365 if (error == -EAGAIN) 365 if (error == -EAGAIN)
366 goto retry; 366 goto retry;
367 else if (error) 367 else if (error)
368 return 0; 368 return error;
369 369
370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { 370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
371 spin_lock(&proc_inum_lock); 371 spin_lock(&proc_inum_lock);
372 ida_remove(&proc_inum_ida, i); 372 ida_remove(&proc_inum_ida, i);
373 spin_unlock(&proc_inum_lock); 373 spin_unlock(&proc_inum_lock);
374 return 0; 374 return -ENOSPC;
375 } 375 }
376 return PROC_DYNAMIC_FIRST + i; 376 *inum = PROC_DYNAMIC_FIRST + i;
377 return 0;
377} 378}
378 379
379static void release_inode_number(unsigned int inum) 380void proc_free_inum(unsigned int inum)
380{ 381{
381 spin_lock(&proc_inum_lock); 382 spin_lock(&proc_inum_lock);
382 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 383 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
554 555
555static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 556static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
556{ 557{
557 unsigned int i;
558 struct proc_dir_entry *tmp; 558 struct proc_dir_entry *tmp;
559 int ret;
559 560
560 i = get_inode_number(); 561 ret = proc_alloc_inum(&dp->low_ino);
561 if (i == 0) 562 if (ret)
562 return -EAGAIN; 563 return ret;
563 dp->low_ino = i;
564 564
565 if (S_ISDIR(dp->mode)) { 565 if (S_ISDIR(dp->mode)) {
566 if (dp->proc_iops == NULL) { 566 if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
764 764
765static void free_proc_entry(struct proc_dir_entry *de) 765static void free_proc_entry(struct proc_dir_entry *de)
766{ 766{
767 release_inode_number(de->low_ino); 767 proc_free_inum(de->low_ino);
768 768
769 if (S_ISLNK(de->mode)) 769 if (S_ISLNK(de->mode))
770 kfree(de->data); 770 kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbdee9ec..439ae6886507 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
31 struct proc_dir_entry *de; 31 struct proc_dir_entry *de;
32 struct ctl_table_header *head; 32 struct ctl_table_header *head;
33 const struct proc_ns_operations *ns_ops; 33 const struct proc_ns_operations *ns_ops;
34 void *ns;
34 35
35 truncate_inode_pages(&inode->i_data, 0); 36 truncate_inode_pages(&inode->i_data, 0);
36 clear_inode(inode); 37 clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
49 } 50 }
50 /* Release any associated namespace */ 51 /* Release any associated namespace */
51 ns_ops = PROC_I(inode)->ns_ops; 52 ns_ops = PROC_I(inode)->ns_ops;
52 if (ns_ops && ns_ops->put) 53 ns = PROC_I(inode)->ns;
53 ns_ops->put(PROC_I(inode)->ns); 54 if (ns_ops && ns)
55 ns_ops->put(ns);
54} 56}
55 57
56static struct kmem_cache * proc_inode_cachep; 58static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b084abf..252544c05207 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct ctl_table_header;
15struct mempolicy; 15struct mempolicy;
16 16
17extern struct proc_dir_entry proc_root; 17extern struct proc_dir_entry proc_root;
18extern void proc_self_init(void);
18#ifdef CONFIG_PROC_SYSCTL 19#ifdef CONFIG_PROC_SYSCTL
19extern int proc_sys_init(void); 20extern int proc_sys_init(void);
20extern void sysctl_head_put(struct ctl_table_header *head); 21extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
13#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
14#include <linux/user_namespace.h>
14#include "internal.h" 15#include "internal.h"
15 16
16 17
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
24#ifdef CONFIG_IPC_NS 25#ifdef CONFIG_IPC_NS
25 &ipcns_operations, 26 &ipcns_operations,
26#endif 27#endif
28#ifdef CONFIG_PID_NS
29 &pidns_operations,
30#endif
31#ifdef CONFIG_USER_NS
32 &userns_operations,
33#endif
34 &mntns_operations,
27}; 35};
28 36
29static const struct file_operations ns_file_operations = { 37static const struct file_operations ns_file_operations = {
30 .llseek = no_llseek, 38 .llseek = no_llseek,
31}; 39};
32 40
41static const struct inode_operations ns_inode_operations = {
42 .setattr = proc_setattr,
43};
44
45static int ns_delete_dentry(const struct dentry *dentry)
46{
47 /* Don't cache namespace inodes when not in use */
48 return 1;
49}
50
51static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
52{
53 struct inode *inode = dentry->d_inode;
54 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
55
56 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
57 ns_ops->name, inode->i_ino);
58}
59
60const struct dentry_operations ns_dentry_operations =
61{
62 .d_delete = ns_delete_dentry,
63 .d_dname = ns_dname,
64};
65
66static struct dentry *proc_ns_get_dentry(struct super_block *sb,
67 struct task_struct *task, const struct proc_ns_operations *ns_ops)
68{
69 struct dentry *dentry, *result;
70 struct inode *inode;
71 struct proc_inode *ei;
72 struct qstr qname = { .name = "", };
73 void *ns;
74
75 ns = ns_ops->get(task);
76 if (!ns)
77 return ERR_PTR(-ENOENT);
78
79 dentry = d_alloc_pseudo(sb, &qname);
80 if (!dentry) {
81 ns_ops->put(ns);
82 return ERR_PTR(-ENOMEM);
83 }
84
85 inode = iget_locked(sb, ns_ops->inum(ns));
86 if (!inode) {
87 dput(dentry);
88 ns_ops->put(ns);
89 return ERR_PTR(-ENOMEM);
90 }
91
92 ei = PROC_I(inode);
93 if (inode->i_state & I_NEW) {
94 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
95 inode->i_op = &ns_inode_operations;
96 inode->i_mode = S_IFREG | S_IRUGO;
97 inode->i_fop = &ns_file_operations;
98 ei->ns_ops = ns_ops;
99 ei->ns = ns;
100 unlock_new_inode(inode);
101 } else {
102 ns_ops->put(ns);
103 }
104
105 d_set_d_op(dentry, &ns_dentry_operations);
106 result = d_instantiate_unique(dentry, inode);
107 if (result) {
108 dput(dentry);
109 dentry = result;
110 }
111
112 return dentry;
113}
114
115static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
116{
117 struct inode *inode = dentry->d_inode;
118 struct super_block *sb = inode->i_sb;
119 struct proc_inode *ei = PROC_I(inode);
120 struct task_struct *task;
121 struct dentry *ns_dentry;
122 void *error = ERR_PTR(-EACCES);
123
124 task = get_proc_task(inode);
125 if (!task)
126 goto out;
127
128 if (!ptrace_may_access(task, PTRACE_MODE_READ))
129 goto out_put_task;
130
131 ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
132 if (IS_ERR(ns_dentry)) {
133 error = ERR_CAST(ns_dentry);
134 goto out_put_task;
135 }
136
137 dput(nd->path.dentry);
138 nd->path.dentry = ns_dentry;
139 error = NULL;
140
141out_put_task:
142 put_task_struct(task);
143out:
144 return error;
145}
146
147static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
148{
149 struct inode *inode = dentry->d_inode;
150 struct proc_inode *ei = PROC_I(inode);
151 const struct proc_ns_operations *ns_ops = ei->ns_ops;
152 struct task_struct *task;
153 void *ns;
154 char name[50];
155 int len = -EACCES;
156
157 task = get_proc_task(inode);
158 if (!task)
159 goto out;
160
161 if (!ptrace_may_access(task, PTRACE_MODE_READ))
162 goto out_put_task;
163
164 len = -ENOENT;
165 ns = ns_ops->get(task);
166 if (!ns)
167 goto out_put_task;
168
169 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
170 len = strlen(name);
171
172 if (len > buflen)
173 len = buflen;
174 if (copy_to_user(buffer, name, len))
175 len = -EFAULT;
176
177 ns_ops->put(ns);
178out_put_task:
179 put_task_struct(task);
180out:
181 return len;
182}
183
184static const struct inode_operations proc_ns_link_inode_operations = {
185 .readlink = proc_ns_readlink,
186 .follow_link = proc_ns_follow_link,
187 .setattr = proc_setattr,
188};
189
33static struct dentry *proc_ns_instantiate(struct inode *dir, 190static struct dentry *proc_ns_instantiate(struct inode *dir,
34 struct dentry *dentry, struct task_struct *task, const void *ptr) 191 struct dentry *dentry, struct task_struct *task, const void *ptr)
35{ 192{
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
37 struct inode *inode; 194 struct inode *inode;
38 struct proc_inode *ei; 195 struct proc_inode *ei;
39 struct dentry *error = ERR_PTR(-ENOENT); 196 struct dentry *error = ERR_PTR(-ENOENT);
40 void *ns;
41 197
42 inode = proc_pid_make_inode(dir->i_sb, task); 198 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode) 199 if (!inode)
44 goto out; 200 goto out;
45 201
46 ns = ns_ops->get(task);
47 if (!ns)
48 goto out_iput;
49
50 ei = PROC_I(inode); 202 ei = PROC_I(inode);
51 inode->i_mode = S_IFREG|S_IRUSR; 203 inode->i_mode = S_IFLNK|S_IRWXUGO;
52 inode->i_fop = &ns_file_operations; 204 inode->i_op = &proc_ns_link_inode_operations;
53 ei->ns_ops = ns_ops; 205 ei->ns_ops = ns_ops;
54 ei->ns = ns;
55 206
56 d_set_d_op(dentry, &pid_dentry_operations); 207 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 208 d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
60 error = NULL; 211 error = NULL;
61out: 212out:
62 return error; 213 return error;
63out_iput:
64 iput(inode);
65 goto out;
66} 214}
67 215
68static int proc_ns_fill_cache(struct file *filp, void *dirent, 216static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
89 if (!task) 237 if (!task)
90 goto out_no_task; 238 goto out_no_task;
91 239
92 ret = -EPERM;
93 if (!ptrace_may_access(task, PTRACE_MODE_READ))
94 goto out;
95
96 ret = 0; 240 ret = 0;
97 i = filp->f_pos; 241 i = filp->f_pos;
98 switch (i) { 242 switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
152 if (!task) 296 if (!task)
153 goto out_no_task; 297 goto out_no_task;
154 298
155 error = ERR_PTR(-EPERM);
156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out;
158
159 last = &ns_entries[ARRAY_SIZE(ns_entries)]; 299 last = &ns_entries[ARRAY_SIZE(ns_entries)];
160 for (entry = ns_entries; entry < last; entry++) { 300 for (entry = ns_entries; entry < last; entry++) {
161 if (strlen((*entry)->name) != len) 301 if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
163 if (!memcmp(dentry->d_name.name, (*entry)->name, len)) 303 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
164 break; 304 break;
165 } 305 }
166 error = ERR_PTR(-ENOENT);
167 if (entry == last) 306 if (entry == last)
168 goto out; 307 goto out;
169 308
@@ -198,3 +337,7 @@ out_invalid:
198 return ERR_PTR(-EINVAL); 337 return ERR_PTR(-EINVAL);
199} 338}
200 339
340bool proc_ns_inode(struct inode *inode)
341{
342 return inode->i_fop == &ns_file_operations;
343}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92d2e01..c6e9fac26bac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
100 int err; 100 int err;
101 struct super_block *sb; 101 struct super_block *sb;
102 struct pid_namespace *ns; 102 struct pid_namespace *ns;
103 struct proc_inode *ei;
104 char *options; 103 char *options;
105 104
106 if (flags & MS_KERNMOUNT) { 105 if (flags & MS_KERNMOUNT) {
107 ns = (struct pid_namespace *)data; 106 ns = (struct pid_namespace *)data;
108 options = NULL; 107 options = NULL;
109 } else { 108 } else {
110 ns = current->nsproxy->pid_ns; 109 ns = task_active_pid_ns(current);
111 options = data; 110 options = data;
112 } 111 }
113 112
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
130 sb->s_flags |= MS_ACTIVE; 129 sb->s_flags |= MS_ACTIVE;
131 } 130 }
132 131
133 ei = PROC_I(sb->s_root->d_inode);
134 if (!ei->pid) {
135 rcu_read_lock();
136 ei->pid = get_pid(find_pid_ns(1, ns));
137 rcu_read_unlock();
138 }
139
140 return dget(sb->s_root); 132 return dget(sb->s_root);
141} 133}
142 134
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
153 .name = "proc", 145 .name = "proc",
154 .mount = proc_mount, 146 .mount = proc_mount,
155 .kill_sb = proc_kill_sb, 147 .kill_sb = proc_kill_sb,
148 .fs_flags = FS_USERNS_MOUNT,
156}; 149};
157 150
158void __init proc_root_init(void) 151void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
163 err = register_filesystem(&proc_fs_type); 156 err = register_filesystem(&proc_fs_type);
164 if (err) 157 if (err)
165 return; 158 return;
166 err = pid_ns_prepare_proc(&init_pid_ns);
167 if (err) {
168 unregister_filesystem(&proc_fs_type);
169 return;
170 }
171 159
160 proc_self_init();
172 proc_symlink("mounts", NULL, "self/mounts"); 161 proc_symlink("mounts", NULL, "self/mounts");
173 162
174 proc_net_init(); 163 proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 000000000000..aa5cc3bff140
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
1#include <linux/proc_fs.h>
2#include <linux/sched.h>
3#include <linux/namei.h>
4
5/*
6 * /proc/self:
7 */
8static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
9 int buflen)
10{
11 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
12 pid_t tgid = task_tgid_nr_ns(current, ns);
13 char tmp[PROC_NUMBUF];
14 if (!tgid)
15 return -ENOENT;
16 sprintf(tmp, "%d", tgid);
17 return vfs_readlink(dentry,buffer,buflen,tmp);
18}
19
20static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
21{
22 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
23 pid_t tgid = task_tgid_nr_ns(current, ns);
24 char *name = ERR_PTR(-ENOENT);
25 if (tgid) {
26 /* 11 for max length of signed int in decimal + NULL term */
27 name = kmalloc(12, GFP_KERNEL);
28 if (!name)
29 name = ERR_PTR(-ENOMEM);
30 else
31 sprintf(name, "%d", tgid);
32 }
33 nd_set_link(nd, name);
34 return NULL;
35}
36
37static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
38 void *cookie)
39{
40 char *s = nd_get_link(nd);
41 if (!IS_ERR(s))
42 kfree(s);
43}
44
45static const struct inode_operations proc_self_inode_operations = {
46 .readlink = proc_self_readlink,
47 .follow_link = proc_self_follow_link,
48 .put_link = proc_self_put_link,
49};
50
51void __init proc_self_init(void)
52{
53 struct proc_dir_entry *proc_self_symlink;
54 mode_t mode;
55
56 mode = S_IFLNK | S_IRWXUGO;
57 proc_self_symlink = proc_create("self", mode, NULL, NULL );
58 proc_self_symlink->proc_iops = &proc_self_inode_operations;
59}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 71eb7e253927..db940a9be045 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 149 .name = "sysfs",
150 .mount = sysfs_mount, 150 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 151 .kill_sb = sysfs_kill_sb,
152 .fs_flags = FS_USERNS_MOUNT,
152}; 153};
153 154
154int __init sysfs_init(void) 155int __init sysfs_init(void)
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 0142aacb70b7..abb2cd50f6b2 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -344,10 +344,8 @@ static inline void put_cred(const struct cred *_cred)
344extern struct user_namespace init_user_ns; 344extern struct user_namespace init_user_ns;
345#ifdef CONFIG_USER_NS 345#ifdef CONFIG_USER_NS
346#define current_user_ns() (current_cred_xxx(user_ns)) 346#define current_user_ns() (current_cred_xxx(user_ns))
347#define task_user_ns(task) (task_cred_xxx((task), user_ns))
348#else 347#else
349#define current_user_ns() (&init_user_ns) 348#define current_user_ns() (&init_user_ns)
350#define task_user_ns(task) (&init_user_ns)
351#endif 349#endif
352 350
353 351
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408fb1e77a0a..035521b46528 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1810,6 +1810,8 @@ struct file_system_type {
1810#define FS_REQUIRES_DEV 1 1810#define FS_REQUIRES_DEV 1
1811#define FS_BINARY_MOUNTDATA 2 1811#define FS_BINARY_MOUNTDATA 2
1812#define FS_HAS_SUBTYPE 4 1812#define FS_HAS_SUBTYPE 4
1813#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
1814#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
1813#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ 1815#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
1814#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ 1816#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
1815 struct dentry *(*mount) (struct file_system_type *, int, 1817 struct dentry *(*mount) (struct file_system_type *, int,
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92a9153..fe771978e877 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
67 67
68 /* user_ns which owns the ipc ns */ 68 /* user_ns which owns the ipc ns */
69 struct user_namespace *user_ns; 69 struct user_namespace *user_ns;
70
71 unsigned int proc_inum;
70}; 72};
71 73
72extern struct ipc_namespace init_ipc_ns; 74extern struct ipc_namespace init_ipc_ns;
@@ -133,7 +135,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
133 135
134#if defined(CONFIG_IPC_NS) 136#if defined(CONFIG_IPC_NS)
135extern struct ipc_namespace *copy_ipcs(unsigned long flags, 137extern struct ipc_namespace *copy_ipcs(unsigned long flags,
136 struct task_struct *tsk); 138 struct user_namespace *user_ns, struct ipc_namespace *ns);
139
137static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) 140static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
138{ 141{
139 if (ns) 142 if (ns)
@@ -144,12 +147,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
144extern void put_ipc_ns(struct ipc_namespace *ns); 147extern void put_ipc_ns(struct ipc_namespace *ns);
145#else 148#else
146static inline struct ipc_namespace *copy_ipcs(unsigned long flags, 149static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
147 struct task_struct *tsk) 150 struct user_namespace *user_ns, struct ipc_namespace *ns)
148{ 151{
149 if (flags & CLONE_NEWIPC) 152 if (flags & CLONE_NEWIPC)
150 return ERR_PTR(-EINVAL); 153 return ERR_PTR(-EINVAL);
151 154
152 return tsk->nsproxy->ipc_ns; 155 return ns;
153} 156}
154 157
155static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) 158static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e3903d770..12b2ab510323 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
4 4
5struct mnt_namespace; 5struct mnt_namespace;
6struct fs_struct; 6struct fs_struct;
7struct user_namespace;
7 8
8extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, 9extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
9 struct fs_struct *); 10 struct user_namespace *, struct fs_struct *);
10extern void put_mnt_ns(struct mnt_namespace *ns); 11extern void put_mnt_ns(struct mnt_namespace *ns);
11 12
12extern const struct file_operations proc_mounts_operations; 13extern const struct file_operations proc_mounts_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55ad004..10e5947491c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); 67void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
68void free_nsproxy(struct nsproxy *ns); 68void free_nsproxy(struct nsproxy *ns);
69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, 69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
70 struct fs_struct *); 70 struct cred *, struct fs_struct *);
71int __init nsproxy_cache_init(void); 71int __init nsproxy_cache_init(void);
72 72
73static inline void put_nsproxy(struct nsproxy *ns) 73static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87eacc5..bf285999273a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
21 struct kref kref; 21 struct kref kref;
22 struct pidmap pidmap[PIDMAP_ENTRIES]; 22 struct pidmap pidmap[PIDMAP_ENTRIES];
23 int last_pid; 23 int last_pid;
24 int nr_hashed;
24 struct task_struct *child_reaper; 25 struct task_struct *child_reaper;
25 struct kmem_cache *pid_cachep; 26 struct kmem_cache *pid_cachep;
26 unsigned int level; 27 unsigned int level;
@@ -31,9 +32,12 @@ struct pid_namespace {
31#ifdef CONFIG_BSD_PROCESS_ACCT 32#ifdef CONFIG_BSD_PROCESS_ACCT
32 struct bsd_acct_struct *bacct; 33 struct bsd_acct_struct *bacct;
33#endif 34#endif
35 struct user_namespace *user_ns;
36 struct work_struct proc_work;
34 kgid_t pid_gid; 37 kgid_t pid_gid;
35 int hide_pid; 38 int hide_pid;
36 int reboot; /* group exit code if this pidns was rebooted */ 39 int reboot; /* group exit code if this pidns was rebooted */
40 unsigned int proc_inum;
37}; 41};
38 42
39extern struct pid_namespace init_pid_ns; 43extern struct pid_namespace init_pid_ns;
@@ -46,7 +50,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
46 return ns; 50 return ns;
47} 51}
48 52
49extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 53extern struct pid_namespace *copy_pid_ns(unsigned long flags,
54 struct user_namespace *user_ns, struct pid_namespace *ns);
50extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); 55extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
51extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); 56extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
52extern void put_pid_ns(struct pid_namespace *ns); 57extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +64,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
59 return ns; 64 return ns;
60} 65}
61 66
62static inline struct pid_namespace * 67static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
63copy_pid_ns(unsigned long flags, struct pid_namespace *ns) 68 struct user_namespace *user_ns, struct pid_namespace *ns)
64{ 69{
65 if (flags & CLONE_NEWPID) 70 if (flags & CLONE_NEWPID)
66 ns = ERR_PTR(-EINVAL); 71 ns = ERR_PTR(-EINVAL);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1b..2e24018b7cec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
28 */ 28 */
29 29
30enum { 30enum {
31 PROC_ROOT_INO = 1, 31 PROC_ROOT_INO = 1,
32 PROC_IPC_INIT_INO = 0xEFFFFFFFU,
33 PROC_UTS_INIT_INO = 0xEFFFFFFEU,
34 PROC_USER_INIT_INO = 0xEFFFFFFDU,
35 PROC_PID_INIT_INO = 0xEFFFFFFCU,
32}; 36};
33 37
34/* 38/*
@@ -174,7 +178,10 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
174 struct proc_dir_entry *parent); 178 struct proc_dir_entry *parent);
175 179
176extern struct file *proc_ns_fget(int fd); 180extern struct file *proc_ns_fget(int fd);
181extern bool proc_ns_inode(struct inode *inode);
177 182
183extern int proc_alloc_inum(unsigned int *pino);
184extern void proc_free_inum(unsigned int inum);
178#else 185#else
179 186
180#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) 187#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -229,6 +236,19 @@ static inline struct file *proc_ns_fget(int fd)
229 return ERR_PTR(-EINVAL); 236 return ERR_PTR(-EINVAL);
230} 237}
231 238
239static inline bool proc_ns_inode(struct inode *inode)
240{
241 return false;
242}
243
244static inline int proc_alloc_inum(unsigned int *inum)
245{
246 *inum = 1;
247 return 0;
248}
249static inline void proc_free_inum(unsigned int inum)
250{
251}
232#endif /* CONFIG_PROC_FS */ 252#endif /* CONFIG_PROC_FS */
233 253
234#if !defined(CONFIG_PROC_KCORE) 254#if !defined(CONFIG_PROC_KCORE)
@@ -247,10 +267,14 @@ struct proc_ns_operations {
247 void *(*get)(struct task_struct *task); 267 void *(*get)(struct task_struct *task);
248 void (*put)(void *ns); 268 void (*put)(void *ns);
249 int (*install)(struct nsproxy *nsproxy, void *ns); 269 int (*install)(struct nsproxy *nsproxy, void *ns);
270 unsigned int (*inum)(void *ns);
250}; 271};
251extern const struct proc_ns_operations netns_operations; 272extern const struct proc_ns_operations netns_operations;
252extern const struct proc_ns_operations utsns_operations; 273extern const struct proc_ns_operations utsns_operations;
253extern const struct proc_ns_operations ipcns_operations; 274extern const struct proc_ns_operations ipcns_operations;
275extern const struct proc_ns_operations pidns_operations;
276extern const struct proc_ns_operations userns_operations;
277extern const struct proc_ns_operations mntns_operations;
254 278
255union proc_op { 279union proc_op {
256 int (*proc_get_link)(struct dentry *, struct path *); 280 int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142cae446a..b9bd2e6c73cc 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
25 struct user_namespace *parent; 25 struct user_namespace *parent;
26 kuid_t owner; 26 kuid_t owner;
27 kgid_t group; 27 kgid_t group;
28 unsigned int proc_inum;
28}; 29};
29 30
30extern struct user_namespace init_user_ns; 31extern struct user_namespace init_user_ns;
@@ -39,6 +40,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
39} 40}
40 41
41extern int create_user_ns(struct cred *new); 42extern int create_user_ns(struct cred *new);
43extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
42extern void free_user_ns(struct kref *kref); 44extern void free_user_ns(struct kref *kref);
43 45
44static inline void put_user_ns(struct user_namespace *ns) 46static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +68,14 @@ static inline int create_user_ns(struct cred *new)
66 return -EINVAL; 68 return -EINVAL;
67} 69}
68 70
71static inline int unshare_userns(unsigned long unshare_flags,
72 struct cred **new_cred)
73{
74 if (unshare_flags & CLONE_NEWUSER)
75 return -EINVAL;
76 return 0;
77}
78
69static inline void put_user_ns(struct user_namespace *ns) 79static inline void put_user_ns(struct user_namespace *ns)
70{ 80{
71} 81}
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b345206722a..239e27733d6c 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
23 struct kref kref; 23 struct kref kref;
24 struct new_utsname name; 24 struct new_utsname name;
25 struct user_namespace *user_ns; 25 struct user_namespace *user_ns;
26 unsigned int proc_inum;
26}; 27};
27extern struct uts_namespace init_uts_ns; 28extern struct uts_namespace init_uts_ns;
28 29
@@ -33,7 +34,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
33} 34}
34 35
35extern struct uts_namespace *copy_utsname(unsigned long flags, 36extern struct uts_namespace *copy_utsname(unsigned long flags,
36 struct task_struct *tsk); 37 struct user_namespace *user_ns, struct uts_namespace *old_ns);
37extern void free_uts_ns(struct kref *kref); 38extern void free_uts_ns(struct kref *kref);
38 39
39static inline void put_uts_ns(struct uts_namespace *ns) 40static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +51,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
50} 51}
51 52
52static inline struct uts_namespace *copy_utsname(unsigned long flags, 53static inline struct uts_namespace *copy_utsname(unsigned long flags,
53 struct task_struct *tsk) 54 struct user_namespace *user_ns, struct uts_namespace *old_ns)
54{ 55{
55 if (flags & CLONE_NEWUTS) 56 if (flags & CLONE_NEWUTS)
56 return ERR_PTR(-EINVAL); 57 return ERR_PTR(-EINVAL);
57 58
58 return tsk->nsproxy->uts_ns; 59 return old_ns;
59} 60}
60#endif 61#endif
61 62
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f56b796..de644bcd8613 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
56 56
57 struct user_namespace *user_ns; /* Owning user namespace */ 57 struct user_namespace *user_ns; /* Owning user namespace */
58 58
59 unsigned int proc_inum;
60
59 struct proc_dir_entry *proc_net; 61 struct proc_dir_entry *proc_net;
60 struct proc_dir_entry *proc_net_stat; 62 struct proc_dir_entry *proc_net_stat;
61 63
diff --git a/init/Kconfig b/init/Kconfig
index 1a207efca591..675d8a2326cf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1069,11 +1069,9 @@ config UIDGID_CONVERTED
1069 # Filesystems 1069 # Filesystems
1070 depends on 9P_FS = n 1070 depends on 9P_FS = n
1071 depends on AFS_FS = n 1071 depends on AFS_FS = n
1072 depends on AUTOFS4_FS = n
1073 depends on CEPH_FS = n 1072 depends on CEPH_FS = n
1074 depends on CIFS = n 1073 depends on CIFS = n
1075 depends on CODA_FS = n 1074 depends on CODA_FS = n
1076 depends on FUSE_FS = n
1077 depends on GFS2_FS = n 1075 depends on GFS2_FS = n
1078 depends on NCP_FS = n 1076 depends on NCP_FS = n
1079 depends on NFSD = n 1077 depends on NFSD = n
diff --git a/init/main.c b/init/main.c
index 63ae904a99a8..baf1f0f5c461 100644
--- a/init/main.c
+++ b/init/main.c
@@ -812,7 +812,6 @@ static int __ref kernel_init(void *unused)
812 system_state = SYSTEM_RUNNING; 812 system_state = SYSTEM_RUNNING;
813 numa_default_policy(); 813 numa_default_policy();
814 814
815 current->signal->flags |= SIGNAL_UNKILLABLE;
816 flush_delayed_fput(); 815 flush_delayed_fput();
817 816
818 if (ramdisk_execute_command) { 817 if (ramdisk_execute_command) {
diff --git a/init/version.c b/init/version.c
index 86fe0ccb997a..58170f18912d 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
12#include <linux/utsname.h> 12#include <linux/utsname.h>
13#include <generated/utsrelease.h> 13#include <generated/utsrelease.h>
14#include <linux/version.h> 14#include <linux/version.h>
15#include <linux/proc_fs.h>
15 16
16#ifndef CONFIG_KALLSYMS 17#ifndef CONFIG_KALLSYMS
17#define version(a) Version_ ## a 18#define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
34 .domainname = UTS_DOMAINNAME, 35 .domainname = UTS_DOMAINNAME,
35 }, 36 },
36 .user_ns = &init_user_ns, 37 .user_ns = &init_user_ns,
38 .proc_inum = PROC_UTS_INIT_INO,
37}; 39};
38EXPORT_SYMBOL_GPL(init_uts_ns); 40EXPORT_SYMBOL_GPL(init_uts_ns);
39 41
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d377c95..6471f1bdae96 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
16#include <linux/msg.h> 16#include <linux/msg.h>
17#include <linux/ipc_namespace.h> 17#include <linux/ipc_namespace.h>
18#include <linux/utsname.h> 18#include <linux/utsname.h>
19#include <linux/proc_fs.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20 21
21#include "util.h" 22#include "util.h"
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock);
30struct ipc_namespace init_ipc_ns = { 31struct ipc_namespace init_ipc_ns = {
31 .count = ATOMIC_INIT(1), 32 .count = ATOMIC_INIT(1),
32 .user_ns = &init_user_ns, 33 .user_ns = &init_user_ns,
34 .proc_inum = PROC_IPC_INIT_INO,
33}; 35};
34 36
35atomic_t nr_ipc_ns = ATOMIC_INIT(1); 37atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f362298c5ce4..cf3386a51de2 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
16 16
17#include "util.h" 17#include "util.h"
18 18
19static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, 19static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
20 struct ipc_namespace *old_ns) 20 struct ipc_namespace *old_ns)
21{ 21{
22 struct ipc_namespace *ns; 22 struct ipc_namespace *ns;
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
26 if (ns == NULL) 26 if (ns == NULL)
27 return ERR_PTR(-ENOMEM); 27 return ERR_PTR(-ENOMEM);
28 28
29 err = proc_alloc_inum(&ns->proc_inum);
30 if (err) {
31 kfree(ns);
32 return ERR_PTR(err);
33 }
34
29 atomic_set(&ns->count, 1); 35 atomic_set(&ns->count, 1);
30 err = mq_init_ns(ns); 36 err = mq_init_ns(ns);
31 if (err) { 37 if (err) {
38 proc_free_inum(ns->proc_inum);
32 kfree(ns); 39 kfree(ns);
33 return ERR_PTR(err); 40 return ERR_PTR(err);
34 } 41 }
@@ -46,19 +53,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
46 ipcns_notify(IPCNS_CREATED); 53 ipcns_notify(IPCNS_CREATED);
47 register_ipcns_notifier(ns); 54 register_ipcns_notifier(ns);
48 55
49 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); 56 ns->user_ns = get_user_ns(user_ns);
50 57
51 return ns; 58 return ns;
52} 59}
53 60
54struct ipc_namespace *copy_ipcs(unsigned long flags, 61struct ipc_namespace *copy_ipcs(unsigned long flags,
55 struct task_struct *tsk) 62 struct user_namespace *user_ns, struct ipc_namespace *ns)
56{ 63{
57 struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
58
59 if (!(flags & CLONE_NEWIPC)) 64 if (!(flags & CLONE_NEWIPC))
60 return get_ipc_ns(ns); 65 return get_ipc_ns(ns);
61 return create_ipc_ns(tsk, ns); 66 return create_ipc_ns(user_ns, ns);
62} 67}
63 68
64/* 69/*
@@ -113,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
113 */ 118 */
114 ipcns_notify(IPCNS_REMOVED); 119 ipcns_notify(IPCNS_REMOVED);
115 put_user_ns(ns->user_ns); 120 put_user_ns(ns->user_ns);
121 proc_free_inum(ns->proc_inum);
116 kfree(ns); 122 kfree(ns);
117} 123}
118 124
@@ -161,8 +167,12 @@ static void ipcns_put(void *ns)
161 return put_ipc_ns(ns); 167 return put_ipc_ns(ns);
162} 168}
163 169
164static int ipcns_install(struct nsproxy *nsproxy, void *ns) 170static int ipcns_install(struct nsproxy *nsproxy, void *new)
165{ 171{
172 struct ipc_namespace *ns = new;
173 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
174 return -EPERM;
175
166 /* Ditch state from the old ipc namespace */ 176 /* Ditch state from the old ipc namespace */
167 exit_sem(current); 177 exit_sem(current);
168 put_ipc_ns(nsproxy->ipc_ns); 178 put_ipc_ns(nsproxy->ipc_ns);
@@ -170,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
170 return 0; 180 return 0;
171} 181}
172 182
183static unsigned int ipcns_inum(void *vp)
184{
185 struct ipc_namespace *ns = vp;
186
187 return ns->proc_inum;
188}
189
173const struct proc_ns_operations ipcns_operations = { 190const struct proc_ns_operations ipcns_operations = {
174 .name = "ipc", 191 .name = "ipc",
175 .type = CLONE_NEWIPC, 192 .type = CLONE_NEWIPC,
176 .get = ipcns_get, 193 .get = ipcns_get,
177 .put = ipcns_put, 194 .put = ipcns_put,
178 .install = ipcns_install, 195 .install = ipcns_install,
196 .inum = ipcns_inum,
179}; 197};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f34c41bfaa37..9915ffe01372 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3409,7 +3409,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
3409{ 3409{
3410 struct cgroup_pidlist *l; 3410 struct cgroup_pidlist *l;
3411 /* don't need task_nsproxy() if we're looking at ourself */ 3411 /* don't need task_nsproxy() if we're looking at ourself */
3412 struct pid_namespace *ns = current->nsproxy->pid_ns; 3412 struct pid_namespace *ns = task_active_pid_ns(current);
3413 3413
3414 /* 3414 /*
3415 * We can't drop the pidlist_mutex before taking the l->mutex in case 3415 * We can't drop the pidlist_mutex before taking the l->mutex in case
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f9ff5493171d..301079d06f24 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6155,7 +6155,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6155 6155
6156 event->parent = parent_event; 6156 event->parent = parent_event;
6157 6157
6158 event->ns = get_pid_ns(current->nsproxy->pid_ns); 6158 event->ns = get_pid_ns(task_active_pid_ns(current));
6159 event->id = atomic64_inc_return(&perf_event_id); 6159 event->id = atomic64_inc_return(&perf_event_id);
6160 6160
6161 event->state = PERF_EVENT_STATE_INACTIVE; 6161 event->state = PERF_EVENT_STATE_INACTIVE;
diff --git a/kernel/exit.c b/kernel/exit.c
index 50d2e93c36ea..b4df21937216 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,18 +72,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
72 list_del_rcu(&p->tasks); 72 list_del_rcu(&p->tasks);
73 list_del_init(&p->sibling); 73 list_del_init(&p->sibling);
74 __this_cpu_dec(process_counts); 74 __this_cpu_dec(process_counts);
75 /*
76 * If we are the last child process in a pid namespace to be
77 * reaped, notify the reaper sleeping zap_pid_ns_processes().
78 */
79 if (IS_ENABLED(CONFIG_PID_NS)) {
80 struct task_struct *parent = p->real_parent;
81
82 if ((task_active_pid_ns(parent)->child_reaper == parent) &&
83 list_empty(&parent->children) &&
84 (parent->flags & PF_EXITING))
85 wake_up_process(parent);
86 }
87 } 75 }
88 list_del_rcu(&p->thread_group); 76 list_del_rcu(&p->thread_group);
89} 77}
diff --git a/kernel/fork.c b/kernel/fork.c
index 115d6c2e4cca..c36c4e301efe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1044 atomic_set(&sig->live, 1); 1044 atomic_set(&sig->live, 1);
1045 atomic_set(&sig->sigcnt, 1); 1045 atomic_set(&sig->sigcnt, 1);
1046 init_waitqueue_head(&sig->wait_chldexit); 1046 init_waitqueue_head(&sig->wait_chldexit);
1047 if (clone_flags & CLONE_NEWPID)
1048 sig->flags |= SIGNAL_UNKILLABLE;
1049 sig->curr_target = tsk; 1047 sig->curr_target = tsk;
1050 init_sigpending(&sig->shared_pending); 1048 init_sigpending(&sig->shared_pending);
1051 INIT_LIST_HEAD(&sig->posix_timers); 1049 INIT_LIST_HEAD(&sig->posix_timers);
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1438 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 1436 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1439 1437
1440 if (thread_group_leader(p)) { 1438 if (thread_group_leader(p)) {
1441 if (is_child_reaper(pid)) 1439 if (is_child_reaper(pid)) {
1442 p->nsproxy->pid_ns->child_reaper = p; 1440 ns_of_pid(pid)->child_reaper = p;
1441 p->signal->flags |= SIGNAL_UNKILLABLE;
1442 }
1443 1443
1444 p->signal->leader_pid = pid; 1444 p->signal->leader_pid = pid;
1445 p->signal->tty = tty_kref_get(current->signal->tty); 1445 p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io:
1473 if (p->io_context) 1473 if (p->io_context)
1474 exit_io_context(p); 1474 exit_io_context(p);
1475bad_fork_cleanup_namespaces: 1475bad_fork_cleanup_namespaces:
1476 if (unlikely(clone_flags & CLONE_NEWPID))
1477 pid_ns_release_proc(p->nsproxy->pid_ns);
1478 exit_task_namespaces(p); 1476 exit_task_namespaces(p);
1479bad_fork_cleanup_mm: 1477bad_fork_cleanup_mm:
1480 if (p->mm) 1478 if (p->mm)
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags,
1554 * Do some preliminary argument and permissions checking before we 1552 * Do some preliminary argument and permissions checking before we
1555 * actually start allocating stuff 1553 * actually start allocating stuff
1556 */ 1554 */
1557 if (clone_flags & CLONE_NEWUSER) { 1555 if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
1558 if (clone_flags & CLONE_THREAD) 1556 if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
1559 return -EINVAL; 1557 return -EINVAL;
1560 /* hopefully this check will go away when userns support is
1561 * complete
1562 */
1563 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
1564 !capable(CAP_SETGID))
1565 return -EPERM;
1566 } 1558 }
1567 1559
1568 /* 1560 /*
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
1724{ 1716{
1725 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1717 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1726 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1718 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1727 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) 1719 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
1720 CLONE_NEWUSER|CLONE_NEWPID))
1728 return -EINVAL; 1721 return -EINVAL;
1729 /* 1722 /*
1730 * Not implemented, but pretend it works if there is nothing to 1723 * Not implemented, but pretend it works if there is nothing to
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1791{ 1784{
1792 struct fs_struct *fs, *new_fs = NULL; 1785 struct fs_struct *fs, *new_fs = NULL;
1793 struct files_struct *fd, *new_fd = NULL; 1786 struct files_struct *fd, *new_fd = NULL;
1787 struct cred *new_cred = NULL;
1794 struct nsproxy *new_nsproxy = NULL; 1788 struct nsproxy *new_nsproxy = NULL;
1795 int do_sysvsem = 0; 1789 int do_sysvsem = 0;
1796 int err; 1790 int err;
1797 1791
1798 err = check_unshare_flags(unshare_flags); 1792 /*
1799 if (err) 1793 * If unsharing a user namespace must also unshare the thread.
1800 goto bad_unshare_out; 1794 */
1801 1795 if (unshare_flags & CLONE_NEWUSER)
1796 unshare_flags |= CLONE_THREAD;
1797 /*
1798 * If unsharing a pid namespace must also unshare the thread.
1799 */
1800 if (unshare_flags & CLONE_NEWPID)
1801 unshare_flags |= CLONE_THREAD;
1802 /*
1803 * If unsharing a thread from a thread group, must also unshare vm.
1804 */
1805 if (unshare_flags & CLONE_THREAD)
1806 unshare_flags |= CLONE_VM;
1807 /*
1808 * If unsharing vm, must also unshare signal handlers.
1809 */
1810 if (unshare_flags & CLONE_VM)
1811 unshare_flags |= CLONE_SIGHAND;
1802 /* 1812 /*
1803 * If unsharing namespace, must also unshare filesystem information. 1813 * If unsharing namespace, must also unshare filesystem information.
1804 */ 1814 */
1805 if (unshare_flags & CLONE_NEWNS) 1815 if (unshare_flags & CLONE_NEWNS)
1806 unshare_flags |= CLONE_FS; 1816 unshare_flags |= CLONE_FS;
1817
1818 err = check_unshare_flags(unshare_flags);
1819 if (err)
1820 goto bad_unshare_out;
1807 /* 1821 /*
1808 * CLONE_NEWIPC must also detach from the undolist: after switching 1822 * CLONE_NEWIPC must also detach from the undolist: after switching
1809 * to a new ipc namespace, the semaphore arrays from the old 1823 * to a new ipc namespace, the semaphore arrays from the old
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1817 err = unshare_fd(unshare_flags, &new_fd); 1831 err = unshare_fd(unshare_flags, &new_fd);
1818 if (err) 1832 if (err)
1819 goto bad_unshare_cleanup_fs; 1833 goto bad_unshare_cleanup_fs;
1820 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); 1834 err = unshare_userns(unshare_flags, &new_cred);
1821 if (err) 1835 if (err)
1822 goto bad_unshare_cleanup_fd; 1836 goto bad_unshare_cleanup_fd;
1837 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1838 new_cred, new_fs);
1839 if (err)
1840 goto bad_unshare_cleanup_cred;
1823 1841
1824 if (new_fs || new_fd || do_sysvsem || new_nsproxy) { 1842 if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
1825 if (do_sysvsem) { 1843 if (do_sysvsem) {
1826 /* 1844 /*
1827 * CLONE_SYSVSEM is equivalent to sys_exit(). 1845 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1854 } 1872 }
1855 1873
1856 task_unlock(current); 1874 task_unlock(current);
1875
1876 if (new_cred) {
1877 /* Install the new user namespace */
1878 commit_creds(new_cred);
1879 new_cred = NULL;
1880 }
1857 } 1881 }
1858 1882
1859 if (new_nsproxy) 1883 if (new_nsproxy)
1860 put_nsproxy(new_nsproxy); 1884 put_nsproxy(new_nsproxy);
1861 1885
1886bad_unshare_cleanup_cred:
1887 if (new_cred)
1888 put_cred(new_cred);
1862bad_unshare_cleanup_fd: 1889bad_unshare_cleanup_fd:
1863 if (new_fd) 1890 if (new_fd)
1864 put_files_struct(new_fd); 1891 put_files_struct(new_fd);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 7e1c3de1ce45..78e2ecb20165 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void)
57 * leave it to the caller to do proper locking and attach it to task. 57 * leave it to the caller to do proper locking and attach it to task.
58 */ 58 */
59static struct nsproxy *create_new_namespaces(unsigned long flags, 59static struct nsproxy *create_new_namespaces(unsigned long flags,
60 struct task_struct *tsk, struct fs_struct *new_fs) 60 struct task_struct *tsk, struct user_namespace *user_ns,
61 struct fs_struct *new_fs)
61{ 62{
62 struct nsproxy *new_nsp; 63 struct nsproxy *new_nsp;
63 int err; 64 int err;
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
66 if (!new_nsp) 67 if (!new_nsp)
67 return ERR_PTR(-ENOMEM); 68 return ERR_PTR(-ENOMEM);
68 69
69 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 70 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
70 if (IS_ERR(new_nsp->mnt_ns)) { 71 if (IS_ERR(new_nsp->mnt_ns)) {
71 err = PTR_ERR(new_nsp->mnt_ns); 72 err = PTR_ERR(new_nsp->mnt_ns);
72 goto out_ns; 73 goto out_ns;
73 } 74 }
74 75
75 new_nsp->uts_ns = copy_utsname(flags, tsk); 76 new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
76 if (IS_ERR(new_nsp->uts_ns)) { 77 if (IS_ERR(new_nsp->uts_ns)) {
77 err = PTR_ERR(new_nsp->uts_ns); 78 err = PTR_ERR(new_nsp->uts_ns);
78 goto out_uts; 79 goto out_uts;
79 } 80 }
80 81
81 new_nsp->ipc_ns = copy_ipcs(flags, tsk); 82 new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
82 if (IS_ERR(new_nsp->ipc_ns)) { 83 if (IS_ERR(new_nsp->ipc_ns)) {
83 err = PTR_ERR(new_nsp->ipc_ns); 84 err = PTR_ERR(new_nsp->ipc_ns);
84 goto out_ipc; 85 goto out_ipc;
85 } 86 }
86 87
87 new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); 88 new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
88 if (IS_ERR(new_nsp->pid_ns)) { 89 if (IS_ERR(new_nsp->pid_ns)) {
89 err = PTR_ERR(new_nsp->pid_ns); 90 err = PTR_ERR(new_nsp->pid_ns);
90 goto out_pid; 91 goto out_pid;
91 } 92 }
92 93
93 new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); 94 new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
94 if (IS_ERR(new_nsp->net_ns)) { 95 if (IS_ERR(new_nsp->net_ns)) {
95 err = PTR_ERR(new_nsp->net_ns); 96 err = PTR_ERR(new_nsp->net_ns);
96 goto out_net; 97 goto out_net;
@@ -122,6 +123,7 @@ out_ns:
122int copy_namespaces(unsigned long flags, struct task_struct *tsk) 123int copy_namespaces(unsigned long flags, struct task_struct *tsk)
123{ 124{
124 struct nsproxy *old_ns = tsk->nsproxy; 125 struct nsproxy *old_ns = tsk->nsproxy;
126 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
125 struct nsproxy *new_ns; 127 struct nsproxy *new_ns;
126 int err = 0; 128 int err = 0;
127 129
@@ -134,7 +136,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
134 CLONE_NEWPID | CLONE_NEWNET))) 136 CLONE_NEWPID | CLONE_NEWNET)))
135 return 0; 137 return 0;
136 138
137 if (!capable(CAP_SYS_ADMIN)) { 139 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
138 err = -EPERM; 140 err = -EPERM;
139 goto out; 141 goto out;
140 } 142 }
@@ -151,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
151 goto out; 153 goto out;
152 } 154 }
153 155
154 new_ns = create_new_namespaces(flags, tsk, tsk->fs); 156 new_ns = create_new_namespaces(flags, tsk,
157 task_cred_xxx(tsk, user_ns), tsk->fs);
155 if (IS_ERR(new_ns)) { 158 if (IS_ERR(new_ns)) {
156 err = PTR_ERR(new_ns); 159 err = PTR_ERR(new_ns);
157 goto out; 160 goto out;
@@ -183,19 +186,21 @@ void free_nsproxy(struct nsproxy *ns)
183 * On success, returns the new nsproxy. 186 * On success, returns the new nsproxy.
184 */ 187 */
185int unshare_nsproxy_namespaces(unsigned long unshare_flags, 188int unshare_nsproxy_namespaces(unsigned long unshare_flags,
186 struct nsproxy **new_nsp, struct fs_struct *new_fs) 189 struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
187{ 190{
191 struct user_namespace *user_ns;
188 int err = 0; 192 int err = 0;
189 193
190 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 194 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
191 CLONE_NEWNET))) 195 CLONE_NEWNET | CLONE_NEWPID)))
192 return 0; 196 return 0;
193 197
194 if (!capable(CAP_SYS_ADMIN)) 198 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
199 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
195 return -EPERM; 200 return -EPERM;
196 201
197 *new_nsp = create_new_namespaces(unshare_flags, current, 202 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
198 new_fs ? new_fs : current->fs); 203 new_fs ? new_fs : current->fs);
199 if (IS_ERR(*new_nsp)) { 204 if (IS_ERR(*new_nsp)) {
200 err = PTR_ERR(*new_nsp); 205 err = PTR_ERR(*new_nsp);
201 goto out; 206 goto out;
@@ -241,9 +246,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
241 struct file *file; 246 struct file *file;
242 int err; 247 int err;
243 248
244 if (!capable(CAP_SYS_ADMIN))
245 return -EPERM;
246
247 file = proc_ns_fget(fd); 249 file = proc_ns_fget(fd);
248 if (IS_ERR(file)) 250 if (IS_ERR(file))
249 return PTR_ERR(file); 251 return PTR_ERR(file);
@@ -254,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
254 if (nstype && (ops->type != nstype)) 256 if (nstype && (ops->type != nstype))
255 goto out; 257 goto out;
256 258
257 new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); 259 new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
258 if (IS_ERR(new_nsproxy)) { 260 if (IS_ERR(new_nsproxy)) {
259 err = PTR_ERR(new_nsproxy); 261 err = PTR_ERR(new_nsproxy);
260 goto out; 262 goto out;
diff --git a/kernel/pid.c b/kernel/pid.c
index fd996c1ed9f8..3e2cf8100acc 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
36#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/proc_fs.h>
39 40
40#define pid_hashfn(nr, ns) \ 41#define pid_hashfn(nr, ns) \
41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 42 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -78,6 +79,8 @@ struct pid_namespace init_pid_ns = {
78 .last_pid = 0, 79 .last_pid = 0,
79 .level = 0, 80 .level = 0,
80 .child_reaper = &init_task, 81 .child_reaper = &init_task,
82 .user_ns = &init_user_ns,
83 .proc_inum = PROC_PID_INIT_INO,
81}; 84};
82EXPORT_SYMBOL_GPL(init_pid_ns); 85EXPORT_SYMBOL_GPL(init_pid_ns);
83 86
@@ -269,8 +272,24 @@ void free_pid(struct pid *pid)
269 unsigned long flags; 272 unsigned long flags;
270 273
271 spin_lock_irqsave(&pidmap_lock, flags); 274 spin_lock_irqsave(&pidmap_lock, flags);
272 for (i = 0; i <= pid->level; i++) 275 for (i = 0; i <= pid->level; i++) {
273 hlist_del_rcu(&pid->numbers[i].pid_chain); 276 struct upid *upid = pid->numbers + i;
277 struct pid_namespace *ns = upid->ns;
278 hlist_del_rcu(&upid->pid_chain);
279 switch(--ns->nr_hashed) {
280 case 1:
281 /* When all that is left in the pid namespace
282 * is the reaper wake up the reaper. The reaper
283 * may be sleeping in zap_pid_ns_processes().
284 */
285 wake_up_process(ns->child_reaper);
286 break;
287 case 0:
288 ns->nr_hashed = -1;
289 schedule_work(&ns->proc_work);
290 break;
291 }
292 }
274 spin_unlock_irqrestore(&pidmap_lock, flags); 293 spin_unlock_irqrestore(&pidmap_lock, flags);
275 294
276 for (i = 0; i <= pid->level; i++) 295 for (i = 0; i <= pid->level; i++)
@@ -292,6 +311,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
292 goto out; 311 goto out;
293 312
294 tmp = ns; 313 tmp = ns;
314 pid->level = ns->level;
295 for (i = ns->level; i >= 0; i--) { 315 for (i = ns->level; i >= 0; i--) {
296 nr = alloc_pidmap(tmp); 316 nr = alloc_pidmap(tmp);
297 if (nr < 0) 317 if (nr < 0)
@@ -302,22 +322,32 @@ struct pid *alloc_pid(struct pid_namespace *ns)
302 tmp = tmp->parent; 322 tmp = tmp->parent;
303 } 323 }
304 324
325 if (unlikely(is_child_reaper(pid))) {
326 if (pid_ns_prepare_proc(ns))
327 goto out_free;
328 }
329
305 get_pid_ns(ns); 330 get_pid_ns(ns);
306 pid->level = ns->level;
307 atomic_set(&pid->count, 1); 331 atomic_set(&pid->count, 1);
308 for (type = 0; type < PIDTYPE_MAX; ++type) 332 for (type = 0; type < PIDTYPE_MAX; ++type)
309 INIT_HLIST_HEAD(&pid->tasks[type]); 333 INIT_HLIST_HEAD(&pid->tasks[type]);
310 334
311 upid = pid->numbers + ns->level; 335 upid = pid->numbers + ns->level;
312 spin_lock_irq(&pidmap_lock); 336 spin_lock_irq(&pidmap_lock);
313 for ( ; upid >= pid->numbers; --upid) 337 if (ns->nr_hashed < 0)
338 goto out_unlock;
339 for ( ; upid >= pid->numbers; --upid) {
314 hlist_add_head_rcu(&upid->pid_chain, 340 hlist_add_head_rcu(&upid->pid_chain,
315 &pid_hash[pid_hashfn(upid->nr, upid->ns)]); 341 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
342 upid->ns->nr_hashed++;
343 }
316 spin_unlock_irq(&pidmap_lock); 344 spin_unlock_irq(&pidmap_lock);
317 345
318out: 346out:
319 return pid; 347 return pid;
320 348
349out_unlock:
350 spin_unlock(&pidmap_lock);
321out_free: 351out_free:
322 while (++i <= ns->level) 352 while (++i <= ns->level)
323 free_pidmap(pid->numbers + i); 353 free_pidmap(pid->numbers + i);
@@ -344,7 +374,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
344 374
345struct pid *find_vpid(int nr) 375struct pid *find_vpid(int nr)
346{ 376{
347 return find_pid_ns(nr, current->nsproxy->pid_ns); 377 return find_pid_ns(nr, task_active_pid_ns(current));
348} 378}
349EXPORT_SYMBOL_GPL(find_vpid); 379EXPORT_SYMBOL_GPL(find_vpid);
350 380
@@ -428,7 +458,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
428 458
429struct task_struct *find_task_by_vpid(pid_t vnr) 459struct task_struct *find_task_by_vpid(pid_t vnr)
430{ 460{
431 return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); 461 return find_task_by_pid_ns(vnr, task_active_pid_ns(current));
432} 462}
433 463
434struct pid *get_task_pid(struct task_struct *task, enum pid_type type) 464struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
@@ -483,7 +513,7 @@ EXPORT_SYMBOL_GPL(pid_nr_ns);
483 513
484pid_t pid_vnr(struct pid *pid) 514pid_t pid_vnr(struct pid *pid)
485{ 515{
486 return pid_nr_ns(pid, current->nsproxy->pid_ns); 516 return pid_nr_ns(pid, task_active_pid_ns(current));
487} 517}
488EXPORT_SYMBOL_GPL(pid_vnr); 518EXPORT_SYMBOL_GPL(pid_vnr);
489 519
@@ -494,7 +524,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
494 524
495 rcu_read_lock(); 525 rcu_read_lock();
496 if (!ns) 526 if (!ns)
497 ns = current->nsproxy->pid_ns; 527 ns = task_active_pid_ns(current);
498 if (likely(pid_alive(task))) { 528 if (likely(pid_alive(task))) {
499 if (type != PIDTYPE_PID) 529 if (type != PIDTYPE_PID)
500 task = task->group_leader; 530 task = task->group_leader;
@@ -569,6 +599,7 @@ void __init pidmap_init(void)
569 /* Reserve PID 0. We never call free_pidmap(0) */ 599 /* Reserve PID 0. We never call free_pidmap(0) */
570 set_bit(0, init_pid_ns.pidmap[0].page); 600 set_bit(0, init_pid_ns.pidmap[0].page);
571 atomic_dec(&init_pid_ns.pidmap[0].nr_free); 601 atomic_dec(&init_pid_ns.pidmap[0].nr_free);
602 init_pid_ns.nr_hashed = 1;
572 603
573 init_pid_ns.pid_cachep = KMEM_CACHE(pid, 604 init_pid_ns.pid_cachep = KMEM_CACHE(pid,
574 SLAB_HWCACHE_ALIGN | SLAB_PANIC); 605 SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7b07cc0dfb75..560da0dab230 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/pid.h> 11#include <linux/pid.h>
12#include <linux/pid_namespace.h> 12#include <linux/pid_namespace.h>
13#include <linux/user_namespace.h>
13#include <linux/syscalls.h> 14#include <linux/syscalls.h>
14#include <linux/err.h> 15#include <linux/err.h>
15#include <linux/acct.h> 16#include <linux/acct.h>
@@ -71,10 +72,17 @@ err_alloc:
71 return NULL; 72 return NULL;
72} 73}
73 74
75static void proc_cleanup_work(struct work_struct *work)
76{
77 struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
78 pid_ns_release_proc(ns);
79}
80
74/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ 81/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
75#define MAX_PID_NS_LEVEL 32 82#define MAX_PID_NS_LEVEL 32
76 83
77static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) 84static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
85 struct pid_namespace *parent_pid_ns)
78{ 86{
79 struct pid_namespace *ns; 87 struct pid_namespace *ns;
80 unsigned int level = parent_pid_ns->level + 1; 88 unsigned int level = parent_pid_ns->level + 1;
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
99 if (ns->pid_cachep == NULL) 107 if (ns->pid_cachep == NULL)
100 goto out_free_map; 108 goto out_free_map;
101 109
110 err = proc_alloc_inum(&ns->proc_inum);
111 if (err)
112 goto out_free_map;
113
102 kref_init(&ns->kref); 114 kref_init(&ns->kref);
103 ns->level = level; 115 ns->level = level;
104 ns->parent = get_pid_ns(parent_pid_ns); 116 ns->parent = get_pid_ns(parent_pid_ns);
117 ns->user_ns = get_user_ns(user_ns);
118 INIT_WORK(&ns->proc_work, proc_cleanup_work);
105 119
106 set_bit(0, ns->pidmap[0].page); 120 set_bit(0, ns->pidmap[0].page);
107 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); 121 atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
109 for (i = 1; i < PIDMAP_ENTRIES; i++) 123 for (i = 1; i < PIDMAP_ENTRIES; i++)
110 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 124 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
111 125
112 err = pid_ns_prepare_proc(ns);
113 if (err)
114 goto out_put_parent_pid_ns;
115
116 return ns; 126 return ns;
117 127
118out_put_parent_pid_ns:
119 put_pid_ns(parent_pid_ns);
120out_free_map: 128out_free_map:
121 kfree(ns->pidmap[0].page); 129 kfree(ns->pidmap[0].page);
122out_free: 130out_free:
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
129{ 137{
130 int i; 138 int i;
131 139
140 proc_free_inum(ns->proc_inum);
132 for (i = 0; i < PIDMAP_ENTRIES; i++) 141 for (i = 0; i < PIDMAP_ENTRIES; i++)
133 kfree(ns->pidmap[i].page); 142 kfree(ns->pidmap[i].page);
143 put_user_ns(ns->user_ns);
134 kmem_cache_free(pid_ns_cachep, ns); 144 kmem_cache_free(pid_ns_cachep, ns);
135} 145}
136 146
137struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) 147struct pid_namespace *copy_pid_ns(unsigned long flags,
148 struct user_namespace *user_ns, struct pid_namespace *old_ns)
138{ 149{
139 if (!(flags & CLONE_NEWPID)) 150 if (!(flags & CLONE_NEWPID))
140 return get_pid_ns(old_ns); 151 return get_pid_ns(old_ns);
141 if (flags & (CLONE_THREAD|CLONE_PARENT)) 152 if (task_active_pid_ns(current) != old_ns)
142 return ERR_PTR(-EINVAL); 153 return ERR_PTR(-EINVAL);
143 return create_pid_namespace(old_ns); 154 return create_pid_namespace(user_ns, old_ns);
144} 155}
145 156
146static void free_pid_ns(struct kref *kref) 157static void free_pid_ns(struct kref *kref)
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
211 222
212 /* 223 /*
213 * sys_wait4() above can't reap the TASK_DEAD children. 224 * sys_wait4() above can't reap the TASK_DEAD children.
214 * Make sure they all go away, see __unhash_process(). 225 * Make sure they all go away, see free_pid().
215 */ 226 */
216 for (;;) { 227 for (;;) {
217 bool need_wait = false; 228 set_current_state(TASK_UNINTERRUPTIBLE);
218 229 if (pid_ns->nr_hashed == 1)
219 read_lock(&tasklist_lock);
220 if (!list_empty(&current->children)) {
221 __set_current_state(TASK_UNINTERRUPTIBLE);
222 need_wait = true;
223 }
224 read_unlock(&tasklist_lock);
225
226 if (!need_wait)
227 break; 230 break;
228 schedule(); 231 schedule();
229 } 232 }
233 __set_current_state(TASK_RUNNING);
230 234
231 if (pid_ns->reboot) 235 if (pid_ns->reboot)
232 current->signal->group_exit_code = pid_ns->reboot; 236 current->signal->group_exit_code = pid_ns->reboot;
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
239static int pid_ns_ctl_handler(struct ctl_table *table, int write, 243static int pid_ns_ctl_handler(struct ctl_table *table, int write,
240 void __user *buffer, size_t *lenp, loff_t *ppos) 244 void __user *buffer, size_t *lenp, loff_t *ppos)
241{ 245{
246 struct pid_namespace *pid_ns = task_active_pid_ns(current);
242 struct ctl_table tmp = *table; 247 struct ctl_table tmp = *table;
243 248
244 if (write && !capable(CAP_SYS_ADMIN)) 249 if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
245 return -EPERM; 250 return -EPERM;
246 251
247 /* 252 /*
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
250 * it should synchronize its usage with external means. 255 * it should synchronize its usage with external means.
251 */ 256 */
252 257
253 tmp.data = &current->nsproxy->pid_ns->last_pid; 258 tmp.data = &pid_ns->last_pid;
254 return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 259 return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
255} 260}
256 261
@@ -299,6 +304,67 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
299 return 0; 304 return 0;
300} 305}
301 306
307static void *pidns_get(struct task_struct *task)
308{
309 struct pid_namespace *ns;
310
311 rcu_read_lock();
312 ns = get_pid_ns(task_active_pid_ns(task));
313 rcu_read_unlock();
314
315 return ns;
316}
317
318static void pidns_put(void *ns)
319{
320 put_pid_ns(ns);
321}
322
323static int pidns_install(struct nsproxy *nsproxy, void *ns)
324{
325 struct pid_namespace *active = task_active_pid_ns(current);
326 struct pid_namespace *ancestor, *new = ns;
327
328 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
329 return -EPERM;
330
331 /*
332 * Only allow entering the current active pid namespace
333 * or a child of the current active pid namespace.
334 *
335 * This is required for fork to return a usable pid value and
336 * this maintains the property that processes and their
337 * children can not escape their current pid namespace.
338 */
339 if (new->level < active->level)
340 return -EINVAL;
341
342 ancestor = new;
343 while (ancestor->level > active->level)
344 ancestor = ancestor->parent;
345 if (ancestor != active)
346 return -EINVAL;
347
348 put_pid_ns(nsproxy->pid_ns);
349 nsproxy->pid_ns = get_pid_ns(new);
350 return 0;
351}
352
353static unsigned int pidns_inum(void *ns)
354{
355 struct pid_namespace *pid_ns = ns;
356 return pid_ns->proc_inum;
357}
358
359const struct proc_ns_operations pidns_operations = {
360 .name = "pid",
361 .type = CLONE_NEWPID,
362 .get = pidns_get,
363 .put = pidns_put,
364 .install = pidns_install,
365 .inum = pidns_inum,
366};
367
302static __init int pid_namespaces_init(void) 368static __init int pid_namespaces_init(void)
303{ 369{
304 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 370 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda955..7b09b88862cc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@ ok:
215 smp_rmb(); 215 smp_rmb();
216 if (task->mm) 216 if (task->mm)
217 dumpable = get_dumpable(task->mm); 217 dumpable = get_dumpable(task->mm);
218 if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) 218 rcu_read_lock();
219 if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
220 rcu_read_unlock();
219 return -EPERM; 221 return -EPERM;
222 }
223 rcu_read_unlock();
220 224
221 return security_ptrace_access_check(task, mode); 225 return security_ptrace_access_check(task, mode);
222} 226}
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request,
280 284
281 if (seize) 285 if (seize)
282 flags |= PT_SEIZED; 286 flags |= PT_SEIZED;
283 if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) 287 rcu_read_lock();
288 if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
284 flags |= PT_PTRACE_CAP; 289 flags |= PT_PTRACE_CAP;
290 rcu_read_unlock();
285 task->ptrace = flags; 291 task->ptrace = flags;
286 292
287 __ptrace_link(task, current); 293 __ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1fb82104bfb..257002c13bb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4097,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
4097 goto out_free_cpus_allowed; 4097 goto out_free_cpus_allowed;
4098 } 4098 }
4099 retval = -EPERM; 4099 retval = -EPERM;
4100 if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) 4100 if (!check_same_owner(p)) {
4101 goto out_unlock; 4101 rcu_read_lock();
4102 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
4103 rcu_read_unlock();
4104 goto out_unlock;
4105 }
4106 rcu_read_unlock();
4107 }
4102 4108
4103 retval = security_task_setscheduler(p); 4109 retval = security_task_setscheduler(p);
4104 if (retval) 4110 if (retval)
diff --git a/kernel/signal.c b/kernel/signal.c
index a49c7f36ceb3..580a91e63471 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1753,7 +1753,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
1753 * see comment in do_notify_parent() about the following 4 lines 1753 * see comment in do_notify_parent() about the following 4 lines
1754 */ 1754 */
1755 rcu_read_lock(); 1755 rcu_read_lock();
1756 info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); 1756 info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent));
1757 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); 1757 info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
1758 rcu_read_unlock(); 1758 rcu_read_unlock();
1759 1759
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 65bdcf198d4e..5a6384450501 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1344,7 +1344,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
1344 goto out_putname; 1344 goto out_putname;
1345 } 1345 }
1346 1346
1347 mnt = current->nsproxy->pid_ns->proc_mnt; 1347 mnt = task_active_pid_ns(current)->proc_mnt;
1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags); 1348 file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
1349 result = PTR_ERR(file); 1349 result = PTR_ERR(file);
1350 if (IS_ERR(file)) 1350 if (IS_ERR(file))
diff --git a/kernel/user.c b/kernel/user.c
index 750acffbe9ec..33acb5e53a5f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/export.h> 17#include <linux/export.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19#include <linux/proc_fs.h>
19 20
20/* 21/*
21 * userns count is 1 for root user, 1 for init_uts_ns, 22 * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = {
51 }, 52 },
52 .owner = GLOBAL_ROOT_UID, 53 .owner = GLOBAL_ROOT_UID,
53 .group = GLOBAL_ROOT_GID, 54 .group = GLOBAL_ROOT_GID,
55 .proc_inum = PROC_USER_INIT_INO,
54}; 56};
55EXPORT_SYMBOL_GPL(init_user_ns); 57EXPORT_SYMBOL_GPL(init_user_ns);
56 58
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 456a6b9fba34..f5975ccf9348 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
9#include <linux/nsproxy.h> 9#include <linux/nsproxy.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/user_namespace.h> 11#include <linux/user_namespace.h>
12#include <linux/proc_fs.h>
12#include <linux/highuid.h> 13#include <linux/highuid.h>
13#include <linux/cred.h> 14#include <linux/cred.h>
14#include <linux/securebits.h> 15#include <linux/securebits.h>
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly;
26static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 27static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
27 struct uid_gid_map *map); 28 struct uid_gid_map *map);
28 29
30static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
31{
32 /* Start with the same capabilities as init but useless for doing
33 * anything as the capabilities are bound to the new user namespace.
34 */
35 cred->securebits = SECUREBITS_DEFAULT;
36 cred->cap_inheritable = CAP_EMPTY_SET;
37 cred->cap_permitted = CAP_FULL_SET;
38 cred->cap_effective = CAP_FULL_SET;
39 cred->cap_bset = CAP_FULL_SET;
40#ifdef CONFIG_KEYS
41 key_put(cred->request_key_auth);
42 cred->request_key_auth = NULL;
43#endif
44 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
45 cred->user_ns = user_ns;
46}
47
29/* 48/*
30 * Create a new user namespace, deriving the creator from the user in the 49 * Create a new user namespace, deriving the creator from the user in the
31 * passed credentials, and replacing that user with the new root user for the 50 * passed credentials, and replacing that user with the new root user for the
@@ -39,6 +58,7 @@ int create_user_ns(struct cred *new)
39 struct user_namespace *ns, *parent_ns = new->user_ns; 58 struct user_namespace *ns, *parent_ns = new->user_ns;
40 kuid_t owner = new->euid; 59 kuid_t owner = new->euid;
41 kgid_t group = new->egid; 60 kgid_t group = new->egid;
61 int ret;
42 62
43 /* The creator needs a mapping in the parent user namespace 63 /* The creator needs a mapping in the parent user namespace
44 * or else we won't be able to reasonably tell userspace who 64 * or else we won't be able to reasonably tell userspace who
@@ -52,38 +72,45 @@ int create_user_ns(struct cred *new)
52 if (!ns) 72 if (!ns)
53 return -ENOMEM; 73 return -ENOMEM;
54 74
75 ret = proc_alloc_inum(&ns->proc_inum);
76 if (ret) {
77 kmem_cache_free(user_ns_cachep, ns);
78 return ret;
79 }
80
55 kref_init(&ns->kref); 81 kref_init(&ns->kref);
82 /* Leave the new->user_ns reference with the new user namespace. */
56 ns->parent = parent_ns; 83 ns->parent = parent_ns;
57 ns->owner = owner; 84 ns->owner = owner;
58 ns->group = group; 85 ns->group = group;
59 86
60 /* Start with the same capabilities as init but useless for doing 87 set_cred_user_ns(new, ns);
61 * anything as the capabilities are bound to the new user namespace.
62 */
63 new->securebits = SECUREBITS_DEFAULT;
64 new->cap_inheritable = CAP_EMPTY_SET;
65 new->cap_permitted = CAP_FULL_SET;
66 new->cap_effective = CAP_FULL_SET;
67 new->cap_bset = CAP_FULL_SET;
68#ifdef CONFIG_KEYS
69 key_put(new->request_key_auth);
70 new->request_key_auth = NULL;
71#endif
72 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
73
74 /* Leave the new->user_ns reference with the new user namespace. */
75 /* Leave the reference to our user_ns with the new cred. */
76 new->user_ns = ns;
77 88
78 return 0; 89 return 0;
79} 90}
80 91
92int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
93{
94 struct cred *cred;
95
96 if (!(unshare_flags & CLONE_NEWUSER))
97 return 0;
98
99 cred = prepare_creds();
100 if (!cred)
101 return -ENOMEM;
102
103 *new_cred = cred;
104 return create_user_ns(cred);
105}
106
81void free_user_ns(struct kref *kref) 107void free_user_ns(struct kref *kref)
82{ 108{
83 struct user_namespace *parent, *ns = 109 struct user_namespace *parent, *ns =
84 container_of(kref, struct user_namespace, kref); 110 container_of(kref, struct user_namespace, kref);
85 111
86 parent = ns->parent; 112 parent = ns->parent;
113 proc_free_inum(ns->proc_inum);
87 kmem_cache_free(user_ns_cachep, ns); 114 kmem_cache_free(user_ns_cachep, ns);
88 put_user_ns(parent); 115 put_user_ns(parent);
89} 116}
@@ -372,7 +399,7 @@ static int uid_m_show(struct seq_file *seq, void *v)
372 struct user_namespace *lower_ns; 399 struct user_namespace *lower_ns;
373 uid_t lower; 400 uid_t lower;
374 401
375 lower_ns = current_user_ns(); 402 lower_ns = seq_user_ns(seq);
376 if ((lower_ns == ns) && lower_ns->parent) 403 if ((lower_ns == ns) && lower_ns->parent)
377 lower_ns = lower_ns->parent; 404 lower_ns = lower_ns->parent;
378 405
@@ -393,7 +420,7 @@ static int gid_m_show(struct seq_file *seq, void *v)
393 struct user_namespace *lower_ns; 420 struct user_namespace *lower_ns;
394 gid_t lower; 421 gid_t lower;
395 422
396 lower_ns = current_user_ns(); 423 lower_ns = seq_user_ns(seq);
397 if ((lower_ns == ns) && lower_ns->parent) 424 if ((lower_ns == ns) && lower_ns->parent)
398 lower_ns = lower_ns->parent; 425 lower_ns = lower_ns->parent;
399 426
@@ -669,10 +696,14 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz
669{ 696{
670 struct seq_file *seq = file->private_data; 697 struct seq_file *seq = file->private_data;
671 struct user_namespace *ns = seq->private; 698 struct user_namespace *ns = seq->private;
699 struct user_namespace *seq_ns = seq_user_ns(seq);
672 700
673 if (!ns->parent) 701 if (!ns->parent)
674 return -EPERM; 702 return -EPERM;
675 703
704 if ((seq_ns != ns) && (seq_ns != ns->parent))
705 return -EPERM;
706
676 return map_write(file, buf, size, ppos, CAP_SETUID, 707 return map_write(file, buf, size, ppos, CAP_SETUID,
677 &ns->uid_map, &ns->parent->uid_map); 708 &ns->uid_map, &ns->parent->uid_map);
678} 709}
@@ -681,10 +712,14 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz
681{ 712{
682 struct seq_file *seq = file->private_data; 713 struct seq_file *seq = file->private_data;
683 struct user_namespace *ns = seq->private; 714 struct user_namespace *ns = seq->private;
715 struct user_namespace *seq_ns = seq_user_ns(seq);
684 716
685 if (!ns->parent) 717 if (!ns->parent)
686 return -EPERM; 718 return -EPERM;
687 719
720 if ((seq_ns != ns) && (seq_ns != ns->parent))
721 return -EPERM;
722
688 return map_write(file, buf, size, ppos, CAP_SETGID, 723 return map_write(file, buf, size, ppos, CAP_SETGID,
689 &ns->gid_map, &ns->parent->gid_map); 724 &ns->gid_map, &ns->parent->gid_map);
690} 725}
@@ -709,6 +744,21 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
709static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 744static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
710 struct uid_gid_map *new_map) 745 struct uid_gid_map *new_map)
711{ 746{
747 /* Allow mapping to your own filesystem ids */
748 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
749 u32 id = new_map->extent[0].lower_first;
750 if (cap_setid == CAP_SETUID) {
751 kuid_t uid = make_kuid(ns->parent, id);
752 if (uid_eq(uid, current_fsuid()))
753 return true;
754 }
755 else if (cap_setid == CAP_SETGID) {
756 kgid_t gid = make_kgid(ns->parent, id);
757 if (gid_eq(gid, current_fsgid()))
758 return true;
759 }
760 }
761
712 /* Allow anyone to set a mapping that doesn't require privilege */ 762 /* Allow anyone to set a mapping that doesn't require privilege */
713 if (!cap_valid(cap_setid)) 763 if (!cap_valid(cap_setid))
714 return true; 764 return true;
@@ -722,6 +772,65 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
722 return false; 772 return false;
723} 773}
724 774
775static void *userns_get(struct task_struct *task)
776{
777 struct user_namespace *user_ns;
778
779 rcu_read_lock();
780 user_ns = get_user_ns(__task_cred(task)->user_ns);
781 rcu_read_unlock();
782
783 return user_ns;
784}
785
786static void userns_put(void *ns)
787{
788 put_user_ns(ns);
789}
790
791static int userns_install(struct nsproxy *nsproxy, void *ns)
792{
793 struct user_namespace *user_ns = ns;
794 struct cred *cred;
795
796 /* Don't allow gaining capabilities by reentering
797 * the same user namespace.
798 */
799 if (user_ns == current_user_ns())
800 return -EINVAL;
801
802 /* Threaded many not enter a different user namespace */
803 if (atomic_read(&current->mm->mm_users) > 1)
804 return -EINVAL;
805
806 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
807 return -EPERM;
808
809 cred = prepare_creds();
810 if (!cred)
811 return -ENOMEM;
812
813 put_user_ns(cred->user_ns);
814 set_cred_user_ns(cred, get_user_ns(user_ns));
815
816 return commit_creds(cred);
817}
818
819static unsigned int userns_inum(void *ns)
820{
821 struct user_namespace *user_ns = ns;
822 return user_ns->proc_inum;
823}
824
825const struct proc_ns_operations userns_operations = {
826 .name = "user",
827 .type = CLONE_NEWUSER,
828 .get = userns_get,
829 .put = userns_put,
830 .install = userns_install,
831 .inum = userns_inum,
832};
833
725static __init int user_namespaces_init(void) 834static __init int user_namespaces_init(void)
726{ 835{
727 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); 836 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 679d97a5d3fd..f6336d51d64c 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,18 +32,25 @@ static struct uts_namespace *create_uts_ns(void)
32 * @old_ns: namespace to clone 32 * @old_ns: namespace to clone
33 * Return NULL on error (failure to kmalloc), new ns otherwise 33 * Return NULL on error (failure to kmalloc), new ns otherwise
34 */ 34 */
35static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, 35static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
36 struct uts_namespace *old_ns) 36 struct uts_namespace *old_ns)
37{ 37{
38 struct uts_namespace *ns; 38 struct uts_namespace *ns;
39 int err;
39 40
40 ns = create_uts_ns(); 41 ns = create_uts_ns();
41 if (!ns) 42 if (!ns)
42 return ERR_PTR(-ENOMEM); 43 return ERR_PTR(-ENOMEM);
43 44
45 err = proc_alloc_inum(&ns->proc_inum);
46 if (err) {
47 kfree(ns);
48 return ERR_PTR(err);
49 }
50
44 down_read(&uts_sem); 51 down_read(&uts_sem);
45 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 52 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
46 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); 53 ns->user_ns = get_user_ns(user_ns);
47 up_read(&uts_sem); 54 up_read(&uts_sem);
48 return ns; 55 return ns;
49} 56}
@@ -55,9 +62,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
55 * versa. 62 * versa.
56 */ 63 */
57struct uts_namespace *copy_utsname(unsigned long flags, 64struct uts_namespace *copy_utsname(unsigned long flags,
58 struct task_struct *tsk) 65 struct user_namespace *user_ns, struct uts_namespace *old_ns)
59{ 66{
60 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
61 struct uts_namespace *new_ns; 67 struct uts_namespace *new_ns;
62 68
63 BUG_ON(!old_ns); 69 BUG_ON(!old_ns);
@@ -66,7 +72,7 @@ struct uts_namespace *copy_utsname(unsigned long flags,
66 if (!(flags & CLONE_NEWUTS)) 72 if (!(flags & CLONE_NEWUTS))
67 return old_ns; 73 return old_ns;
68 74
69 new_ns = clone_uts_ns(tsk, old_ns); 75 new_ns = clone_uts_ns(user_ns, old_ns);
70 76
71 put_uts_ns(old_ns); 77 put_uts_ns(old_ns);
72 return new_ns; 78 return new_ns;
@@ -78,6 +84,7 @@ void free_uts_ns(struct kref *kref)
78 84
79 ns = container_of(kref, struct uts_namespace, kref); 85 ns = container_of(kref, struct uts_namespace, kref);
80 put_user_ns(ns->user_ns); 86 put_user_ns(ns->user_ns);
87 proc_free_inum(ns->proc_inum);
81 kfree(ns); 88 kfree(ns);
82} 89}
83 90
@@ -102,19 +109,31 @@ static void utsns_put(void *ns)
102 put_uts_ns(ns); 109 put_uts_ns(ns);
103} 110}
104 111
105static int utsns_install(struct nsproxy *nsproxy, void *ns) 112static int utsns_install(struct nsproxy *nsproxy, void *new)
106{ 113{
114 struct uts_namespace *ns = new;
115
116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
117 return -EPERM;
118
107 get_uts_ns(ns); 119 get_uts_ns(ns);
108 put_uts_ns(nsproxy->uts_ns); 120 put_uts_ns(nsproxy->uts_ns);
109 nsproxy->uts_ns = ns; 121 nsproxy->uts_ns = ns;
110 return 0; 122 return 0;
111} 123}
112 124
125static unsigned int utsns_inum(void *vp)
126{
127 struct uts_namespace *ns = vp;
128
129 return ns->proc_inum;
130}
131
113const struct proc_ns_operations utsns_operations = { 132const struct proc_ns_operations utsns_operations = {
114 .name = "uts", 133 .name = "uts",
115 .type = CLONE_NEWUTS, 134 .type = CLONE_NEWUTS,
116 .get = utsns_get, 135 .get = utsns_get,
117 .put = utsns_put, 136 .put = utsns_put,
118 .install = utsns_install, 137 .install = utsns_install,
138 .inum = utsns_inum,
119}; 139};
120
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6456439cbbd9..2e9a3132b8dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
381} 381}
382EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 382EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
383 383
384static __net_init int net_ns_net_init(struct net *net)
385{
386 return proc_alloc_inum(&net->proc_inum);
387}
388
389static __net_exit void net_ns_net_exit(struct net *net)
390{
391 proc_free_inum(net->proc_inum);
392}
393
394static struct pernet_operations __net_initdata net_ns_ops = {
395 .init = net_ns_net_init,
396 .exit = net_ns_net_exit,
397};
398
384static int __init net_ns_init(void) 399static int __init net_ns_init(void)
385{ 400{
386 struct net_generic *ng; 401 struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
412 427
413 mutex_unlock(&net_mutex); 428 mutex_unlock(&net_mutex);
414 429
430 register_pernet_subsys(&net_ns_ops);
431
415 return 0; 432 return 0;
416} 433}
417 434
@@ -630,16 +647,28 @@ static void netns_put(void *ns)
630 647
631static int netns_install(struct nsproxy *nsproxy, void *ns) 648static int netns_install(struct nsproxy *nsproxy, void *ns)
632{ 649{
650 struct net *net = ns;
651
652 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN))
653 return -EPERM;
654
633 put_net(nsproxy->net_ns); 655 put_net(nsproxy->net_ns);
634 nsproxy->net_ns = get_net(ns); 656 nsproxy->net_ns = get_net(net);
635 return 0; 657 return 0;
636} 658}
637 659
660static unsigned int netns_inum(void *ns)
661{
662 struct net *net = ns;
663 return net->proc_inum;
664}
665
638const struct proc_ns_operations netns_operations = { 666const struct proc_ns_operations netns_operations = {
639 .name = "net", 667 .name = "net",
640 .type = CLONE_NEWNET, 668 .type = CLONE_NEWNET,
641 .get = netns_get, 669 .get = netns_get,
642 .put = netns_put, 670 .put = netns_put,
643 .install = netns_install, 671 .install = netns_install,
672 .inum = netns_inum,
644}; 673};
645#endif 674#endif
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 2663145d1197..23414b93771f 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -298,14 +298,18 @@ int yama_ptrace_access_check(struct task_struct *child,
298 /* No additional restrictions. */ 298 /* No additional restrictions. */
299 break; 299 break;
300 case YAMA_SCOPE_RELATIONAL: 300 case YAMA_SCOPE_RELATIONAL:
301 rcu_read_lock();
301 if (!task_is_descendant(current, child) && 302 if (!task_is_descendant(current, child) &&
302 !ptracer_exception_found(current, child) && 303 !ptracer_exception_found(current, child) &&
303 !ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) 304 !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
304 rc = -EPERM; 305 rc = -EPERM;
306 rcu_read_unlock();
305 break; 307 break;
306 case YAMA_SCOPE_CAPABILITY: 308 case YAMA_SCOPE_CAPABILITY:
307 if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) 309 rcu_read_lock();
310 if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
308 rc = -EPERM; 311 rc = -EPERM;
312 rcu_read_unlock();
309 break; 313 break;
310 case YAMA_SCOPE_NO_ATTACH: 314 case YAMA_SCOPE_NO_ATTACH:
311 default: 315 default:
@@ -343,8 +347,10 @@ int yama_ptrace_traceme(struct task_struct *parent)
343 /* Only disallow PTRACE_TRACEME on more aggressive settings. */ 347 /* Only disallow PTRACE_TRACEME on more aggressive settings. */
344 switch (ptrace_scope) { 348 switch (ptrace_scope) {
345 case YAMA_SCOPE_CAPABILITY: 349 case YAMA_SCOPE_CAPABILITY:
346 if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE)) 350 rcu_read_lock();
351 if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
347 rc = -EPERM; 352 rc = -EPERM;
353 rcu_read_unlock();
348 break; 354 break;
349 case YAMA_SCOPE_NO_ATTACH: 355 case YAMA_SCOPE_NO_ATTACH:
350 rc = -EPERM; 356 rc = -EPERM;