aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/attr.c11
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/inode.c24
-rw-r--r--fs/autofs4/waitq.c5
-rw-r--r--fs/exec.c9
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c20
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c211
-rw-r--r--fs/open.c2
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/generic.c26
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/namespaces.c185
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/proc/self.c59
-rw-r--r--fs/sysfs/mount.c1
25 files changed, 493 insertions, 305 deletions
diff --git a/fs/attr.c b/fs/attr.c
index cce7df53b694..1449adb14ef6 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
49 /* Make sure a caller can chown. */ 49 /* Make sure a caller can chown. */
50 if ((ia_valid & ATTR_UID) && 50 if ((ia_valid & ATTR_UID) &&
51 (!uid_eq(current_fsuid(), inode->i_uid) || 51 (!uid_eq(current_fsuid(), inode->i_uid) ||
52 !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) 52 !uid_eq(attr->ia_uid, inode->i_uid)) &&
53 !inode_capable(inode, CAP_CHOWN))
53 return -EPERM; 54 return -EPERM;
54 55
55 /* Make sure caller can chgrp. */ 56 /* Make sure caller can chgrp. */
56 if ((ia_valid & ATTR_GID) && 57 if ((ia_valid & ATTR_GID) &&
57 (!uid_eq(current_fsuid(), inode->i_uid) || 58 (!uid_eq(current_fsuid(), inode->i_uid) ||
58 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && 59 (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
59 !capable(CAP_CHOWN)) 60 !inode_capable(inode, CAP_CHOWN))
60 return -EPERM; 61 return -EPERM;
61 62
62 /* Make sure a caller can chmod. */ 63 /* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
65 return -EPERM; 66 return -EPERM;
66 /* Also check the setgid bit! */ 67 /* Also check the setgid bit! */
67 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 68 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
68 inode->i_gid) && !capable(CAP_FSETID)) 69 inode->i_gid) &&
70 !inode_capable(inode, CAP_FSETID))
69 attr->ia_mode &= ~S_ISGID; 71 attr->ia_mode &= ~S_ISGID;
70 } 72 }
71 73
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
157 if (ia_valid & ATTR_MODE) { 159 if (ia_valid & ATTR_MODE) {
158 umode_t mode = attr->ia_mode; 160 umode_t mode = attr->ia_mode;
159 161
160 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 162 if (!in_group_p(inode->i_gid) &&
163 !inode_capable(inode, CAP_FSETID))
161 mode &= ~S_ISGID; 164 mode &= ~S_ISGID;
162 inode->i_mode = mode; 165 inode->i_mode = mode;
163 } 166 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 908e18455413..b785e7707959 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_info {
74 unsigned long last_used; 74 unsigned long last_used;
75 atomic_t count; 75 atomic_t count;
76 76
77 uid_t uid; 77 kuid_t uid;
78 gid_t gid; 78 kgid_t gid;
79}; 79};
80 80
81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 81#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@ struct autofs_wait_queue {
89 struct qstr name; 89 struct qstr name;
90 u32 dev; 90 u32 dev;
91 u64 ino; 91 u64 ino;
92 uid_t uid; 92 kuid_t uid;
93 gid_t gid; 93 kgid_t gid;
94 pid_t pid; 94 pid_t pid;
95 pid_t tgid; 95 pid_t tgid;
96 /* This is for status reporting upon return */ 96 /* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index a16214109d31..9f68a37bb2b2 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
437 err = 0; 437 err = 0;
438 autofs4_expire_wait(path.dentry); 438 autofs4_expire_wait(path.dentry);
439 spin_lock(&sbi->fs_lock); 439 spin_lock(&sbi->fs_lock);
440 param->requester.uid = ino->uid; 440 param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
441 param->requester.gid = ino->gid; 441 param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
442 spin_unlock(&sbi->fs_lock); 442 spin_unlock(&sbi->fs_lock);
443 } 443 }
444 path_put(&path); 444 path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8a4fed8ead30..b104726e2d0a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
36 36
37void autofs4_clean_ino(struct autofs_info *ino) 37void autofs4_clean_ino(struct autofs_info *ino)
38{ 38{
39 ino->uid = 0; 39 ino->uid = GLOBAL_ROOT_UID;
40 ino->gid = 0; 40 ino->gid = GLOBAL_ROOT_GID;
41 ino->last_used = jiffies; 41 ino->last_used = jiffies;
42} 42}
43 43
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
79 return 0; 79 return 0;
80 80
81 seq_printf(m, ",fd=%d", sbi->pipefd); 81 seq_printf(m, ",fd=%d", sbi->pipefd);
82 if (root_inode->i_uid != 0) 82 if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
83 seq_printf(m, ",uid=%u", root_inode->i_uid); 83 seq_printf(m, ",uid=%u",
84 if (root_inode->i_gid != 0) 84 from_kuid_munged(&init_user_ns, root_inode->i_uid));
85 seq_printf(m, ",gid=%u", root_inode->i_gid); 85 if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
86 seq_printf(m, ",gid=%u",
87 from_kgid_munged(&init_user_ns, root_inode->i_gid));
86 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); 88 seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
87 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); 89 seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
88 seq_printf(m, ",minproto=%d", sbi->min_proto); 90 seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@ static const match_table_t tokens = {
126 {Opt_err, NULL} 128 {Opt_err, NULL}
127}; 129};
128 130
129static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, 131static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
130 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) 132 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
131{ 133{
132 char *p; 134 char *p;
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
159 case Opt_uid: 161 case Opt_uid:
160 if (match_int(args, &option)) 162 if (match_int(args, &option))
161 return 1; 163 return 1;
162 *uid = option; 164 *uid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(*uid))
166 return 1;
163 break; 167 break;
164 case Opt_gid: 168 case Opt_gid:
165 if (match_int(args, &option)) 169 if (match_int(args, &option))
166 return 1; 170 return 1;
167 *gid = option; 171 *gid = make_kgid(current_user_ns(), option);
172 if (!gid_valid(*gid))
173 return 1;
168 break; 174 break;
169 case Opt_pgrp: 175 case Opt_pgrp:
170 if (match_int(args, &option)) 176 if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index dce436e595c1..03bc1d347d8e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 case autofs_ptype_expire_direct: 154 case autofs_ptype_expire_direct:
155 { 155 {
156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; 156 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
157 struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
157 158
158 pktsz = sizeof(*packet); 159 pktsz = sizeof(*packet);
159 160
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
163 packet->name[wq->name.len] = '\0'; 164 packet->name[wq->name.len] = '\0';
164 packet->dev = wq->dev; 165 packet->dev = wq->dev;
165 packet->ino = wq->ino; 166 packet->ino = wq->ino;
166 packet->uid = wq->uid; 167 packet->uid = from_kuid_munged(user_ns, wq->uid);
167 packet->gid = wq->gid; 168 packet->gid = from_kgid_munged(user_ns, wq->gid);
168 packet->pid = wq->pid; 169 packet->pid = wq->pid;
169 packet->tgid = wq->tgid; 170 packet->tgid = wq->tgid;
170 break; 171 break;
diff --git a/fs/exec.c b/fs/exec.c
index d5eb9e605ffd..d8e1191cb112 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm)
1266 bprm->cred->egid = current_egid(); 1266 bprm->cred->egid = current_egid();
1267 1267
1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1268 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1269 !current->no_new_privs) { 1269 !current->no_new_privs &&
1270 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1271 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1270 /* Set-uid? */ 1272 /* Set-uid? */
1271 if (mode & S_ISUID) { 1273 if (mode & S_ISUID) {
1272 if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
1273 return -EPERM;
1274 bprm->per_clear |= PER_CLEAR_ON_SETID; 1274 bprm->per_clear |= PER_CLEAR_ON_SETID;
1275 bprm->cred->euid = inode->i_uid; 1275 bprm->cred->euid = inode->i_uid;
1276
1277 } 1276 }
1278 1277
1279 /* Set-gid? */ 1278 /* Set-gid? */
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm)
1283 * executable. 1282 * executable.
1284 */ 1283 */
1285 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { 1284 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1286 if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
1287 return -EPERM;
1288 bprm->per_clear |= PER_CLEAR_ON_SETID; 1285 bprm->per_clear |= PER_CLEAR_ON_SETID;
1289 bprm->cred->egid = inode->i_gid; 1286 bprm->cred->egid = inode->i_gid;
1290 } 1287 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c23fa7a91e6..c16335315e5d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req)
92 92
93static void fuse_req_init_context(struct fuse_req *req) 93static void fuse_req_init_context(struct fuse_req *req)
94{ 94{
95 req->in.h.uid = current_fsuid(); 95 req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
96 req->in.h.gid = current_fsgid(); 96 req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
97 req->in.h.pid = current->pid; 97 req->in.h.pid = current->pid;
98} 98}
99 99
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 324bc0850534..b7c09f9eb40c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
818 stat->ino = attr->ino; 818 stat->ino = attr->ino;
819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 819 stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
820 stat->nlink = attr->nlink; 820 stat->nlink = attr->nlink;
821 stat->uid = attr->uid; 821 stat->uid = make_kuid(&init_user_ns, attr->uid);
822 stat->gid = attr->gid; 822 stat->gid = make_kgid(&init_user_ns, attr->gid);
823 stat->rdev = inode->i_rdev; 823 stat->rdev = inode->i_rdev;
824 stat->atime.tv_sec = attr->atime; 824 stat->atime.tv_sec = attr->atime;
825 stat->atime.tv_nsec = attr->atimensec; 825 stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
1007 rcu_read_lock(); 1007 rcu_read_lock();
1008 ret = 0; 1008 ret = 0;
1009 cred = __task_cred(task); 1009 cred = __task_cred(task);
1010 if (cred->euid == fc->user_id && 1010 if (uid_eq(cred->euid, fc->user_id) &&
1011 cred->suid == fc->user_id && 1011 uid_eq(cred->suid, fc->user_id) &&
1012 cred->uid == fc->user_id && 1012 uid_eq(cred->uid, fc->user_id) &&
1013 cred->egid == fc->group_id && 1013 gid_eq(cred->egid, fc->group_id) &&
1014 cred->sgid == fc->group_id && 1014 gid_eq(cred->sgid, fc->group_id) &&
1015 cred->gid == fc->group_id) 1015 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1016 ret = 1;
1017 rcu_read_unlock(); 1017 rcu_read_unlock();
1018 1018
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1306 if (ivalid & ATTR_MODE) 1306 if (ivalid & ATTR_MODE)
1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; 1307 arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
1308 if (ivalid & ATTR_UID) 1308 if (ivalid & ATTR_UID)
1309 arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; 1309 arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
1310 if (ivalid & ATTR_GID) 1310 if (ivalid & ATTR_GID)
1311 arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; 1311 arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
1312 if (ivalid & ATTR_SIZE) 1312 if (ivalid & ATTR_SIZE)
1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; 1313 arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
1314 if (ivalid & ATTR_ATIME) { 1314 if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e24dd74e3068..e105a53fc72d 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@ struct fuse_conn {
333 atomic_t count; 333 atomic_t count;
334 334
335 /** The user id for this mount */ 335 /** The user id for this mount */
336 uid_t user_id; 336 kuid_t user_id;
337 337
338 /** The group id for this mount */ 338 /** The group id for this mount */
339 gid_t group_id; 339 kgid_t group_id;
340 340
341 /** The fuse mount flags for this mount */ 341 /** The fuse mount flags for this mount */
342 unsigned flags; 342 unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f0eda124cffb..73ca6b72beaf 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
60struct fuse_mount_data { 60struct fuse_mount_data {
61 int fd; 61 int fd;
62 unsigned rootmode; 62 unsigned rootmode;
63 unsigned user_id; 63 kuid_t user_id;
64 unsigned group_id; 64 kgid_t group_id;
65 unsigned fd_present:1; 65 unsigned fd_present:1;
66 unsigned rootmode_present:1; 66 unsigned rootmode_present:1;
67 unsigned user_id_present:1; 67 unsigned user_id_present:1;
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
164 inode->i_ino = fuse_squash_ino(attr->ino); 164 inode->i_ino = fuse_squash_ino(attr->ino);
165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 165 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
166 set_nlink(inode, attr->nlink); 166 set_nlink(inode, attr->nlink);
167 inode->i_uid = attr->uid; 167 inode->i_uid = make_kuid(&init_user_ns, attr->uid);
168 inode->i_gid = attr->gid; 168 inode->i_gid = make_kgid(&init_user_ns, attr->gid);
169 inode->i_blocks = attr->blocks; 169 inode->i_blocks = attr->blocks;
170 inode->i_atime.tv_sec = attr->atime; 170 inode->i_atime.tv_sec = attr->atime;
171 inode->i_atime.tv_nsec = attr->atimensec; 171 inode->i_atime.tv_nsec = attr->atimensec;
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
492 case OPT_USER_ID: 492 case OPT_USER_ID:
493 if (match_int(&args[0], &value)) 493 if (match_int(&args[0], &value))
494 return 0; 494 return 0;
495 d->user_id = value; 495 d->user_id = make_kuid(current_user_ns(), value);
496 if (!uid_valid(d->user_id))
497 return 0;
496 d->user_id_present = 1; 498 d->user_id_present = 1;
497 break; 499 break;
498 500
499 case OPT_GROUP_ID: 501 case OPT_GROUP_ID:
500 if (match_int(&args[0], &value)) 502 if (match_int(&args[0], &value))
501 return 0; 503 return 0;
502 d->group_id = value; 504 d->group_id = make_kgid(current_user_ns(), value);
505 if (!gid_valid(d->group_id))
506 return 0;
503 d->group_id_present = 1; 507 d->group_id_present = 1;
504 break; 508 break;
505 509
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
540 struct super_block *sb = root->d_sb; 544 struct super_block *sb = root->d_sb;
541 struct fuse_conn *fc = get_fuse_conn_super(sb); 545 struct fuse_conn *fc = get_fuse_conn_super(sb);
542 546
543 seq_printf(m, ",user_id=%u", fc->user_id); 547 seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
544 seq_printf(m, ",group_id=%u", fc->group_id); 548 seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
545 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) 549 if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
546 seq_puts(m, ",default_permissions"); 550 seq_puts(m, ",default_permissions");
547 if (fc->flags & FUSE_ALLOW_OTHER) 551 if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
989 if (!file) 993 if (!file)
990 goto err; 994 goto err;
991 995
992 if (file->f_op != &fuse_dev_operations) 996 if ((file->f_op != &fuse_dev_operations) ||
997 (file->f_cred->user_ns != &init_user_ns))
993 goto err_fput; 998 goto err_fput;
994 999
995 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1000 fc = kmalloc(sizeof(*fc), GFP_KERNEL);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 78f21f8dc2ec..43b315f2002b 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
710 struct vfsmount *proc_mnt; 710 struct vfsmount *proc_mnt;
711 int err = -ENOENT; 711 int err = -ENOENT;
712 712
713 proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); 713 proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
714 if (IS_ERR(proc_mnt)) 714 if (IS_ERR(proc_mnt))
715 goto out; 715 goto out;
716 716
diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de641..cd5007980400 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
4 4
5struct mnt_namespace { 5struct mnt_namespace {
6 atomic_t count; 6 atomic_t count;
7 unsigned int proc_inum;
7 struct mount * root; 8 struct mount * root;
8 struct list_head list; 9 struct list_head list;
10 struct user_namespace *user_ns;
11 u64 seq; /* Sequence number to prevent loops */
9 wait_queue_head_t poll; 12 wait_queue_head_t poll;
10 int event; 13 int event;
11}; 14};
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6b..c1bbe86f4920 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
12#include <linux/export.h> 12#include <linux/export.h>
13#include <linux/capability.h> 13#include <linux/capability.h>
14#include <linux/mnt_namespace.h> 14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
15#include <linux/namei.h> 16#include <linux/namei.h>
16#include <linux/security.h> 17#include <linux/security.h>
17#include <linux/idr.h> 18#include <linux/idr.h>
@@ -20,6 +21,7 @@
20#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/proc_fs.h>
23#include "pnode.h" 25#include "pnode.h"
24#include "internal.h" 26#include "internal.h"
25 27
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
784 if (!mnt) 786 if (!mnt)
785 return ERR_PTR(-ENOMEM); 787 return ERR_PTR(-ENOMEM);
786 788
787 if (flag & (CL_SLAVE | CL_PRIVATE)) 789 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
788 mnt->mnt_group_id = 0; /* not a peer of original */ 790 mnt->mnt_group_id = 0; /* not a peer of original */
789 else 791 else
790 mnt->mnt_group_id = old->mnt_group_id; 792 mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
805 list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 807 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
806 br_write_unlock(&vfsmount_lock); 808 br_write_unlock(&vfsmount_lock);
807 809
808 if (flag & CL_SLAVE) { 810 if ((flag & CL_SLAVE) ||
811 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
809 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 812 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
810 mnt->mnt_master = old; 813 mnt->mnt_master = old;
811 CLEAR_MNT_SHARED(mnt); 814 CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1266 goto dput_and_out; 1269 goto dput_and_out;
1267 1270
1268 retval = -EPERM; 1271 retval = -EPERM;
1269 if (!capable(CAP_SYS_ADMIN)) 1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1270 goto dput_and_out; 1273 goto dput_and_out;
1271 1274
1272 retval = do_umount(mnt, flags); 1275 retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1292 1295
1293static int mount_is_safe(struct path *path) 1296static int mount_is_safe(struct path *path)
1294{ 1297{
1295 if (capable(CAP_SYS_ADMIN)) 1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1296 return 0; 1299 return 0;
1297 return -EPERM; 1300 return -EPERM;
1298#ifdef notyet 1301#ifdef notyet
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)
1308#endif 1311#endif
1309} 1312}
1310 1313
1314static bool mnt_ns_loop(struct path *path)
1315{
1316 /* Could bind mounting the mount namespace inode cause a
1317 * mount namespace loop?
1318 */
1319 struct inode *inode = path->dentry->d_inode;
1320 struct proc_inode *ei;
1321 struct mnt_namespace *mnt_ns;
1322
1323 if (!proc_ns_inode(inode))
1324 return false;
1325
1326 ei = PROC_I(inode);
1327 if (ei->ns_ops != &mntns_operations)
1328 return false;
1329
1330 mnt_ns = ei->ns;
1331 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1332}
1333
1311struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1334struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1312 int flag) 1335 int flag)
1313{ 1336{
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
1610 int type; 1633 int type;
1611 int err = 0; 1634 int err = 0;
1612 1635
1613 if (!capable(CAP_SYS_ADMIN)) 1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1614 return -EPERM; 1637 return -EPERM;
1615 1638
1616 if (path->dentry != path->mnt->mnt_root) 1639 if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,
1655 if (err) 1678 if (err)
1656 return err; 1679 return err;
1657 1680
1681 err = -EINVAL;
1682 if (mnt_ns_loop(&old_path))
1683 goto out;
1684
1658 err = lock_mount(path); 1685 err = lock_mount(path);
1659 if (err) 1686 if (err)
1660 goto out; 1687 goto out;
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1770 struct mount *p; 1797 struct mount *p;
1771 struct mount *old; 1798 struct mount *old;
1772 int err = 0; 1799 int err = 0;
1773 if (!capable(CAP_SYS_ADMIN)) 1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1774 return -EPERM; 1801 return -EPERM;
1775 if (!old_name || !*old_name) 1802 if (!old_name || !*old_name)
1776 return -EINVAL; 1803 return -EINVAL;
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1857 return ERR_PTR(err); 1884 return ERR_PTR(err);
1858} 1885}
1859 1886
1860static struct vfsmount *
1861do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1862{
1863 struct file_system_type *type = get_fs_type(fstype);
1864 struct vfsmount *mnt;
1865 if (!type)
1866 return ERR_PTR(-ENODEV);
1867 mnt = vfs_kern_mount(type, flags, name, data);
1868 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1869 !mnt->mnt_sb->s_subtype)
1870 mnt = fs_set_subtype(mnt, fstype);
1871 put_filesystem(type);
1872 return mnt;
1873}
1874
1875/* 1887/*
1876 * add a mount into a namespace's mount tree 1888 * add a mount into a namespace's mount tree
1877 */ 1889 */
@@ -1917,20 +1929,46 @@ unlock:
1917 * create a new mount for userspace and request it to be added into the 1929 * create a new mount for userspace and request it to be added into the
1918 * namespace's tree 1930 * namespace's tree
1919 */ 1931 */
1920static int do_new_mount(struct path *path, const char *type, int flags, 1932static int do_new_mount(struct path *path, const char *fstype, int flags,
1921 int mnt_flags, const char *name, void *data) 1933 int mnt_flags, const char *name, void *data)
1922{ 1934{
1935 struct file_system_type *type;
1936 struct user_namespace *user_ns;
1923 struct vfsmount *mnt; 1937 struct vfsmount *mnt;
1924 int err; 1938 int err;
1925 1939
1926 if (!type) 1940 if (!fstype)
1927 return -EINVAL; 1941 return -EINVAL;
1928 1942
1929 /* we need capabilities... */ 1943 /* we need capabilities... */
1930 if (!capable(CAP_SYS_ADMIN)) 1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1931 return -EPERM; 1946 return -EPERM;
1932 1947
1933 mnt = do_kern_mount(type, flags, name, data); 1948 type = get_fs_type(fstype);
1949 if (!type)
1950 return -ENODEV;
1951
1952 if (user_ns != &init_user_ns) {
1953 if (!(type->fs_flags & FS_USERNS_MOUNT)) {
1954 put_filesystem(type);
1955 return -EPERM;
1956 }
1957 /* Only in special cases allow devices from mounts
1958 * created outside the initial user namespace.
1959 */
1960 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
1961 flags |= MS_NODEV;
1962 mnt_flags |= MNT_NODEV;
1963 }
1964 }
1965
1966 mnt = vfs_kern_mount(type, flags, name, data);
1967 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1968 !mnt->mnt_sb->s_subtype)
1969 mnt = fs_set_subtype(mnt, fstype);
1970
1971 put_filesystem(type);
1934 if (IS_ERR(mnt)) 1972 if (IS_ERR(mnt))
1935 return PTR_ERR(mnt); 1973 return PTR_ERR(mnt);
1936 1974
@@ -2261,18 +2299,42 @@ dput_out:
2261 return retval; 2299 return retval;
2262} 2300}
2263 2301
2264static struct mnt_namespace *alloc_mnt_ns(void) 2302static void free_mnt_ns(struct mnt_namespace *ns)
2303{
2304 proc_free_inum(ns->proc_inum);
2305 put_user_ns(ns->user_ns);
2306 kfree(ns);
2307}
2308
2309/*
2310 * Assign a sequence number so we can detect when we attempt to bind
2311 * mount a reference to an older mount namespace into the current
2312 * mount namespace, preventing reference counting loops. A 64bit
2313 * number incrementing at 10Ghz will take 12,427 years to wrap which
2314 * is effectively never, so we can ignore the possibility.
2315 */
2316static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2317
2318static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2265{ 2319{
2266 struct mnt_namespace *new_ns; 2320 struct mnt_namespace *new_ns;
2321 int ret;
2267 2322
2268 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 2323 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2269 if (!new_ns) 2324 if (!new_ns)
2270 return ERR_PTR(-ENOMEM); 2325 return ERR_PTR(-ENOMEM);
2326 ret = proc_alloc_inum(&new_ns->proc_inum);
2327 if (ret) {
2328 kfree(new_ns);
2329 return ERR_PTR(ret);
2330 }
2331 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2271 atomic_set(&new_ns->count, 1); 2332 atomic_set(&new_ns->count, 1);
2272 new_ns->root = NULL; 2333 new_ns->root = NULL;
2273 INIT_LIST_HEAD(&new_ns->list); 2334 INIT_LIST_HEAD(&new_ns->list);
2274 init_waitqueue_head(&new_ns->poll); 2335 init_waitqueue_head(&new_ns->poll);
2275 new_ns->event = 0; 2336 new_ns->event = 0;
2337 new_ns->user_ns = get_user_ns(user_ns);
2276 return new_ns; 2338 return new_ns;
2277} 2339}
2278 2340
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2281 * copied from the namespace of the passed in task structure. 2343 * copied from the namespace of the passed in task structure.
2282 */ 2344 */
2283static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, 2345static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2284 struct fs_struct *fs) 2346 struct user_namespace *user_ns, struct fs_struct *fs)
2285{ 2347{
2286 struct mnt_namespace *new_ns; 2348 struct mnt_namespace *new_ns;
2287 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2349 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2288 struct mount *p, *q; 2350 struct mount *p, *q;
2289 struct mount *old = mnt_ns->root; 2351 struct mount *old = mnt_ns->root;
2290 struct mount *new; 2352 struct mount *new;
2353 int copy_flags;
2291 2354
2292 new_ns = alloc_mnt_ns(); 2355 new_ns = alloc_mnt_ns(user_ns);
2293 if (IS_ERR(new_ns)) 2356 if (IS_ERR(new_ns))
2294 return new_ns; 2357 return new_ns;
2295 2358
2296 down_write(&namespace_sem); 2359 down_write(&namespace_sem);
2297 /* First pass: copy the tree topology */ 2360 /* First pass: copy the tree topology */
2298 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); 2361 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2362 if (user_ns != mnt_ns->user_ns)
2363 copy_flags |= CL_SHARED_TO_SLAVE;
2364 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2299 if (IS_ERR(new)) { 2365 if (IS_ERR(new)) {
2300 up_write(&namespace_sem); 2366 up_write(&namespace_sem);
2301 kfree(new_ns); 2367 free_mnt_ns(new_ns);
2302 return ERR_CAST(new); 2368 return ERR_CAST(new);
2303 } 2369 }
2304 new_ns->root = new; 2370 new_ns->root = new;
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339} 2405}
2340 2406
2341struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, 2407struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2342 struct fs_struct *new_fs) 2408 struct user_namespace *user_ns, struct fs_struct *new_fs)
2343{ 2409{
2344 struct mnt_namespace *new_ns; 2410 struct mnt_namespace *new_ns;
2345 2411
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2349 if (!(flags & CLONE_NEWNS)) 2415 if (!(flags & CLONE_NEWNS))
2350 return ns; 2416 return ns;
2351 2417
2352 new_ns = dup_mnt_ns(ns, new_fs); 2418 new_ns = dup_mnt_ns(ns, user_ns, new_fs);
2353 2419
2354 put_mnt_ns(ns); 2420 put_mnt_ns(ns);
2355 return new_ns; 2421 return new_ns;
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2361 */ 2427 */
2362static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) 2428static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2363{ 2429{
2364 struct mnt_namespace *new_ns = alloc_mnt_ns(); 2430 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2365 if (!IS_ERR(new_ns)) { 2431 if (!IS_ERR(new_ns)) {
2366 struct mount *mnt = real_mount(m); 2432 struct mount *mnt = real_mount(m);
2367 mnt->mnt_ns = new_ns; 2433 mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2501 struct mount *new_mnt, *root_mnt; 2567 struct mount *new_mnt, *root_mnt;
2502 int error; 2568 int error;
2503 2569
2504 if (!capable(CAP_SYS_ADMIN)) 2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
2505 return -EPERM; 2571 return -EPERM;
2506 2572
2507 error = user_path_dir(new_root, &new); 2573 error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)
2583 struct vfsmount *mnt; 2649 struct vfsmount *mnt;
2584 struct mnt_namespace *ns; 2650 struct mnt_namespace *ns;
2585 struct path root; 2651 struct path root;
2652 struct file_system_type *type;
2586 2653
2587 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2654 type = get_fs_type("rootfs");
2655 if (!type)
2656 panic("Can't find rootfs type");
2657 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
2658 put_filesystem(type);
2588 if (IS_ERR(mnt)) 2659 if (IS_ERR(mnt))
2589 panic("Can't create rootfs"); 2660 panic("Can't create rootfs");
2590 2661
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
2647 br_write_unlock(&vfsmount_lock); 2718 br_write_unlock(&vfsmount_lock);
2648 up_write(&namespace_sem); 2719 up_write(&namespace_sem);
2649 release_mounts(&umount_list); 2720 release_mounts(&umount_list);
2650 kfree(ns); 2721 free_mnt_ns(ns);
2651} 2722}
2652 2723
2653struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 2724struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)
2681{ 2752{
2682 return check_mnt(real_mount(mnt)); 2753 return check_mnt(real_mount(mnt));
2683} 2754}
2755
2756static void *mntns_get(struct task_struct *task)
2757{
2758 struct mnt_namespace *ns = NULL;
2759 struct nsproxy *nsproxy;
2760
2761 rcu_read_lock();
2762 nsproxy = task_nsproxy(task);
2763 if (nsproxy) {
2764 ns = nsproxy->mnt_ns;
2765 get_mnt_ns(ns);
2766 }
2767 rcu_read_unlock();
2768
2769 return ns;
2770}
2771
2772static void mntns_put(void *ns)
2773{
2774 put_mnt_ns(ns);
2775}
2776
2777static int mntns_install(struct nsproxy *nsproxy, void *ns)
2778{
2779 struct fs_struct *fs = current->fs;
2780 struct mnt_namespace *mnt_ns = ns;
2781 struct path root;
2782
2783 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
2784 !nsown_capable(CAP_SYS_CHROOT))
2785 return -EPERM;
2786
2787 if (fs->users != 1)
2788 return -EINVAL;
2789
2790 get_mnt_ns(mnt_ns);
2791 put_mnt_ns(nsproxy->mnt_ns);
2792 nsproxy->mnt_ns = mnt_ns;
2793
2794 /* Find the root */
2795 root.mnt = &mnt_ns->root->mnt;
2796 root.dentry = mnt_ns->root->mnt.mnt_root;
2797 path_get(&root);
2798 while(d_mountpoint(root.dentry) && follow_down_one(&root))
2799 ;
2800
2801 /* Update the pwd and root */
2802 set_fs_pwd(fs, &root);
2803 set_fs_root(fs, &root);
2804
2805 path_put(&root);
2806 return 0;
2807}
2808
2809static unsigned int mntns_inum(void *ns)
2810{
2811 struct mnt_namespace *mnt_ns = ns;
2812 return mnt_ns->proc_inum;
2813}
2814
2815const struct proc_ns_operations mntns_operations = {
2816 .name = "mnt",
2817 .type = CLONE_NEWNS,
2818 .get = mntns_get,
2819 .put = mntns_put,
2820 .install = mntns_install,
2821 .inum = mntns_inum,
2822};
diff --git a/fs/open.c b/fs/open.c
index 59071f55bf7f..182d8667b7bd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
435 goto dput_and_out; 435 goto dput_and_out;
436 436
437 error = -EPERM; 437 error = -EPERM;
438 if (!capable(CAP_SYS_CHROOT)) 438 if (!nsown_capable(CAP_SYS_CHROOT))
439 goto dput_and_out; 439 goto dput_and_out;
440 error = security_path_chroot(&path); 440 error = security_path_chroot(&path);
441 if (error) 441 if (error)
diff --git a/fs/pnode.h b/fs/pnode.h
index 65c60979d541..19b853a3445c 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
22#define CL_COPY_ALL 0x04 22#define CL_COPY_ALL 0x04
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20
25 26
26static inline void set_mnt_shared(struct mount *mnt) 27static inline void set_mnt_shared(struct mount *mnt)
27{ 28{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 99349efbbc2b..981b05601931 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o 23proc-y += namespaces.o
24proc-y += self.o
24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 25proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
25proc-$(CONFIG_NET) += proc_net.o 26proc-$(CONFIG_NET) += proc_net.o
26proc-$(CONFIG_PROC_KCORE) += kcore.o 27proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 060a56a91278..6a91e6ffbcbd 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
162static inline void task_state(struct seq_file *m, struct pid_namespace *ns, 162static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
163 struct pid *pid, struct task_struct *p) 163 struct pid *pid, struct task_struct *p)
164{ 164{
165 struct user_namespace *user_ns = current_user_ns(); 165 struct user_namespace *user_ns = seq_user_ns(m);
166 struct group_info *group_info; 166 struct group_info *group_info;
167 int g; 167 int g;
168 struct fdtable *fdt = NULL; 168 struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa63d25157b8..5a5a0be40e40 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
2345}; 2345};
2346#endif 2346#endif
2347 2347
2348/*
2349 * /proc/self:
2350 */
2351static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
2352 int buflen)
2353{
2354 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2355 pid_t tgid = task_tgid_nr_ns(current, ns);
2356 char tmp[PROC_NUMBUF];
2357 if (!tgid)
2358 return -ENOENT;
2359 sprintf(tmp, "%d", tgid);
2360 return vfs_readlink(dentry,buffer,buflen,tmp);
2361}
2362
2363static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
2364{
2365 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
2366 pid_t tgid = task_tgid_nr_ns(current, ns);
2367 char *name = ERR_PTR(-ENOENT);
2368 if (tgid) {
2369 /* 11 for max length of signed int in decimal + NULL term */
2370 name = kmalloc(12, GFP_KERNEL);
2371 if (!name)
2372 name = ERR_PTR(-ENOMEM);
2373 else
2374 sprintf(name, "%d", tgid);
2375 }
2376 nd_set_link(nd, name);
2377 return NULL;
2378}
2379
2380static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
2381 void *cookie)
2382{
2383 char *s = nd_get_link(nd);
2384 if (!IS_ERR(s))
2385 kfree(s);
2386}
2387
2388static const struct inode_operations proc_self_inode_operations = {
2389 .readlink = proc_self_readlink,
2390 .follow_link = proc_self_follow_link,
2391 .put_link = proc_self_put_link,
2392};
2393
2394/*
2395 * proc base
2396 *
2397 * These are the directory entries in the root directory of /proc
2398 * that properly belong to the /proc filesystem, as they describe
2399 * describe something that is process related.
2400 */
2401static const struct pid_entry proc_base_stuff[] = {
2402 NOD("self", S_IFLNK|S_IRWXUGO,
2403 &proc_self_inode_operations, NULL, {}),
2404};
2405
2406static struct dentry *proc_base_instantiate(struct inode *dir,
2407 struct dentry *dentry, struct task_struct *task, const void *ptr)
2408{
2409 const struct pid_entry *p = ptr;
2410 struct inode *inode;
2411 struct proc_inode *ei;
2412 struct dentry *error;
2413
2414 /* Allocate the inode */
2415 error = ERR_PTR(-ENOMEM);
2416 inode = new_inode(dir->i_sb);
2417 if (!inode)
2418 goto out;
2419
2420 /* Initialize the inode */
2421 ei = PROC_I(inode);
2422 inode->i_ino = get_next_ino();
2423 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2424
2425 /*
2426 * grab the reference to the task.
2427 */
2428 ei->pid = get_task_pid(task, PIDTYPE_PID);
2429 if (!ei->pid)
2430 goto out_iput;
2431
2432 inode->i_mode = p->mode;
2433 if (S_ISDIR(inode->i_mode))
2434 set_nlink(inode, 2);
2435 if (S_ISLNK(inode->i_mode))
2436 inode->i_size = 64;
2437 if (p->iop)
2438 inode->i_op = p->iop;
2439 if (p->fop)
2440 inode->i_fop = p->fop;
2441 ei->op = p->op;
2442 d_add(dentry, inode);
2443 error = NULL;
2444out:
2445 return error;
2446out_iput:
2447 iput(inode);
2448 goto out;
2449}
2450
2451static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
2452{
2453 struct dentry *error;
2454 struct task_struct *task = get_proc_task(dir);
2455 const struct pid_entry *p, *last;
2456
2457 error = ERR_PTR(-ENOENT);
2458
2459 if (!task)
2460 goto out_no_task;
2461
2462 /* Lookup the directory entry */
2463 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
2464 for (p = proc_base_stuff; p <= last; p++) {
2465 if (p->len != dentry->d_name.len)
2466 continue;
2467 if (!memcmp(dentry->d_name.name, p->name, p->len))
2468 break;
2469 }
2470 if (p > last)
2471 goto out;
2472
2473 error = proc_base_instantiate(dir, dentry, task, p);
2474
2475out:
2476 put_task_struct(task);
2477out_no_task:
2478 return error;
2479}
2480
2481static int proc_base_fill_cache(struct file *filp, void *dirent,
2482 filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
2483{
2484 return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
2485 proc_base_instantiate, task, p);
2486}
2487
2488#ifdef CONFIG_TASK_IO_ACCOUNTING 2348#ifdef CONFIG_TASK_IO_ACCOUNTING
2489static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2349static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2490{ 2350{
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
2839 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2699 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
2840 tgid->numbers[i].nr); 2700 tgid->numbers[i].nr);
2841 } 2701 }
2842
2843 upid = &pid->numbers[pid->level];
2844 if (upid->nr == 1)
2845 pid_ns_release_proc(upid->ns);
2846} 2702}
2847 2703
2848static struct dentry *proc_pid_instantiate(struct inode *dir, 2704static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
2876 2732
2877struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 2733struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
2878{ 2734{
2879 struct dentry *result; 2735 struct dentry *result = NULL;
2880 struct task_struct *task; 2736 struct task_struct *task;
2881 unsigned tgid; 2737 unsigned tgid;
2882 struct pid_namespace *ns; 2738 struct pid_namespace *ns;
2883 2739
2884 result = proc_base_lookup(dir, dentry);
2885 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
2886 goto out;
2887
2888 tgid = name_to_int(dentry); 2740 tgid = name_to_int(dentry);
2889 if (tgid == ~0U) 2741 if (tgid == ~0U)
2890 goto out; 2742 goto out;
@@ -2947,7 +2799,7 @@ retry:
2947 return iter; 2799 return iter;
2948} 2800}
2949 2801
2950#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 2802#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
2951 2803
2952static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2804static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2953 struct tgid_iter iter) 2805 struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
2967/* for the /proc/ directory itself, after non-process stuff has been done */ 2819/* for the /proc/ directory itself, after non-process stuff has been done */
2968int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2820int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2969{ 2821{
2970 unsigned int nr;
2971 struct task_struct *reaper;
2972 struct tgid_iter iter; 2822 struct tgid_iter iter;
2973 struct pid_namespace *ns; 2823 struct pid_namespace *ns;
2974 filldir_t __filldir; 2824 filldir_t __filldir;
2975 2825
2976 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 2826 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
2977 goto out_no_task; 2827 goto out;
2978 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2979
2980 reaper = get_proc_task(filp->f_path.dentry->d_inode);
2981 if (!reaper)
2982 goto out_no_task;
2983
2984 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
2985 const struct pid_entry *p = &proc_base_stuff[nr];
2986 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
2987 goto out;
2988 }
2989 2828
2990 ns = filp->f_dentry->d_sb->s_fs_info; 2829 ns = filp->f_dentry->d_sb->s_fs_info;
2991 iter.task = NULL; 2830 iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3006 } 2845 }
3007 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 2846 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
3008out: 2847out:
3009 put_task_struct(reaper);
3010out_no_task:
3011 return 0; 2848 return 0;
3012} 2849}
3013 2850
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
350 * Return an inode number between PROC_DYNAMIC_FIRST and 350 * Return an inode number between PROC_DYNAMIC_FIRST and
351 * 0xffffffff, or zero on failure. 351 * 0xffffffff, or zero on failure.
352 */ 352 */
353static unsigned int get_inode_number(void) 353int proc_alloc_inum(unsigned int *inum)
354{ 354{
355 unsigned int i; 355 unsigned int i;
356 int error; 356 int error;
357 357
358retry: 358retry:
359 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) 359 if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
360 return 0; 360 return -ENOMEM;
361 361
362 spin_lock(&proc_inum_lock); 362 spin_lock(&proc_inum_lock);
363 error = ida_get_new(&proc_inum_ida, &i); 363 error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
365 if (error == -EAGAIN) 365 if (error == -EAGAIN)
366 goto retry; 366 goto retry;
367 else if (error) 367 else if (error)
368 return 0; 368 return error;
369 369
370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { 370 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
371 spin_lock(&proc_inum_lock); 371 spin_lock(&proc_inum_lock);
372 ida_remove(&proc_inum_ida, i); 372 ida_remove(&proc_inum_ida, i);
373 spin_unlock(&proc_inum_lock); 373 spin_unlock(&proc_inum_lock);
374 return 0; 374 return -ENOSPC;
375 } 375 }
376 return PROC_DYNAMIC_FIRST + i; 376 *inum = PROC_DYNAMIC_FIRST + i;
377 return 0;
377} 378}
378 379
379static void release_inode_number(unsigned int inum) 380void proc_free_inum(unsigned int inum)
380{ 381{
381 spin_lock(&proc_inum_lock); 382 spin_lock(&proc_inum_lock);
382 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 383 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
554 555
555static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 556static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
556{ 557{
557 unsigned int i;
558 struct proc_dir_entry *tmp; 558 struct proc_dir_entry *tmp;
559 int ret;
559 560
560 i = get_inode_number(); 561 ret = proc_alloc_inum(&dp->low_ino);
561 if (i == 0) 562 if (ret)
562 return -EAGAIN; 563 return ret;
563 dp->low_ino = i;
564 564
565 if (S_ISDIR(dp->mode)) { 565 if (S_ISDIR(dp->mode)) {
566 if (dp->proc_iops == NULL) { 566 if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
764 764
765static void free_proc_entry(struct proc_dir_entry *de) 765static void free_proc_entry(struct proc_dir_entry *de)
766{ 766{
767 release_inode_number(de->low_ino); 767 proc_free_inum(de->low_ino);
768 768
769 if (S_ISLNK(de->mode)) 769 if (S_ISLNK(de->mode))
770 kfree(de->data); 770 kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b22bbdee9ec..439ae6886507 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
31 struct proc_dir_entry *de; 31 struct proc_dir_entry *de;
32 struct ctl_table_header *head; 32 struct ctl_table_header *head;
33 const struct proc_ns_operations *ns_ops; 33 const struct proc_ns_operations *ns_ops;
34 void *ns;
34 35
35 truncate_inode_pages(&inode->i_data, 0); 36 truncate_inode_pages(&inode->i_data, 0);
36 clear_inode(inode); 37 clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
49 } 50 }
50 /* Release any associated namespace */ 51 /* Release any associated namespace */
51 ns_ops = PROC_I(inode)->ns_ops; 52 ns_ops = PROC_I(inode)->ns_ops;
52 if (ns_ops && ns_ops->put) 53 ns = PROC_I(inode)->ns;
53 ns_ops->put(PROC_I(inode)->ns); 54 if (ns_ops && ns)
55 ns_ops->put(ns);
54} 56}
55 57
56static struct kmem_cache * proc_inode_cachep; 58static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 43973b084abf..252544c05207 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct ctl_table_header;
15struct mempolicy; 15struct mempolicy;
16 16
17extern struct proc_dir_entry proc_root; 17extern struct proc_dir_entry proc_root;
18extern void proc_self_init(void);
18#ifdef CONFIG_PROC_SYSCTL 19#ifdef CONFIG_PROC_SYSCTL
19extern int proc_sys_init(void); 20extern int proc_sys_init(void);
20extern void sysctl_head_put(struct ctl_table_header *head); 21extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
13#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
14#include <linux/user_namespace.h>
14#include "internal.h" 15#include "internal.h"
15 16
16 17
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
24#ifdef CONFIG_IPC_NS 25#ifdef CONFIG_IPC_NS
25 &ipcns_operations, 26 &ipcns_operations,
26#endif 27#endif
28#ifdef CONFIG_PID_NS
29 &pidns_operations,
30#endif
31#ifdef CONFIG_USER_NS
32 &userns_operations,
33#endif
34 &mntns_operations,
27}; 35};
28 36
29static const struct file_operations ns_file_operations = { 37static const struct file_operations ns_file_operations = {
30 .llseek = no_llseek, 38 .llseek = no_llseek,
31}; 39};
32 40
41static const struct inode_operations ns_inode_operations = {
42 .setattr = proc_setattr,
43};
44
45static int ns_delete_dentry(const struct dentry *dentry)
46{
47 /* Don't cache namespace inodes when not in use */
48 return 1;
49}
50
51static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
52{
53 struct inode *inode = dentry->d_inode;
54 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
55
56 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
57 ns_ops->name, inode->i_ino);
58}
59
60const struct dentry_operations ns_dentry_operations =
61{
62 .d_delete = ns_delete_dentry,
63 .d_dname = ns_dname,
64};
65
66static struct dentry *proc_ns_get_dentry(struct super_block *sb,
67 struct task_struct *task, const struct proc_ns_operations *ns_ops)
68{
69 struct dentry *dentry, *result;
70 struct inode *inode;
71 struct proc_inode *ei;
72 struct qstr qname = { .name = "", };
73 void *ns;
74
75 ns = ns_ops->get(task);
76 if (!ns)
77 return ERR_PTR(-ENOENT);
78
79 dentry = d_alloc_pseudo(sb, &qname);
80 if (!dentry) {
81 ns_ops->put(ns);
82 return ERR_PTR(-ENOMEM);
83 }
84
85 inode = iget_locked(sb, ns_ops->inum(ns));
86 if (!inode) {
87 dput(dentry);
88 ns_ops->put(ns);
89 return ERR_PTR(-ENOMEM);
90 }
91
92 ei = PROC_I(inode);
93 if (inode->i_state & I_NEW) {
94 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
95 inode->i_op = &ns_inode_operations;
96 inode->i_mode = S_IFREG | S_IRUGO;
97 inode->i_fop = &ns_file_operations;
98 ei->ns_ops = ns_ops;
99 ei->ns = ns;
100 unlock_new_inode(inode);
101 } else {
102 ns_ops->put(ns);
103 }
104
105 d_set_d_op(dentry, &ns_dentry_operations);
106 result = d_instantiate_unique(dentry, inode);
107 if (result) {
108 dput(dentry);
109 dentry = result;
110 }
111
112 return dentry;
113}
114
115static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
116{
117 struct inode *inode = dentry->d_inode;
118 struct super_block *sb = inode->i_sb;
119 struct proc_inode *ei = PROC_I(inode);
120 struct task_struct *task;
121 struct dentry *ns_dentry;
122 void *error = ERR_PTR(-EACCES);
123
124 task = get_proc_task(inode);
125 if (!task)
126 goto out;
127
128 if (!ptrace_may_access(task, PTRACE_MODE_READ))
129 goto out_put_task;
130
131 ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
132 if (IS_ERR(ns_dentry)) {
133 error = ERR_CAST(ns_dentry);
134 goto out_put_task;
135 }
136
137 dput(nd->path.dentry);
138 nd->path.dentry = ns_dentry;
139 error = NULL;
140
141out_put_task:
142 put_task_struct(task);
143out:
144 return error;
145}
146
147static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
148{
149 struct inode *inode = dentry->d_inode;
150 struct proc_inode *ei = PROC_I(inode);
151 const struct proc_ns_operations *ns_ops = ei->ns_ops;
152 struct task_struct *task;
153 void *ns;
154 char name[50];
155 int len = -EACCES;
156
157 task = get_proc_task(inode);
158 if (!task)
159 goto out;
160
161 if (!ptrace_may_access(task, PTRACE_MODE_READ))
162 goto out_put_task;
163
164 len = -ENOENT;
165 ns = ns_ops->get(task);
166 if (!ns)
167 goto out_put_task;
168
169 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
170 len = strlen(name);
171
172 if (len > buflen)
173 len = buflen;
174 if (copy_to_user(buffer, name, len))
175 len = -EFAULT;
176
177 ns_ops->put(ns);
178out_put_task:
179 put_task_struct(task);
180out:
181 return len;
182}
183
184static const struct inode_operations proc_ns_link_inode_operations = {
185 .readlink = proc_ns_readlink,
186 .follow_link = proc_ns_follow_link,
187 .setattr = proc_setattr,
188};
189
33static struct dentry *proc_ns_instantiate(struct inode *dir, 190static struct dentry *proc_ns_instantiate(struct inode *dir,
34 struct dentry *dentry, struct task_struct *task, const void *ptr) 191 struct dentry *dentry, struct task_struct *task, const void *ptr)
35{ 192{
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
37 struct inode *inode; 194 struct inode *inode;
38 struct proc_inode *ei; 195 struct proc_inode *ei;
39 struct dentry *error = ERR_PTR(-ENOENT); 196 struct dentry *error = ERR_PTR(-ENOENT);
40 void *ns;
41 197
42 inode = proc_pid_make_inode(dir->i_sb, task); 198 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode) 199 if (!inode)
44 goto out; 200 goto out;
45 201
46 ns = ns_ops->get(task);
47 if (!ns)
48 goto out_iput;
49
50 ei = PROC_I(inode); 202 ei = PROC_I(inode);
51 inode->i_mode = S_IFREG|S_IRUSR; 203 inode->i_mode = S_IFLNK|S_IRWXUGO;
52 inode->i_fop = &ns_file_operations; 204 inode->i_op = &proc_ns_link_inode_operations;
53 ei->ns_ops = ns_ops; 205 ei->ns_ops = ns_ops;
54 ei->ns = ns;
55 206
56 d_set_d_op(dentry, &pid_dentry_operations); 207 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 208 d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
60 error = NULL; 211 error = NULL;
61out: 212out:
62 return error; 213 return error;
63out_iput:
64 iput(inode);
65 goto out;
66} 214}
67 215
68static int proc_ns_fill_cache(struct file *filp, void *dirent, 216static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
89 if (!task) 237 if (!task)
90 goto out_no_task; 238 goto out_no_task;
91 239
92 ret = -EPERM;
93 if (!ptrace_may_access(task, PTRACE_MODE_READ))
94 goto out;
95
96 ret = 0; 240 ret = 0;
97 i = filp->f_pos; 241 i = filp->f_pos;
98 switch (i) { 242 switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
152 if (!task) 296 if (!task)
153 goto out_no_task; 297 goto out_no_task;
154 298
155 error = ERR_PTR(-EPERM);
156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out;
158
159 last = &ns_entries[ARRAY_SIZE(ns_entries)]; 299 last = &ns_entries[ARRAY_SIZE(ns_entries)];
160 for (entry = ns_entries; entry < last; entry++) { 300 for (entry = ns_entries; entry < last; entry++) {
161 if (strlen((*entry)->name) != len) 301 if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
163 if (!memcmp(dentry->d_name.name, (*entry)->name, len)) 303 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
164 break; 304 break;
165 } 305 }
166 error = ERR_PTR(-ENOENT);
167 if (entry == last) 306 if (entry == last)
168 goto out; 307 goto out;
169 308
@@ -198,3 +337,7 @@ out_invalid:
198 return ERR_PTR(-EINVAL); 337 return ERR_PTR(-EINVAL);
199} 338}
200 339
340bool proc_ns_inode(struct inode *inode)
341{
342 return inode->i_fop == &ns_file_operations;
343}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9889a92d2e01..c6e9fac26bac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
100 int err; 100 int err;
101 struct super_block *sb; 101 struct super_block *sb;
102 struct pid_namespace *ns; 102 struct pid_namespace *ns;
103 struct proc_inode *ei;
104 char *options; 103 char *options;
105 104
106 if (flags & MS_KERNMOUNT) { 105 if (flags & MS_KERNMOUNT) {
107 ns = (struct pid_namespace *)data; 106 ns = (struct pid_namespace *)data;
108 options = NULL; 107 options = NULL;
109 } else { 108 } else {
110 ns = current->nsproxy->pid_ns; 109 ns = task_active_pid_ns(current);
111 options = data; 110 options = data;
112 } 111 }
113 112
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
130 sb->s_flags |= MS_ACTIVE; 129 sb->s_flags |= MS_ACTIVE;
131 } 130 }
132 131
133 ei = PROC_I(sb->s_root->d_inode);
134 if (!ei->pid) {
135 rcu_read_lock();
136 ei->pid = get_pid(find_pid_ns(1, ns));
137 rcu_read_unlock();
138 }
139
140 return dget(sb->s_root); 132 return dget(sb->s_root);
141} 133}
142 134
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
153 .name = "proc", 145 .name = "proc",
154 .mount = proc_mount, 146 .mount = proc_mount,
155 .kill_sb = proc_kill_sb, 147 .kill_sb = proc_kill_sb,
148 .fs_flags = FS_USERNS_MOUNT,
156}; 149};
157 150
158void __init proc_root_init(void) 151void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
163 err = register_filesystem(&proc_fs_type); 156 err = register_filesystem(&proc_fs_type);
164 if (err) 157 if (err)
165 return; 158 return;
166 err = pid_ns_prepare_proc(&init_pid_ns);
167 if (err) {
168 unregister_filesystem(&proc_fs_type);
169 return;
170 }
171 159
160 proc_self_init();
172 proc_symlink("mounts", NULL, "self/mounts"); 161 proc_symlink("mounts", NULL, "self/mounts");
173 162
174 proc_net_init(); 163 proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c
new file mode 100644
index 000000000000..aa5cc3bff140
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
1#include <linux/proc_fs.h>
2#include <linux/sched.h>
3#include <linux/namei.h>
4
5/*
6 * /proc/self:
7 */
8static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
9 int buflen)
10{
11 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
12 pid_t tgid = task_tgid_nr_ns(current, ns);
13 char tmp[PROC_NUMBUF];
14 if (!tgid)
15 return -ENOENT;
16 sprintf(tmp, "%d", tgid);
17 return vfs_readlink(dentry,buffer,buflen,tmp);
18}
19
20static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
21{
22 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
23 pid_t tgid = task_tgid_nr_ns(current, ns);
24 char *name = ERR_PTR(-ENOENT);
25 if (tgid) {
26 /* 11 for max length of signed int in decimal + NULL term */
27 name = kmalloc(12, GFP_KERNEL);
28 if (!name)
29 name = ERR_PTR(-ENOMEM);
30 else
31 sprintf(name, "%d", tgid);
32 }
33 nd_set_link(nd, name);
34 return NULL;
35}
36
37static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
38 void *cookie)
39{
40 char *s = nd_get_link(nd);
41 if (!IS_ERR(s))
42 kfree(s);
43}
44
45static const struct inode_operations proc_self_inode_operations = {
46 .readlink = proc_self_readlink,
47 .follow_link = proc_self_follow_link,
48 .put_link = proc_self_put_link,
49};
50
51void __init proc_self_init(void)
52{
53 struct proc_dir_entry *proc_self_symlink;
54 mode_t mode;
55
56 mode = S_IFLNK | S_IRWXUGO;
57 proc_self_symlink = proc_create("self", mode, NULL, NULL );
58 proc_self_symlink->proc_iops = &proc_self_inode_operations;
59}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 71eb7e253927..db940a9be045 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = {
149 .name = "sysfs", 149 .name = "sysfs",
150 .mount = sysfs_mount, 150 .mount = sysfs_mount,
151 .kill_sb = sysfs_kill_sb, 151 .kill_sb = sysfs_kill_sb,
152 .fs_flags = FS_USERNS_MOUNT,
152}; 153};
153 154
154int __init sysfs_init(void) 155int __init sysfs_init(void)