diff options
59 files changed, 996 insertions, 451 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 965d381abd7..25db92a8e1c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c | |||
@@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) | |||
1094 | LOAD_INT(c), LOAD_FRAC(c), | 1094 | LOAD_INT(c), LOAD_FRAC(c), |
1095 | count_active_contexts(), | 1095 | count_active_contexts(), |
1096 | atomic_read(&nr_spu_contexts), | 1096 | atomic_read(&nr_spu_contexts), |
1097 | current->nsproxy->pid_ns->last_pid); | 1097 | task_active_pid_ns(current)->last_pid); |
1098 | return 0; | 1098 | return 0; |
1099 | } | 1099 | } |
1100 | 1100 | ||
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 49e3b49e552..4bd82ac0210 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c | |||
@@ -123,7 +123,7 @@ void mconsole_log(struct mc_request *req) | |||
123 | 123 | ||
124 | void mconsole_proc(struct mc_request *req) | 124 | void mconsole_proc(struct mc_request *req) |
125 | { | 125 | { |
126 | struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; | 126 | struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt; |
127 | char *buf; | 127 | char *buf; |
128 | int len; | 128 | int len; |
129 | struct file *file; | 129 | struct file *file; |
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 4a36e9ab8cf..2d12e8a1f82 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/uaccess.h> | 35 | #include <linux/uaccess.h> |
36 | #include <linux/vmalloc.h> | 36 | #include <linux/vmalloc.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/pid_namespace.h> | ||
38 | 39 | ||
39 | #include "binder.h" | 40 | #include "binder.h" |
40 | #include "binder_trace.h" | 41 | #include "binder_trace.h" |
@@ -2320,7 +2321,7 @@ retry: | |||
2320 | if (t->from) { | 2321 | if (t->from) { |
2321 | struct task_struct *sender = t->from->proc->tsk; | 2322 | struct task_struct *sender = t->from->proc->tsk; |
2322 | tr.sender_pid = task_tgid_nr_ns(sender, | 2323 | tr.sender_pid = task_tgid_nr_ns(sender, |
2323 | current->nsproxy->pid_ns); | 2324 | task_active_pid_ns(current)); |
2324 | } else { | 2325 | } else { |
2325 | tr.sender_pid = 0; | 2326 | tr.sender_pid = 0; |
2326 | } | 2327 | } |
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) | |||
49 | /* Make sure a caller can chown. */ | 49 | /* Make sure a caller can chown. */ |
50 | if ((ia_valid & ATTR_UID) && | 50 | if ((ia_valid & ATTR_UID) && |
51 | (!uid_eq(current_fsuid(), inode->i_uid) || | 51 | (!uid_eq(current_fsuid(), inode->i_uid) || |
52 | !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) | 52 | !uid_eq(attr->ia_uid, inode->i_uid)) && |
53 | !inode_capable(inode, CAP_CHOWN)) | ||
53 | return -EPERM; | 54 | return -EPERM; |
54 | 55 | ||
55 | /* Make sure caller can chgrp. */ | 56 | /* Make sure caller can chgrp. */ |
56 | if ((ia_valid & ATTR_GID) && | 57 | if ((ia_valid & ATTR_GID) && |
57 | (!uid_eq(current_fsuid(), inode->i_uid) || | 58 | (!uid_eq(current_fsuid(), inode->i_uid) || |
58 | (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && | 59 | (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && |
59 | !capable(CAP_CHOWN)) | 60 | !inode_capable(inode, CAP_CHOWN)) |
60 | return -EPERM; | 61 | return -EPERM; |
61 | 62 | ||
62 | /* Make sure a caller can chmod. */ | 63 | /* Make sure a caller can chmod. */ |
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) | |||
65 | return -EPERM; | 66 | return -EPERM; |
66 | /* Also check the setgid bit! */ | 67 | /* Also check the setgid bit! */ |
67 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : | 68 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : |
68 | inode->i_gid) && !capable(CAP_FSETID)) | 69 | inode->i_gid) && |
70 | !inode_capable(inode, CAP_FSETID)) | ||
69 | attr->ia_mode &= ~S_ISGID; | 71 | attr->ia_mode &= ~S_ISGID; |
70 | } | 72 | } |
71 | 73 | ||
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) | |||
157 | if (ia_valid & ATTR_MODE) { | 159 | if (ia_valid & ATTR_MODE) { |
158 | umode_t mode = attr->ia_mode; | 160 | umode_t mode = attr->ia_mode; |
159 | 161 | ||
160 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | 162 | if (!in_group_p(inode->i_gid) && |
163 | !inode_capable(inode, CAP_FSETID)) | ||
161 | mode &= ~S_ISGID; | 164 | mode &= ~S_ISGID; |
162 | inode->i_mode = mode; | 165 | inode->i_mode = mode; |
163 | } | 166 | } |
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 908e1845541..b785e770795 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
@@ -74,8 +74,8 @@ struct autofs_info { | |||
74 | unsigned long last_used; | 74 | unsigned long last_used; |
75 | atomic_t count; | 75 | atomic_t count; |
76 | 76 | ||
77 | uid_t uid; | 77 | kuid_t uid; |
78 | gid_t gid; | 78 | kgid_t gid; |
79 | }; | 79 | }; |
80 | 80 | ||
81 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ | 81 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ |
@@ -89,8 +89,8 @@ struct autofs_wait_queue { | |||
89 | struct qstr name; | 89 | struct qstr name; |
90 | u32 dev; | 90 | u32 dev; |
91 | u64 ino; | 91 | u64 ino; |
92 | uid_t uid; | 92 | kuid_t uid; |
93 | gid_t gid; | 93 | kgid_t gid; |
94 | pid_t pid; | 94 | pid_t pid; |
95 | pid_t tgid; | 95 | pid_t tgid; |
96 | /* This is for status reporting upon return */ | 96 | /* This is for status reporting upon return */ |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index a16214109d3..9f68a37bb2b 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
437 | err = 0; | 437 | err = 0; |
438 | autofs4_expire_wait(path.dentry); | 438 | autofs4_expire_wait(path.dentry); |
439 | spin_lock(&sbi->fs_lock); | 439 | spin_lock(&sbi->fs_lock); |
440 | param->requester.uid = ino->uid; | 440 | param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); |
441 | param->requester.gid = ino->gid; | 441 | param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); |
442 | spin_unlock(&sbi->fs_lock); | 442 | spin_unlock(&sbi->fs_lock); |
443 | } | 443 | } |
444 | path_put(&path); | 444 | path_put(&path); |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 8a4fed8ead3..b104726e2d0 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) | |||
36 | 36 | ||
37 | void autofs4_clean_ino(struct autofs_info *ino) | 37 | void autofs4_clean_ino(struct autofs_info *ino) |
38 | { | 38 | { |
39 | ino->uid = 0; | 39 | ino->uid = GLOBAL_ROOT_UID; |
40 | ino->gid = 0; | 40 | ino->gid = GLOBAL_ROOT_GID; |
41 | ino->last_used = jiffies; | 41 | ino->last_used = jiffies; |
42 | } | 42 | } |
43 | 43 | ||
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) | |||
79 | return 0; | 79 | return 0; |
80 | 80 | ||
81 | seq_printf(m, ",fd=%d", sbi->pipefd); | 81 | seq_printf(m, ",fd=%d", sbi->pipefd); |
82 | if (root_inode->i_uid != 0) | 82 | if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID)) |
83 | seq_printf(m, ",uid=%u", root_inode->i_uid); | 83 | seq_printf(m, ",uid=%u", |
84 | if (root_inode->i_gid != 0) | 84 | from_kuid_munged(&init_user_ns, root_inode->i_uid)); |
85 | seq_printf(m, ",gid=%u", root_inode->i_gid); | 85 | if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) |
86 | seq_printf(m, ",gid=%u", | ||
87 | from_kgid_munged(&init_user_ns, root_inode->i_gid)); | ||
86 | seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); | 88 | seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); |
87 | seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); | 89 | seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); |
88 | seq_printf(m, ",minproto=%d", sbi->min_proto); | 90 | seq_printf(m, ",minproto=%d", sbi->min_proto); |
@@ -126,7 +128,7 @@ static const match_table_t tokens = { | |||
126 | {Opt_err, NULL} | 128 | {Opt_err, NULL} |
127 | }; | 129 | }; |
128 | 130 | ||
129 | static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | 131 | static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, |
130 | pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) | 132 | pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) |
131 | { | 133 | { |
132 | char *p; | 134 | char *p; |
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | |||
159 | case Opt_uid: | 161 | case Opt_uid: |
160 | if (match_int(args, &option)) | 162 | if (match_int(args, &option)) |
161 | return 1; | 163 | return 1; |
162 | *uid = option; | 164 | *uid = make_kuid(current_user_ns(), option); |
165 | if (!uid_valid(*uid)) | ||
166 | return 1; | ||
163 | break; | 167 | break; |
164 | case Opt_gid: | 168 | case Opt_gid: |
165 | if (match_int(args, &option)) | 169 | if (match_int(args, &option)) |
166 | return 1; | 170 | return 1; |
167 | *gid = option; | 171 | *gid = make_kgid(current_user_ns(), option); |
172 | if (!gid_valid(*gid)) | ||
173 | return 1; | ||
168 | break; | 174 | break; |
169 | case Opt_pgrp: | 175 | case Opt_pgrp: |
170 | if (match_int(args, &option)) | 176 | if (match_int(args, &option)) |
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index dce436e595c..03bc1d347d8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
154 | case autofs_ptype_expire_direct: | 154 | case autofs_ptype_expire_direct: |
155 | { | 155 | { |
156 | struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; | 156 | struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; |
157 | struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns; | ||
157 | 158 | ||
158 | pktsz = sizeof(*packet); | 159 | pktsz = sizeof(*packet); |
159 | 160 | ||
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, | |||
163 | packet->name[wq->name.len] = '\0'; | 164 | packet->name[wq->name.len] = '\0'; |
164 | packet->dev = wq->dev; | 165 | packet->dev = wq->dev; |
165 | packet->ino = wq->ino; | 166 | packet->ino = wq->ino; |
166 | packet->uid = wq->uid; | 167 | packet->uid = from_kuid_munged(user_ns, wq->uid); |
167 | packet->gid = wq->gid; | 168 | packet->gid = from_kgid_munged(user_ns, wq->gid); |
168 | packet->pid = wq->pid; | 169 | packet->pid = wq->pid; |
169 | packet->tgid = wq->tgid; | 170 | packet->tgid = wq->tgid; |
170 | break; | 171 | break; |
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm) | |||
1266 | bprm->cred->egid = current_egid(); | 1266 | bprm->cred->egid = current_egid(); |
1267 | 1267 | ||
1268 | if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && | 1268 | if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && |
1269 | !current->no_new_privs) { | 1269 | !current->no_new_privs && |
1270 | kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && | ||
1271 | kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { | ||
1270 | /* Set-uid? */ | 1272 | /* Set-uid? */ |
1271 | if (mode & S_ISUID) { | 1273 | if (mode & S_ISUID) { |
1272 | if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid)) | ||
1273 | return -EPERM; | ||
1274 | bprm->per_clear |= PER_CLEAR_ON_SETID; | 1274 | bprm->per_clear |= PER_CLEAR_ON_SETID; |
1275 | bprm->cred->euid = inode->i_uid; | 1275 | bprm->cred->euid = inode->i_uid; |
1276 | |||
1277 | } | 1276 | } |
1278 | 1277 | ||
1279 | /* Set-gid? */ | 1278 | /* Set-gid? */ |
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm) | |||
1283 | * executable. | 1282 | * executable. |
1284 | */ | 1283 | */ |
1285 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { | 1284 | if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { |
1286 | if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) | ||
1287 | return -EPERM; | ||
1288 | bprm->per_clear |= PER_CLEAR_ON_SETID; | 1285 | bprm->per_clear |= PER_CLEAR_ON_SETID; |
1289 | bprm->cred->egid = inode->i_gid; | 1286 | bprm->cred->egid = inode->i_gid; |
1290 | } | 1287 | } |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8c23fa7a91e..c16335315e5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req) | |||
92 | 92 | ||
93 | static void fuse_req_init_context(struct fuse_req *req) | 93 | static void fuse_req_init_context(struct fuse_req *req) |
94 | { | 94 | { |
95 | req->in.h.uid = current_fsuid(); | 95 | req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid()); |
96 | req->in.h.gid = current_fsgid(); | 96 | req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid()); |
97 | req->in.h.pid = current->pid; | 97 | req->in.h.pid = current->pid; |
98 | } | 98 | } |
99 | 99 | ||
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 324bc085053..b7c09f9eb40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, | |||
818 | stat->ino = attr->ino; | 818 | stat->ino = attr->ino; |
819 | stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); | 819 | stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); |
820 | stat->nlink = attr->nlink; | 820 | stat->nlink = attr->nlink; |
821 | stat->uid = attr->uid; | 821 | stat->uid = make_kuid(&init_user_ns, attr->uid); |
822 | stat->gid = attr->gid; | 822 | stat->gid = make_kgid(&init_user_ns, attr->gid); |
823 | stat->rdev = inode->i_rdev; | 823 | stat->rdev = inode->i_rdev; |
824 | stat->atime.tv_sec = attr->atime; | 824 | stat->atime.tv_sec = attr->atime; |
825 | stat->atime.tv_nsec = attr->atimensec; | 825 | stat->atime.tv_nsec = attr->atimensec; |
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) | |||
1007 | rcu_read_lock(); | 1007 | rcu_read_lock(); |
1008 | ret = 0; | 1008 | ret = 0; |
1009 | cred = __task_cred(task); | 1009 | cred = __task_cred(task); |
1010 | if (cred->euid == fc->user_id && | 1010 | if (uid_eq(cred->euid, fc->user_id) && |
1011 | cred->suid == fc->user_id && | 1011 | uid_eq(cred->suid, fc->user_id) && |
1012 | cred->uid == fc->user_id && | 1012 | uid_eq(cred->uid, fc->user_id) && |
1013 | cred->egid == fc->group_id && | 1013 | gid_eq(cred->egid, fc->group_id) && |
1014 | cred->sgid == fc->group_id && | 1014 | gid_eq(cred->sgid, fc->group_id) && |
1015 | cred->gid == fc->group_id) | 1015 | gid_eq(cred->gid, fc->group_id)) |
1016 | ret = 1; | 1016 | ret = 1; |
1017 | rcu_read_unlock(); | 1017 | rcu_read_unlock(); |
1018 | 1018 | ||
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | |||
1306 | if (ivalid & ATTR_MODE) | 1306 | if (ivalid & ATTR_MODE) |
1307 | arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; | 1307 | arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; |
1308 | if (ivalid & ATTR_UID) | 1308 | if (ivalid & ATTR_UID) |
1309 | arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; | 1309 | arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid); |
1310 | if (ivalid & ATTR_GID) | 1310 | if (ivalid & ATTR_GID) |
1311 | arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; | 1311 | arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid); |
1312 | if (ivalid & ATTR_SIZE) | 1312 | if (ivalid & ATTR_SIZE) |
1313 | arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; | 1313 | arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; |
1314 | if (ivalid & ATTR_ATIME) { | 1314 | if (ivalid & ATTR_ATIME) { |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e24dd74e306..e105a53fc72 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -333,10 +333,10 @@ struct fuse_conn { | |||
333 | atomic_t count; | 333 | atomic_t count; |
334 | 334 | ||
335 | /** The user id for this mount */ | 335 | /** The user id for this mount */ |
336 | uid_t user_id; | 336 | kuid_t user_id; |
337 | 337 | ||
338 | /** The group id for this mount */ | 338 | /** The group id for this mount */ |
339 | gid_t group_id; | 339 | kgid_t group_id; |
340 | 340 | ||
341 | /** The fuse mount flags for this mount */ | 341 | /** The fuse mount flags for this mount */ |
342 | unsigned flags; | 342 | unsigned flags; |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0eda124cff..73ca6b72bea 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh, | |||
60 | struct fuse_mount_data { | 60 | struct fuse_mount_data { |
61 | int fd; | 61 | int fd; |
62 | unsigned rootmode; | 62 | unsigned rootmode; |
63 | unsigned user_id; | 63 | kuid_t user_id; |
64 | unsigned group_id; | 64 | kgid_t group_id; |
65 | unsigned fd_present:1; | 65 | unsigned fd_present:1; |
66 | unsigned rootmode_present:1; | 66 | unsigned rootmode_present:1; |
67 | unsigned user_id_present:1; | 67 | unsigned user_id_present:1; |
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | |||
164 | inode->i_ino = fuse_squash_ino(attr->ino); | 164 | inode->i_ino = fuse_squash_ino(attr->ino); |
165 | inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); | 165 | inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); |
166 | set_nlink(inode, attr->nlink); | 166 | set_nlink(inode, attr->nlink); |
167 | inode->i_uid = attr->uid; | 167 | inode->i_uid = make_kuid(&init_user_ns, attr->uid); |
168 | inode->i_gid = attr->gid; | 168 | inode->i_gid = make_kgid(&init_user_ns, attr->gid); |
169 | inode->i_blocks = attr->blocks; | 169 | inode->i_blocks = attr->blocks; |
170 | inode->i_atime.tv_sec = attr->atime; | 170 | inode->i_atime.tv_sec = attr->atime; |
171 | inode->i_atime.tv_nsec = attr->atimensec; | 171 | inode->i_atime.tv_nsec = attr->atimensec; |
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) | |||
492 | case OPT_USER_ID: | 492 | case OPT_USER_ID: |
493 | if (match_int(&args[0], &value)) | 493 | if (match_int(&args[0], &value)) |
494 | return 0; | 494 | return 0; |
495 | d->user_id = value; | 495 | d->user_id = make_kuid(current_user_ns(), value); |
496 | if (!uid_valid(d->user_id)) | ||
497 | return 0; | ||
496 | d->user_id_present = 1; | 498 | d->user_id_present = 1; |
497 | break; | 499 | break; |
498 | 500 | ||
499 | case OPT_GROUP_ID: | 501 | case OPT_GROUP_ID: |
500 | if (match_int(&args[0], &value)) | 502 | if (match_int(&args[0], &value)) |
501 | return 0; | 503 | return 0; |
502 | d->group_id = value; | 504 | d->group_id = make_kgid(current_user_ns(), value); |
505 | if (!gid_valid(d->group_id)) | ||
506 | return 0; | ||
503 | d->group_id_present = 1; | 507 | d->group_id_present = 1; |
504 | break; | 508 | break; |
505 | 509 | ||
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) | |||
540 | struct super_block *sb = root->d_sb; | 544 | struct super_block *sb = root->d_sb; |
541 | struct fuse_conn *fc = get_fuse_conn_super(sb); | 545 | struct fuse_conn *fc = get_fuse_conn_super(sb); |
542 | 546 | ||
543 | seq_printf(m, ",user_id=%u", fc->user_id); | 547 | seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id)); |
544 | seq_printf(m, ",group_id=%u", fc->group_id); | 548 | seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id)); |
545 | if (fc->flags & FUSE_DEFAULT_PERMISSIONS) | 549 | if (fc->flags & FUSE_DEFAULT_PERMISSIONS) |
546 | seq_puts(m, ",default_permissions"); | 550 | seq_puts(m, ",default_permissions"); |
547 | if (fc->flags & FUSE_ALLOW_OTHER) | 551 | if (fc->flags & FUSE_ALLOW_OTHER) |
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
989 | if (!file) | 993 | if (!file) |
990 | goto err; | 994 | goto err; |
991 | 995 | ||
992 | if (file->f_op != &fuse_dev_operations) | 996 | if ((file->f_op != &fuse_dev_operations) || |
997 | (file->f_cred->user_ns != &init_user_ns)) | ||
993 | goto err_fput; | 998 | goto err_fput; |
994 | 999 | ||
995 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); | 1000 | fc = kmalloc(sizeof(*fc), GFP_KERNEL); |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 78f21f8dc2e..43b315f2002 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) | |||
710 | struct vfsmount *proc_mnt; | 710 | struct vfsmount *proc_mnt; |
711 | int err = -ENOENT; | 711 | int err = -ENOENT; |
712 | 712 | ||
713 | proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); | 713 | proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt); |
714 | if (IS_ERR(proc_mnt)) | 714 | if (IS_ERR(proc_mnt)) |
715 | goto out; | 715 | goto out; |
716 | 716 | ||
diff --git a/fs/mount.h b/fs/mount.h index 4f291f9de64..cd500798040 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -4,8 +4,11 @@ | |||
4 | 4 | ||
5 | struct mnt_namespace { | 5 | struct mnt_namespace { |
6 | atomic_t count; | 6 | atomic_t count; |
7 | unsigned int proc_inum; | ||
7 | struct mount * root; | 8 | struct mount * root; |
8 | struct list_head list; | 9 | struct list_head list; |
10 | struct user_namespace *user_ns; | ||
11 | u64 seq; /* Sequence number to prevent loops */ | ||
9 | wait_queue_head_t poll; | 12 | wait_queue_head_t poll; |
10 | int event; | 13 | int event; |
11 | }; | 14 | }; |
diff --git a/fs/namespace.c b/fs/namespace.c index 24960626bb6..c1bbe86f492 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/export.h> | 12 | #include <linux/export.h> |
13 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
14 | #include <linux/mnt_namespace.h> | 14 | #include <linux/mnt_namespace.h> |
15 | #include <linux/user_namespace.h> | ||
15 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
16 | #include <linux/security.h> | 17 | #include <linux/security.h> |
17 | #include <linux/idr.h> | 18 | #include <linux/idr.h> |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
21 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
22 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/proc_fs.h> | ||
23 | #include "pnode.h" | 25 | #include "pnode.h" |
24 | #include "internal.h" | 26 | #include "internal.h" |
25 | 27 | ||
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
784 | if (!mnt) | 786 | if (!mnt) |
785 | return ERR_PTR(-ENOMEM); | 787 | return ERR_PTR(-ENOMEM); |
786 | 788 | ||
787 | if (flag & (CL_SLAVE | CL_PRIVATE)) | 789 | if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) |
788 | mnt->mnt_group_id = 0; /* not a peer of original */ | 790 | mnt->mnt_group_id = 0; /* not a peer of original */ |
789 | else | 791 | else |
790 | mnt->mnt_group_id = old->mnt_group_id; | 792 | mnt->mnt_group_id = old->mnt_group_id; |
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
805 | list_add_tail(&mnt->mnt_instance, &sb->s_mounts); | 807 | list_add_tail(&mnt->mnt_instance, &sb->s_mounts); |
806 | br_write_unlock(&vfsmount_lock); | 808 | br_write_unlock(&vfsmount_lock); |
807 | 809 | ||
808 | if (flag & CL_SLAVE) { | 810 | if ((flag & CL_SLAVE) || |
811 | ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { | ||
809 | list_add(&mnt->mnt_slave, &old->mnt_slave_list); | 812 | list_add(&mnt->mnt_slave, &old->mnt_slave_list); |
810 | mnt->mnt_master = old; | 813 | mnt->mnt_master = old; |
811 | CLEAR_MNT_SHARED(mnt); | 814 | CLEAR_MNT_SHARED(mnt); |
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) | |||
1266 | goto dput_and_out; | 1269 | goto dput_and_out; |
1267 | 1270 | ||
1268 | retval = -EPERM; | 1271 | retval = -EPERM; |
1269 | if (!capable(CAP_SYS_ADMIN)) | 1272 | if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) |
1270 | goto dput_and_out; | 1273 | goto dput_and_out; |
1271 | 1274 | ||
1272 | retval = do_umount(mnt, flags); | 1275 | retval = do_umount(mnt, flags); |
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) | |||
1292 | 1295 | ||
1293 | static int mount_is_safe(struct path *path) | 1296 | static int mount_is_safe(struct path *path) |
1294 | { | 1297 | { |
1295 | if (capable(CAP_SYS_ADMIN)) | 1298 | if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) |
1296 | return 0; | 1299 | return 0; |
1297 | return -EPERM; | 1300 | return -EPERM; |
1298 | #ifdef notyet | 1301 | #ifdef notyet |
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path) | |||
1308 | #endif | 1311 | #endif |
1309 | } | 1312 | } |
1310 | 1313 | ||
1314 | static bool mnt_ns_loop(struct path *path) | ||
1315 | { | ||
1316 | /* Could bind mounting the mount namespace inode cause a | ||
1317 | * mount namespace loop? | ||
1318 | */ | ||
1319 | struct inode *inode = path->dentry->d_inode; | ||
1320 | struct proc_inode *ei; | ||
1321 | struct mnt_namespace *mnt_ns; | ||
1322 | |||
1323 | if (!proc_ns_inode(inode)) | ||
1324 | return false; | ||
1325 | |||
1326 | ei = PROC_I(inode); | ||
1327 | if (ei->ns_ops != &mntns_operations) | ||
1328 | return false; | ||
1329 | |||
1330 | mnt_ns = ei->ns; | ||
1331 | return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; | ||
1332 | } | ||
1333 | |||
1311 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | 1334 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, |
1312 | int flag) | 1335 | int flag) |
1313 | { | 1336 | { |
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag) | |||
1610 | int type; | 1633 | int type; |
1611 | int err = 0; | 1634 | int err = 0; |
1612 | 1635 | ||
1613 | if (!capable(CAP_SYS_ADMIN)) | 1636 | if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) |
1614 | return -EPERM; | 1637 | return -EPERM; |
1615 | 1638 | ||
1616 | if (path->dentry != path->mnt->mnt_root) | 1639 | if (path->dentry != path->mnt->mnt_root) |
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name, | |||
1655 | if (err) | 1678 | if (err) |
1656 | return err; | 1679 | return err; |
1657 | 1680 | ||
1681 | err = -EINVAL; | ||
1682 | if (mnt_ns_loop(&old_path)) | ||
1683 | goto out; | ||
1684 | |||
1658 | err = lock_mount(path); | 1685 | err = lock_mount(path); |
1659 | if (err) | 1686 | if (err) |
1660 | goto out; | 1687 | goto out; |
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1770 | struct mount *p; | 1797 | struct mount *p; |
1771 | struct mount *old; | 1798 | struct mount *old; |
1772 | int err = 0; | 1799 | int err = 0; |
1773 | if (!capable(CAP_SYS_ADMIN)) | 1800 | if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) |
1774 | return -EPERM; | 1801 | return -EPERM; |
1775 | if (!old_name || !*old_name) | 1802 | if (!old_name || !*old_name) |
1776 | return -EINVAL; | 1803 | return -EINVAL; |
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | |||
1857 | return ERR_PTR(err); | 1884 | return ERR_PTR(err); |
1858 | } | 1885 | } |
1859 | 1886 | ||
1860 | static struct vfsmount * | ||
1861 | do_kern_mount(const char *fstype, int flags, const char *name, void *data) | ||
1862 | { | ||
1863 | struct file_system_type *type = get_fs_type(fstype); | ||
1864 | struct vfsmount *mnt; | ||
1865 | if (!type) | ||
1866 | return ERR_PTR(-ENODEV); | ||
1867 | mnt = vfs_kern_mount(type, flags, name, data); | ||
1868 | if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && | ||
1869 | !mnt->mnt_sb->s_subtype) | ||
1870 | mnt = fs_set_subtype(mnt, fstype); | ||
1871 | put_filesystem(type); | ||
1872 | return mnt; | ||
1873 | } | ||
1874 | |||
1875 | /* | 1887 | /* |
1876 | * add a mount into a namespace's mount tree | 1888 | * add a mount into a namespace's mount tree |
1877 | */ | 1889 | */ |
@@ -1917,20 +1929,46 @@ unlock: | |||
1917 | * create a new mount for userspace and request it to be added into the | 1929 | * create a new mount for userspace and request it to be added into the |
1918 | * namespace's tree | 1930 | * namespace's tree |
1919 | */ | 1931 | */ |
1920 | static int do_new_mount(struct path *path, const char *type, int flags, | 1932 | static int do_new_mount(struct path *path, const char *fstype, int flags, |
1921 | int mnt_flags, const char *name, void *data) | 1933 | int mnt_flags, const char *name, void *data) |
1922 | { | 1934 | { |
1935 | struct file_system_type *type; | ||
1936 | struct user_namespace *user_ns; | ||
1923 | struct vfsmount *mnt; | 1937 | struct vfsmount *mnt; |
1924 | int err; | 1938 | int err; |
1925 | 1939 | ||
1926 | if (!type) | 1940 | if (!fstype) |
1927 | return -EINVAL; | 1941 | return -EINVAL; |
1928 | 1942 | ||
1929 | /* we need capabilities... */ | 1943 | /* we need capabilities... */ |
1930 | if (!capable(CAP_SYS_ADMIN)) | 1944 | user_ns = real_mount(path->mnt)->mnt_ns->user_ns; |
1945 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
1931 | return -EPERM; | 1946 | return -EPERM; |
1932 | 1947 | ||
1933 | mnt = do_kern_mount(type, flags, name, data); | 1948 | type = get_fs_type(fstype); |
1949 | if (!type) | ||
1950 | return -ENODEV; | ||
1951 | |||
1952 | if (user_ns != &init_user_ns) { | ||
1953 | if (!(type->fs_flags & FS_USERNS_MOUNT)) { | ||
1954 | put_filesystem(type); | ||
1955 | return -EPERM; | ||
1956 | } | ||
1957 | /* Only in special cases allow devices from mounts | ||
1958 | * created outside the initial user namespace. | ||
1959 | */ | ||
1960 | if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { | ||
1961 | flags |= MS_NODEV; | ||
1962 | mnt_flags |= MNT_NODEV; | ||
1963 | } | ||
1964 | } | ||
1965 | |||
1966 | mnt = vfs_kern_mount(type, flags, name, data); | ||
1967 | if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && | ||
1968 | !mnt->mnt_sb->s_subtype) | ||
1969 | mnt = fs_set_subtype(mnt, fstype); | ||
1970 | |||
1971 | put_filesystem(type); | ||
1934 | if (IS_ERR(mnt)) | 1972 | if (IS_ERR(mnt)) |
1935 | return PTR_ERR(mnt); | 1973 | return PTR_ERR(mnt); |
1936 | 1974 | ||
@@ -2261,18 +2299,42 @@ dput_out: | |||
2261 | return retval; | 2299 | return retval; |
2262 | } | 2300 | } |
2263 | 2301 | ||
2264 | static struct mnt_namespace *alloc_mnt_ns(void) | 2302 | static void free_mnt_ns(struct mnt_namespace *ns) |
2303 | { | ||
2304 | proc_free_inum(ns->proc_inum); | ||
2305 | put_user_ns(ns->user_ns); | ||
2306 | kfree(ns); | ||
2307 | } | ||
2308 | |||
2309 | /* | ||
2310 | * Assign a sequence number so we can detect when we attempt to bind | ||
2311 | * mount a reference to an older mount namespace into the current | ||
2312 | * mount namespace, preventing reference counting loops. A 64bit | ||
2313 | * number incrementing at 10Ghz will take 12,427 years to wrap which | ||
2314 | * is effectively never, so we can ignore the possibility. | ||
2315 | */ | ||
2316 | static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); | ||
2317 | |||
2318 | static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) | ||
2265 | { | 2319 | { |
2266 | struct mnt_namespace *new_ns; | 2320 | struct mnt_namespace *new_ns; |
2321 | int ret; | ||
2267 | 2322 | ||
2268 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); | 2323 | new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); |
2269 | if (!new_ns) | 2324 | if (!new_ns) |
2270 | return ERR_PTR(-ENOMEM); | 2325 | return ERR_PTR(-ENOMEM); |
2326 | ret = proc_alloc_inum(&new_ns->proc_inum); | ||
2327 | if (ret) { | ||
2328 | kfree(new_ns); | ||
2329 | return ERR_PTR(ret); | ||
2330 | } | ||
2331 | new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); | ||
2271 | atomic_set(&new_ns->count, 1); | 2332 | atomic_set(&new_ns->count, 1); |
2272 | new_ns->root = NULL; | 2333 | new_ns->root = NULL; |
2273 | INIT_LIST_HEAD(&new_ns->list); | 2334 | INIT_LIST_HEAD(&new_ns->list); |
2274 | init_waitqueue_head(&new_ns->poll); | 2335 | init_waitqueue_head(&new_ns->poll); |
2275 | new_ns->event = 0; | 2336 | new_ns->event = 0; |
2337 | new_ns->user_ns = get_user_ns(user_ns); | ||
2276 | return new_ns; | 2338 | return new_ns; |
2277 | } | 2339 | } |
2278 | 2340 | ||
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void) | |||
2281 | * copied from the namespace of the passed in task structure. | 2343 | * copied from the namespace of the passed in task structure. |
2282 | */ | 2344 | */ |
2283 | static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | 2345 | static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, |
2284 | struct fs_struct *fs) | 2346 | struct user_namespace *user_ns, struct fs_struct *fs) |
2285 | { | 2347 | { |
2286 | struct mnt_namespace *new_ns; | 2348 | struct mnt_namespace *new_ns; |
2287 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; | 2349 | struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; |
2288 | struct mount *p, *q; | 2350 | struct mount *p, *q; |
2289 | struct mount *old = mnt_ns->root; | 2351 | struct mount *old = mnt_ns->root; |
2290 | struct mount *new; | 2352 | struct mount *new; |
2353 | int copy_flags; | ||
2291 | 2354 | ||
2292 | new_ns = alloc_mnt_ns(); | 2355 | new_ns = alloc_mnt_ns(user_ns); |
2293 | if (IS_ERR(new_ns)) | 2356 | if (IS_ERR(new_ns)) |
2294 | return new_ns; | 2357 | return new_ns; |
2295 | 2358 | ||
2296 | down_write(&namespace_sem); | 2359 | down_write(&namespace_sem); |
2297 | /* First pass: copy the tree topology */ | 2360 | /* First pass: copy the tree topology */ |
2298 | new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); | 2361 | copy_flags = CL_COPY_ALL | CL_EXPIRE; |
2362 | if (user_ns != mnt_ns->user_ns) | ||
2363 | copy_flags |= CL_SHARED_TO_SLAVE; | ||
2364 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); | ||
2299 | if (IS_ERR(new)) { | 2365 | if (IS_ERR(new)) { |
2300 | up_write(&namespace_sem); | 2366 | up_write(&namespace_sem); |
2301 | kfree(new_ns); | 2367 | free_mnt_ns(new_ns); |
2302 | return ERR_CAST(new); | 2368 | return ERR_CAST(new); |
2303 | } | 2369 | } |
2304 | new_ns->root = new; | 2370 | new_ns->root = new; |
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2339 | } | 2405 | } |
2340 | 2406 | ||
2341 | struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | 2407 | struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, |
2342 | struct fs_struct *new_fs) | 2408 | struct user_namespace *user_ns, struct fs_struct *new_fs) |
2343 | { | 2409 | { |
2344 | struct mnt_namespace *new_ns; | 2410 | struct mnt_namespace *new_ns; |
2345 | 2411 | ||
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
2349 | if (!(flags & CLONE_NEWNS)) | 2415 | if (!(flags & CLONE_NEWNS)) |
2350 | return ns; | 2416 | return ns; |
2351 | 2417 | ||
2352 | new_ns = dup_mnt_ns(ns, new_fs); | 2418 | new_ns = dup_mnt_ns(ns, user_ns, new_fs); |
2353 | 2419 | ||
2354 | put_mnt_ns(ns); | 2420 | put_mnt_ns(ns); |
2355 | return new_ns; | 2421 | return new_ns; |
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, | |||
2361 | */ | 2427 | */ |
2362 | static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) | 2428 | static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) |
2363 | { | 2429 | { |
2364 | struct mnt_namespace *new_ns = alloc_mnt_ns(); | 2430 | struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); |
2365 | if (!IS_ERR(new_ns)) { | 2431 | if (!IS_ERR(new_ns)) { |
2366 | struct mount *mnt = real_mount(m); | 2432 | struct mount *mnt = real_mount(m); |
2367 | mnt->mnt_ns = new_ns; | 2433 | mnt->mnt_ns = new_ns; |
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2501 | struct mount *new_mnt, *root_mnt; | 2567 | struct mount *new_mnt, *root_mnt; |
2502 | int error; | 2568 | int error; |
2503 | 2569 | ||
2504 | if (!capable(CAP_SYS_ADMIN)) | 2570 | if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) |
2505 | return -EPERM; | 2571 | return -EPERM; |
2506 | 2572 | ||
2507 | error = user_path_dir(new_root, &new); | 2573 | error = user_path_dir(new_root, &new); |
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void) | |||
2583 | struct vfsmount *mnt; | 2649 | struct vfsmount *mnt; |
2584 | struct mnt_namespace *ns; | 2650 | struct mnt_namespace *ns; |
2585 | struct path root; | 2651 | struct path root; |
2652 | struct file_system_type *type; | ||
2586 | 2653 | ||
2587 | mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); | 2654 | type = get_fs_type("rootfs"); |
2655 | if (!type) | ||
2656 | panic("Can't find rootfs type"); | ||
2657 | mnt = vfs_kern_mount(type, 0, "rootfs", NULL); | ||
2658 | put_filesystem(type); | ||
2588 | if (IS_ERR(mnt)) | 2659 | if (IS_ERR(mnt)) |
2589 | panic("Can't create rootfs"); | 2660 | panic("Can't create rootfs"); |
2590 | 2661 | ||
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns) | |||
2647 | br_write_unlock(&vfsmount_lock); | 2718 | br_write_unlock(&vfsmount_lock); |
2648 | up_write(&namespace_sem); | 2719 | up_write(&namespace_sem); |
2649 | release_mounts(&umount_list); | 2720 | release_mounts(&umount_list); |
2650 | kfree(ns); | 2721 | free_mnt_ns(ns); |
2651 | } | 2722 | } |
2652 | 2723 | ||
2653 | struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) | 2724 | struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) |
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt) | |||
2681 | { | 2752 | { |
2682 | return check_mnt(real_mount(mnt)); | 2753 | return check_mnt(real_mount(mnt)); |
2683 | } | 2754 | } |
2755 | |||
2756 | static void *mntns_get(struct task_struct *task) | ||
2757 | { | ||
2758 | struct mnt_namespace *ns = NULL; | ||
2759 | struct nsproxy *nsproxy; | ||
2760 | |||
2761 | rcu_read_lock(); | ||
2762 | nsproxy = task_nsproxy(task); | ||
2763 | if (nsproxy) { | ||
2764 | ns = nsproxy->mnt_ns; | ||
2765 | get_mnt_ns(ns); | ||
2766 | } | ||
2767 | rcu_read_unlock(); | ||
2768 | |||
2769 | return ns; | ||
2770 | } | ||
2771 | |||
2772 | static void mntns_put(void *ns) | ||
2773 | { | ||
2774 | put_mnt_ns(ns); | ||
2775 | } | ||
2776 | |||
2777 | static int mntns_install(struct nsproxy *nsproxy, void *ns) | ||
2778 | { | ||
2779 | struct fs_struct *fs = current->fs; | ||
2780 | struct mnt_namespace *mnt_ns = ns; | ||
2781 | struct path root; | ||
2782 | |||
2783 | if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || | ||
2784 | !nsown_capable(CAP_SYS_CHROOT)) | ||
2785 | return -EPERM; | ||
2786 | |||
2787 | if (fs->users != 1) | ||
2788 | return -EINVAL; | ||
2789 | |||
2790 | get_mnt_ns(mnt_ns); | ||
2791 | put_mnt_ns(nsproxy->mnt_ns); | ||
2792 | nsproxy->mnt_ns = mnt_ns; | ||
2793 | |||
2794 | /* Find the root */ | ||
2795 | root.mnt = &mnt_ns->root->mnt; | ||
2796 | root.dentry = mnt_ns->root->mnt.mnt_root; | ||
2797 | path_get(&root); | ||
2798 | while(d_mountpoint(root.dentry) && follow_down_one(&root)) | ||
2799 | ; | ||
2800 | |||
2801 | /* Update the pwd and root */ | ||
2802 | set_fs_pwd(fs, &root); | ||
2803 | set_fs_root(fs, &root); | ||
2804 | |||
2805 | path_put(&root); | ||
2806 | return 0; | ||
2807 | } | ||
2808 | |||
2809 | static unsigned int mntns_inum(void *ns) | ||
2810 | { | ||
2811 | struct mnt_namespace *mnt_ns = ns; | ||
2812 | return mnt_ns->proc_inum; | ||
2813 | } | ||
2814 | |||
2815 | const struct proc_ns_operations mntns_operations = { | ||
2816 | .name = "mnt", | ||
2817 | .type = CLONE_NEWNS, | ||
2818 | .get = mntns_get, | ||
2819 | .put = mntns_put, | ||
2820 | .install = mntns_install, | ||
2821 | .inum = mntns_inum, | ||
2822 | }; | ||
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) | |||
435 | goto dput_and_out; | 435 | goto dput_and_out; |
436 | 436 | ||
437 | error = -EPERM; | 437 | error = -EPERM; |
438 | if (!capable(CAP_SYS_CHROOT)) | 438 | if (!nsown_capable(CAP_SYS_CHROOT)) |
439 | goto dput_and_out; | 439 | goto dput_and_out; |
440 | error = security_path_chroot(&path); | 440 | error = security_path_chroot(&path); |
441 | if (error) | 441 | if (error) |
diff --git a/fs/pnode.h b/fs/pnode.h index 65c60979d54..19b853a3445 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #define CL_COPY_ALL 0x04 | 22 | #define CL_COPY_ALL 0x04 |
23 | #define CL_MAKE_SHARED 0x08 | 23 | #define CL_MAKE_SHARED 0x08 |
24 | #define CL_PRIVATE 0x10 | 24 | #define CL_PRIVATE 0x10 |
25 | #define CL_SHARED_TO_SLAVE 0x20 | ||
25 | 26 | ||
26 | static inline void set_mnt_shared(struct mount *mnt) | 27 | static inline void set_mnt_shared(struct mount *mnt) |
27 | { | 28 | { |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 99349efbbc2..981b0560193 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -21,6 +21,7 @@ proc-y += uptime.o | |||
21 | proc-y += version.o | 21 | proc-y += version.o |
22 | proc-y += softirqs.o | 22 | proc-y += softirqs.o |
23 | proc-y += namespaces.o | 23 | proc-y += namespaces.o |
24 | proc-y += self.o | ||
24 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | 25 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o |
25 | proc-$(CONFIG_NET) += proc_net.o | 26 | proc-$(CONFIG_NET) += proc_net.o |
26 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 27 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
diff --git a/fs/proc/array.c b/fs/proc/array.c index d3696708fc1..d66248a1919 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) | |||
162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | 162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, |
163 | struct pid *pid, struct task_struct *p) | 163 | struct pid *pid, struct task_struct *p) |
164 | { | 164 | { |
165 | struct user_namespace *user_ns = current_user_ns(); | 165 | struct user_namespace *user_ns = seq_user_ns(m); |
166 | struct group_info *group_info; | 166 | struct group_info *group_info; |
167 | int g; | 167 | int g; |
168 | struct fdtable *fdt = NULL; | 168 | struct fdtable *fdt = NULL; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index aa63d25157b..5a5a0be40e4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = { | |||
2345 | }; | 2345 | }; |
2346 | #endif | 2346 | #endif |
2347 | 2347 | ||
2348 | /* | ||
2349 | * /proc/self: | ||
2350 | */ | ||
2351 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
2352 | int buflen) | ||
2353 | { | ||
2354 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
2355 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
2356 | char tmp[PROC_NUMBUF]; | ||
2357 | if (!tgid) | ||
2358 | return -ENOENT; | ||
2359 | sprintf(tmp, "%d", tgid); | ||
2360 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
2361 | } | ||
2362 | |||
2363 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
2364 | { | ||
2365 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
2366 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
2367 | char *name = ERR_PTR(-ENOENT); | ||
2368 | if (tgid) { | ||
2369 | /* 11 for max length of signed int in decimal + NULL term */ | ||
2370 | name = kmalloc(12, GFP_KERNEL); | ||
2371 | if (!name) | ||
2372 | name = ERR_PTR(-ENOMEM); | ||
2373 | else | ||
2374 | sprintf(name, "%d", tgid); | ||
2375 | } | ||
2376 | nd_set_link(nd, name); | ||
2377 | return NULL; | ||
2378 | } | ||
2379 | |||
2380 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | ||
2381 | void *cookie) | ||
2382 | { | ||
2383 | char *s = nd_get_link(nd); | ||
2384 | if (!IS_ERR(s)) | ||
2385 | kfree(s); | ||
2386 | } | ||
2387 | |||
2388 | static const struct inode_operations proc_self_inode_operations = { | ||
2389 | .readlink = proc_self_readlink, | ||
2390 | .follow_link = proc_self_follow_link, | ||
2391 | .put_link = proc_self_put_link, | ||
2392 | }; | ||
2393 | |||
2394 | /* | ||
2395 | * proc base | ||
2396 | * | ||
2397 | * These are the directory entries in the root directory of /proc | ||
2398 | * that properly belong to the /proc filesystem, as they describe | ||
2400 | * something that is process related. | |
2400 | */ | ||
2401 | static const struct pid_entry proc_base_stuff[] = { | ||
2402 | NOD("self", S_IFLNK|S_IRWXUGO, | ||
2403 | &proc_self_inode_operations, NULL, {}), | ||
2404 | }; | ||
2405 | |||
2406 | static struct dentry *proc_base_instantiate(struct inode *dir, | ||
2407 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
2408 | { | ||
2409 | const struct pid_entry *p = ptr; | ||
2410 | struct inode *inode; | ||
2411 | struct proc_inode *ei; | ||
2412 | struct dentry *error; | ||
2413 | |||
2414 | /* Allocate the inode */ | ||
2415 | error = ERR_PTR(-ENOMEM); | ||
2416 | inode = new_inode(dir->i_sb); | ||
2417 | if (!inode) | ||
2418 | goto out; | ||
2419 | |||
2420 | /* Initialize the inode */ | ||
2421 | ei = PROC_I(inode); | ||
2422 | inode->i_ino = get_next_ino(); | ||
2423 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
2424 | |||
2425 | /* | ||
2426 | * grab the reference to the task. | ||
2427 | */ | ||
2428 | ei->pid = get_task_pid(task, PIDTYPE_PID); | ||
2429 | if (!ei->pid) | ||
2430 | goto out_iput; | ||
2431 | |||
2432 | inode->i_mode = p->mode; | ||
2433 | if (S_ISDIR(inode->i_mode)) | ||
2434 | set_nlink(inode, 2); | ||
2435 | if (S_ISLNK(inode->i_mode)) | ||
2436 | inode->i_size = 64; | ||
2437 | if (p->iop) | ||
2438 | inode->i_op = p->iop; | ||
2439 | if (p->fop) | ||
2440 | inode->i_fop = p->fop; | ||
2441 | ei->op = p->op; | ||
2442 | d_add(dentry, inode); | ||
2443 | error = NULL; | ||
2444 | out: | ||
2445 | return error; | ||
2446 | out_iput: | ||
2447 | iput(inode); | ||
2448 | goto out; | ||
2449 | } | ||
2450 | |||
2451 | static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) | ||
2452 | { | ||
2453 | struct dentry *error; | ||
2454 | struct task_struct *task = get_proc_task(dir); | ||
2455 | const struct pid_entry *p, *last; | ||
2456 | |||
2457 | error = ERR_PTR(-ENOENT); | ||
2458 | |||
2459 | if (!task) | ||
2460 | goto out_no_task; | ||
2461 | |||
2462 | /* Lookup the directory entry */ | ||
2463 | last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; | ||
2464 | for (p = proc_base_stuff; p <= last; p++) { | ||
2465 | if (p->len != dentry->d_name.len) | ||
2466 | continue; | ||
2467 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | ||
2468 | break; | ||
2469 | } | ||
2470 | if (p > last) | ||
2471 | goto out; | ||
2472 | |||
2473 | error = proc_base_instantiate(dir, dentry, task, p); | ||
2474 | |||
2475 | out: | ||
2476 | put_task_struct(task); | ||
2477 | out_no_task: | ||
2478 | return error; | ||
2479 | } | ||
2480 | |||
2481 | static int proc_base_fill_cache(struct file *filp, void *dirent, | ||
2482 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | ||
2483 | { | ||
2484 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
2485 | proc_base_instantiate, task, p); | ||
2486 | } | ||
2487 | |||
2488 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 2348 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2489 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | 2349 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) |
2490 | { | 2350 | { |
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task) | |||
2839 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, | 2699 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, |
2840 | tgid->numbers[i].nr); | 2700 | tgid->numbers[i].nr); |
2841 | } | 2701 | } |
2842 | |||
2843 | upid = &pid->numbers[pid->level]; | ||
2844 | if (upid->nr == 1) | ||
2845 | pid_ns_release_proc(upid->ns); | ||
2846 | } | 2702 | } |
2847 | 2703 | ||
2848 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 2704 | static struct dentry *proc_pid_instantiate(struct inode *dir, |
@@ -2876,15 +2732,11 @@ out: | |||
2876 | 2732 | ||
2877 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 2733 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
2878 | { | 2734 | { |
2879 | struct dentry *result; | 2735 | struct dentry *result = NULL; |
2880 | struct task_struct *task; | 2736 | struct task_struct *task; |
2881 | unsigned tgid; | 2737 | unsigned tgid; |
2882 | struct pid_namespace *ns; | 2738 | struct pid_namespace *ns; |
2883 | 2739 | ||
2884 | result = proc_base_lookup(dir, dentry); | ||
2885 | if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) | ||
2886 | goto out; | ||
2887 | |||
2888 | tgid = name_to_int(dentry); | 2740 | tgid = name_to_int(dentry); |
2889 | if (tgid == ~0U) | 2741 | if (tgid == ~0U) |
2890 | goto out; | 2742 | goto out; |
@@ -2947,7 +2799,7 @@ retry: | |||
2947 | return iter; | 2799 | return iter; |
2948 | } | 2800 | } |
2949 | 2801 | ||
2950 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) | 2802 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY) |
2951 | 2803 | ||
2952 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 2804 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
2953 | struct tgid_iter iter) | 2805 | struct tgid_iter iter) |
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen, | |||
2967 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2819 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2968 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2820 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2969 | { | 2821 | { |
2970 | unsigned int nr; | ||
2971 | struct task_struct *reaper; | ||
2972 | struct tgid_iter iter; | 2822 | struct tgid_iter iter; |
2973 | struct pid_namespace *ns; | 2823 | struct pid_namespace *ns; |
2974 | filldir_t __filldir; | 2824 | filldir_t __filldir; |
2975 | 2825 | ||
2976 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2826 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2977 | goto out_no_task; | 2827 | goto out; |
2978 | nr = filp->f_pos - FIRST_PROCESS_ENTRY; | ||
2979 | |||
2980 | reaper = get_proc_task(filp->f_path.dentry->d_inode); | ||
2981 | if (!reaper) | ||
2982 | goto out_no_task; | ||
2983 | |||
2984 | for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { | ||
2985 | const struct pid_entry *p = &proc_base_stuff[nr]; | ||
2986 | if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) | ||
2987 | goto out; | ||
2988 | } | ||
2989 | 2828 | ||
2990 | ns = filp->f_dentry->d_sb->s_fs_info; | 2829 | ns = filp->f_dentry->d_sb->s_fs_info; |
2991 | iter.task = NULL; | 2830 | iter.task = NULL; |
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
3006 | } | 2845 | } |
3007 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 2846 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; |
3008 | out: | 2847 | out: |
3009 | put_task_struct(reaper); | ||
3010 | out_no_task: | ||
3011 | return 0; | 2848 | return 0; |
3012 | } | 2849 | } |
3013 | 2850 | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0d80cef4cfb..7b3ae3cc0ef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ | |||
350 | * Return an inode number between PROC_DYNAMIC_FIRST and | 350 | * Return an inode number between PROC_DYNAMIC_FIRST and |
351 | * 0xffffffff, or zero on failure. | 351 | * 0xffffffff, or zero on failure. |
352 | */ | 352 | */ |
353 | static unsigned int get_inode_number(void) | 353 | int proc_alloc_inum(unsigned int *inum) |
354 | { | 354 | { |
355 | unsigned int i; | 355 | unsigned int i; |
356 | int error; | 356 | int error; |
357 | 357 | ||
358 | retry: | 358 | retry: |
359 | if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) | 359 | if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) |
360 | return 0; | 360 | return -ENOMEM; |
361 | 361 | ||
362 | spin_lock(&proc_inum_lock); | 362 | spin_lock(&proc_inum_lock); |
363 | error = ida_get_new(&proc_inum_ida, &i); | 363 | error = ida_get_new(&proc_inum_ida, &i); |
@@ -365,18 +365,19 @@ retry: | |||
365 | if (error == -EAGAIN) | 365 | if (error == -EAGAIN) |
366 | goto retry; | 366 | goto retry; |
367 | else if (error) | 367 | else if (error) |
368 | return 0; | 368 | return error; |
369 | 369 | ||
370 | if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { | 370 | if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { |
371 | spin_lock(&proc_inum_lock); | 371 | spin_lock(&proc_inum_lock); |
372 | ida_remove(&proc_inum_ida, i); | 372 | ida_remove(&proc_inum_ida, i); |
373 | spin_unlock(&proc_inum_lock); | 373 | spin_unlock(&proc_inum_lock); |
374 | return 0; | 374 | return -ENOSPC; |
375 | } | 375 | } |
376 | return PROC_DYNAMIC_FIRST + i; | 376 | *inum = PROC_DYNAMIC_FIRST + i; |
377 | return 0; | ||
377 | } | 378 | } |
378 | 379 | ||
379 | static void release_inode_number(unsigned int inum) | 380 | void proc_free_inum(unsigned int inum) |
380 | { | 381 | { |
381 | spin_lock(&proc_inum_lock); | 382 | spin_lock(&proc_inum_lock); |
382 | ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); | 383 | ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); |
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = { | |||
554 | 555 | ||
555 | static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) | 556 | static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) |
556 | { | 557 | { |
557 | unsigned int i; | ||
558 | struct proc_dir_entry *tmp; | 558 | struct proc_dir_entry *tmp; |
559 | int ret; | ||
559 | 560 | ||
560 | i = get_inode_number(); | 561 | ret = proc_alloc_inum(&dp->low_ino); |
561 | if (i == 0) | 562 | if (ret) |
562 | return -EAGAIN; | 563 | return ret; |
563 | dp->low_ino = i; | ||
564 | 564 | ||
565 | if (S_ISDIR(dp->mode)) { | 565 | if (S_ISDIR(dp->mode)) { |
566 | if (dp->proc_iops == NULL) { | 566 | if (dp->proc_iops == NULL) { |
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data); | |||
764 | 764 | ||
765 | static void free_proc_entry(struct proc_dir_entry *de) | 765 | static void free_proc_entry(struct proc_dir_entry *de) |
766 | { | 766 | { |
767 | release_inode_number(de->low_ino); | 767 | proc_free_inum(de->low_ino); |
768 | 768 | ||
769 | if (S_ISLNK(de->mode)) | 769 | if (S_ISLNK(de->mode)) |
770 | kfree(de->data); | 770 | kfree(de->data); |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3b22bbdee9e..439ae688650 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode) | |||
31 | struct proc_dir_entry *de; | 31 | struct proc_dir_entry *de; |
32 | struct ctl_table_header *head; | 32 | struct ctl_table_header *head; |
33 | const struct proc_ns_operations *ns_ops; | 33 | const struct proc_ns_operations *ns_ops; |
34 | void *ns; | ||
34 | 35 | ||
35 | truncate_inode_pages(&inode->i_data, 0); | 36 | truncate_inode_pages(&inode->i_data, 0); |
36 | clear_inode(inode); | 37 | clear_inode(inode); |
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode) | |||
49 | } | 50 | } |
50 | /* Release any associated namespace */ | 51 | /* Release any associated namespace */ |
51 | ns_ops = PROC_I(inode)->ns_ops; | 52 | ns_ops = PROC_I(inode)->ns_ops; |
52 | if (ns_ops && ns_ops->put) | 53 | ns = PROC_I(inode)->ns; |
53 | ns_ops->put(PROC_I(inode)->ns); | 54 | if (ns_ops && ns) |
55 | ns_ops->put(ns); | ||
54 | } | 56 | } |
55 | 57 | ||
56 | static struct kmem_cache * proc_inode_cachep; | 58 | static struct kmem_cache * proc_inode_cachep; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 43973b084ab..252544c0520 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -15,6 +15,7 @@ struct ctl_table_header; | |||
15 | struct mempolicy; | 15 | struct mempolicy; |
16 | 16 | ||
17 | extern struct proc_dir_entry proc_root; | 17 | extern struct proc_dir_entry proc_root; |
18 | extern void proc_self_init(void); | ||
18 | #ifdef CONFIG_PROC_SYSCTL | 19 | #ifdef CONFIG_PROC_SYSCTL |
19 | extern int proc_sys_init(void); | 20 | extern int proc_sys_init(void); |
20 | extern void sysctl_head_put(struct ctl_table_header *head); | 21 | extern void sysctl_head_put(struct ctl_table_header *head); |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b178ed733c3..b7a47196c8c 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <net/net_namespace.h> | 11 | #include <net/net_namespace.h> |
12 | #include <linux/ipc_namespace.h> | 12 | #include <linux/ipc_namespace.h> |
13 | #include <linux/pid_namespace.h> | 13 | #include <linux/pid_namespace.h> |
14 | #include <linux/user_namespace.h> | ||
14 | #include "internal.h" | 15 | #include "internal.h" |
15 | 16 | ||
16 | 17 | ||
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = { | |||
24 | #ifdef CONFIG_IPC_NS | 25 | #ifdef CONFIG_IPC_NS |
25 | &ipcns_operations, | 26 | &ipcns_operations, |
26 | #endif | 27 | #endif |
28 | #ifdef CONFIG_PID_NS | ||
29 | &pidns_operations, | ||
30 | #endif | ||
31 | #ifdef CONFIG_USER_NS | ||
32 | &userns_operations, | ||
33 | #endif | ||
34 | &mntns_operations, | ||
27 | }; | 35 | }; |
28 | 36 | ||
29 | static const struct file_operations ns_file_operations = { | 37 | static const struct file_operations ns_file_operations = { |
30 | .llseek = no_llseek, | 38 | .llseek = no_llseek, |
31 | }; | 39 | }; |
32 | 40 | ||
41 | static const struct inode_operations ns_inode_operations = { | ||
42 | .setattr = proc_setattr, | ||
43 | }; | ||
44 | |||
45 | static int ns_delete_dentry(const struct dentry *dentry) | ||
46 | { | ||
47 | /* Don't cache namespace inodes when not in use */ | ||
48 | return 1; | ||
49 | } | ||
50 | |||
51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | ||
52 | { | ||
53 | struct inode *inode = dentry->d_inode; | ||
54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; | ||
55 | |||
56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | ||
57 | ns_ops->name, inode->i_ino); | ||
58 | } | ||
59 | |||
60 | const struct dentry_operations ns_dentry_operations = | ||
61 | { | ||
62 | .d_delete = ns_delete_dentry, | ||
63 | .d_dname = ns_dname, | ||
64 | }; | ||
65 | |||
66 | static struct dentry *proc_ns_get_dentry(struct super_block *sb, | ||
67 | struct task_struct *task, const struct proc_ns_operations *ns_ops) | ||
68 | { | ||
69 | struct dentry *dentry, *result; | ||
70 | struct inode *inode; | ||
71 | struct proc_inode *ei; | ||
72 | struct qstr qname = { .name = "", }; | ||
73 | void *ns; | ||
74 | |||
75 | ns = ns_ops->get(task); | ||
76 | if (!ns) | ||
77 | return ERR_PTR(-ENOENT); | ||
78 | |||
79 | dentry = d_alloc_pseudo(sb, &qname); | ||
80 | if (!dentry) { | ||
81 | ns_ops->put(ns); | ||
82 | return ERR_PTR(-ENOMEM); | ||
83 | } | ||
84 | |||
85 | inode = iget_locked(sb, ns_ops->inum(ns)); | ||
86 | if (!inode) { | ||
87 | dput(dentry); | ||
88 | ns_ops->put(ns); | ||
89 | return ERR_PTR(-ENOMEM); | ||
90 | } | ||
91 | |||
92 | ei = PROC_I(inode); | ||
93 | if (inode->i_state & I_NEW) { | ||
94 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
95 | inode->i_op = &ns_inode_operations; | ||
96 | inode->i_mode = S_IFREG | S_IRUGO; | ||
97 | inode->i_fop = &ns_file_operations; | ||
98 | ei->ns_ops = ns_ops; | ||
99 | ei->ns = ns; | ||
100 | unlock_new_inode(inode); | ||
101 | } else { | ||
102 | ns_ops->put(ns); | ||
103 | } | ||
104 | |||
105 | d_set_d_op(dentry, &ns_dentry_operations); | ||
106 | result = d_instantiate_unique(dentry, inode); | ||
107 | if (result) { | ||
108 | dput(dentry); | ||
109 | dentry = result; | ||
110 | } | ||
111 | |||
112 | return dentry; | ||
113 | } | ||
114 | |||
115 | static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
116 | { | ||
117 | struct inode *inode = dentry->d_inode; | ||
118 | struct super_block *sb = inode->i_sb; | ||
119 | struct proc_inode *ei = PROC_I(inode); | ||
120 | struct task_struct *task; | ||
121 | struct dentry *ns_dentry; | ||
122 | void *error = ERR_PTR(-EACCES); | ||
123 | |||
124 | task = get_proc_task(inode); | ||
125 | if (!task) | ||
126 | goto out; | ||
127 | |||
128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
129 | goto out_put_task; | ||
130 | |||
131 | ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); | ||
132 | if (IS_ERR(ns_dentry)) { | ||
133 | error = ERR_CAST(ns_dentry); | ||
134 | goto out_put_task; | ||
135 | } | ||
136 | |||
137 | dput(nd->path.dentry); | ||
138 | nd->path.dentry = ns_dentry; | ||
139 | error = NULL; | ||
140 | |||
141 | out_put_task: | ||
142 | put_task_struct(task); | ||
143 | out: | ||
144 | return error; | ||
145 | } | ||
146 | |||
147 | static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) | ||
148 | { | ||
149 | struct inode *inode = dentry->d_inode; | ||
150 | struct proc_inode *ei = PROC_I(inode); | ||
151 | const struct proc_ns_operations *ns_ops = ei->ns_ops; | ||
152 | struct task_struct *task; | ||
153 | void *ns; | ||
154 | char name[50]; | ||
155 | int len = -EACCES; | ||
156 | |||
157 | task = get_proc_task(inode); | ||
158 | if (!task) | ||
159 | goto out; | ||
160 | |||
161 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
162 | goto out_put_task; | ||
163 | |||
164 | len = -ENOENT; | ||
165 | ns = ns_ops->get(task); | ||
166 | if (!ns) | ||
167 | goto out_put_task; | ||
168 | |||
169 | snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); | ||
170 | len = strlen(name); | ||
171 | |||
172 | if (len > buflen) | ||
173 | len = buflen; | ||
174 | if (copy_to_user(buffer, name, len)) | ||
175 | len = -EFAULT; | ||
176 | |||
177 | ns_ops->put(ns); | ||
178 | out_put_task: | ||
179 | put_task_struct(task); | ||
180 | out: | ||
181 | return len; | ||
182 | } | ||
183 | |||
184 | static const struct inode_operations proc_ns_link_inode_operations = { | ||
185 | .readlink = proc_ns_readlink, | ||
186 | .follow_link = proc_ns_follow_link, | ||
187 | .setattr = proc_setattr, | ||
188 | }; | ||
189 | |||
33 | static struct dentry *proc_ns_instantiate(struct inode *dir, | 190 | static struct dentry *proc_ns_instantiate(struct inode *dir, |
34 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 191 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
35 | { | 192 | { |
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
37 | struct inode *inode; | 194 | struct inode *inode; |
38 | struct proc_inode *ei; | 195 | struct proc_inode *ei; |
39 | struct dentry *error = ERR_PTR(-ENOENT); | 196 | struct dentry *error = ERR_PTR(-ENOENT); |
40 | void *ns; | ||
41 | 197 | ||
42 | inode = proc_pid_make_inode(dir->i_sb, task); | 198 | inode = proc_pid_make_inode(dir->i_sb, task); |
43 | if (!inode) | 199 | if (!inode) |
44 | goto out; | 200 | goto out; |
45 | 201 | ||
46 | ns = ns_ops->get(task); | ||
47 | if (!ns) | ||
48 | goto out_iput; | ||
49 | |||
50 | ei = PROC_I(inode); | 202 | ei = PROC_I(inode); |
51 | inode->i_mode = S_IFREG|S_IRUSR; | 203 | inode->i_mode = S_IFLNK|S_IRWXUGO; |
52 | inode->i_fop = &ns_file_operations; | 204 | inode->i_op = &proc_ns_link_inode_operations; |
53 | ei->ns_ops = ns_ops; | 205 | ei->ns_ops = ns_ops; |
54 | ei->ns = ns; | ||
55 | 206 | ||
56 | d_set_d_op(dentry, &pid_dentry_operations); | 207 | d_set_d_op(dentry, &pid_dentry_operations); |
57 | d_add(dentry, inode); | 208 | d_add(dentry, inode); |
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
60 | error = NULL; | 211 | error = NULL; |
61 | out: | 212 | out: |
62 | return error; | 213 | return error; |
63 | out_iput: | ||
64 | iput(inode); | ||
65 | goto out; | ||
66 | } | 214 | } |
67 | 215 | ||
68 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | 216 | static int proc_ns_fill_cache(struct file *filp, void *dirent, |
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent, | |||
89 | if (!task) | 237 | if (!task) |
90 | goto out_no_task; | 238 | goto out_no_task; |
91 | 239 | ||
92 | ret = -EPERM; | ||
93 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
94 | goto out; | ||
95 | |||
96 | ret = 0; | 240 | ret = 0; |
97 | i = filp->f_pos; | 241 | i = filp->f_pos; |
98 | switch (i) { | 242 | switch (i) { |
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
152 | if (!task) | 296 | if (!task) |
153 | goto out_no_task; | 297 | goto out_no_task; |
154 | 298 | ||
155 | error = ERR_PTR(-EPERM); | ||
156 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
157 | goto out; | ||
158 | |||
159 | last = &ns_entries[ARRAY_SIZE(ns_entries)]; | 299 | last = &ns_entries[ARRAY_SIZE(ns_entries)]; |
160 | for (entry = ns_entries; entry < last; entry++) { | 300 | for (entry = ns_entries; entry < last; entry++) { |
161 | if (strlen((*entry)->name) != len) | 301 | if (strlen((*entry)->name) != len) |
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
163 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | 303 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) |
164 | break; | 304 | break; |
165 | } | 305 | } |
166 | error = ERR_PTR(-ENOENT); | ||
167 | if (entry == last) | 306 | if (entry == last) |
168 | goto out; | 307 | goto out; |
169 | 308 | ||
@@ -198,3 +337,7 @@ out_invalid: | |||
198 | return ERR_PTR(-EINVAL); | 337 | return ERR_PTR(-EINVAL); |
199 | } | 338 | } |
200 | 339 | ||
340 | bool proc_ns_inode(struct inode *inode) | ||
341 | { | ||
342 | return inode->i_fop == &ns_file_operations; | ||
343 | } | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 9889a92d2e0..c6e9fac26ba 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
100 | int err; | 100 | int err; |
101 | struct super_block *sb; | 101 | struct super_block *sb; |
102 | struct pid_namespace *ns; | 102 | struct pid_namespace *ns; |
103 | struct proc_inode *ei; | ||
104 | char *options; | 103 | char *options; |
105 | 104 | ||
106 | if (flags & MS_KERNMOUNT) { | 105 | if (flags & MS_KERNMOUNT) { |
107 | ns = (struct pid_namespace *)data; | 106 | ns = (struct pid_namespace *)data; |
108 | options = NULL; | 107 | options = NULL; |
109 | } else { | 108 | } else { |
110 | ns = current->nsproxy->pid_ns; | 109 | ns = task_active_pid_ns(current); |
111 | options = data; | 110 | options = data; |
112 | } | 111 | } |
113 | 112 | ||
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
130 | sb->s_flags |= MS_ACTIVE; | 129 | sb->s_flags |= MS_ACTIVE; |
131 | } | 130 | } |
132 | 131 | ||
133 | ei = PROC_I(sb->s_root->d_inode); | ||
134 | if (!ei->pid) { | ||
135 | rcu_read_lock(); | ||
136 | ei->pid = get_pid(find_pid_ns(1, ns)); | ||
137 | rcu_read_unlock(); | ||
138 | } | ||
139 | |||
140 | return dget(sb->s_root); | 132 | return dget(sb->s_root); |
141 | } | 133 | } |
142 | 134 | ||
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = { | |||
153 | .name = "proc", | 145 | .name = "proc", |
154 | .mount = proc_mount, | 146 | .mount = proc_mount, |
155 | .kill_sb = proc_kill_sb, | 147 | .kill_sb = proc_kill_sb, |
148 | .fs_flags = FS_USERNS_MOUNT, | ||
156 | }; | 149 | }; |
157 | 150 | ||
158 | void __init proc_root_init(void) | 151 | void __init proc_root_init(void) |
@@ -163,12 +156,8 @@ void __init proc_root_init(void) | |||
163 | err = register_filesystem(&proc_fs_type); | 156 | err = register_filesystem(&proc_fs_type); |
164 | if (err) | 157 | if (err) |
165 | return; | 158 | return; |
166 | err = pid_ns_prepare_proc(&init_pid_ns); | ||
167 | if (err) { | ||
168 | unregister_filesystem(&proc_fs_type); | ||
169 | return; | ||
170 | } | ||
171 | 159 | ||
160 | proc_self_init(); | ||
172 | proc_symlink("mounts", NULL, "self/mounts"); | 161 | proc_symlink("mounts", NULL, "self/mounts"); |
173 | 162 | ||
174 | proc_net_init(); | 163 | proc_net_init(); |
diff --git a/fs/proc/self.c b/fs/proc/self.c new file mode 100644 index 00000000000..aa5cc3bff14 --- /dev/null +++ b/fs/proc/self.c | |||
@@ -0,0 +1,59 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/namei.h> | ||
4 | |||
5 | /* | ||
6 | * /proc/self: | ||
7 | */ | ||
8 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
9 | int buflen) | ||
10 | { | ||
11 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
12 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
13 | char tmp[PROC_NUMBUF]; | ||
14 | if (!tgid) | ||
15 | return -ENOENT; | ||
16 | sprintf(tmp, "%d", tgid); | ||
17 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
18 | } | ||
19 | |||
20 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
21 | { | ||
22 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
23 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
24 | char *name = ERR_PTR(-ENOENT); | ||
25 | if (tgid) { | ||
26 | /* 11 for max length of signed int in decimal + NULL term */ | ||
27 | name = kmalloc(12, GFP_KERNEL); | ||
28 | if (!name) | ||
29 | name = ERR_PTR(-ENOMEM); | ||
30 | else | ||
31 | sprintf(name, "%d", tgid); | ||
32 | } | ||
33 | nd_set_link(nd, name); | ||
34 | return NULL; | ||
35 | } | ||
36 | |||
37 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | ||
38 | void *cookie) | ||
39 | { | ||
40 | char *s = nd_get_link(nd); | ||
41 | if (!IS_ERR(s)) | ||
42 | kfree(s); | ||
43 | } | ||
44 | |||
45 | static const struct inode_operations proc_self_inode_operations = { | ||
46 | .readlink = proc_self_readlink, | ||
47 | .follow_link = proc_self_follow_link, | ||
48 | .put_link = proc_self_put_link, | ||
49 | }; | ||
50 | |||
51 | void __init proc_self_init(void) | ||
52 | { | ||
53 | struct proc_dir_entry *proc_self_symlink; | ||
54 | mode_t mode; | ||
55 | |||
56 | mode = S_IFLNK | S_IRWXUGO; | ||
57 | proc_self_symlink = proc_create("self", mode, NULL, NULL ); | ||
58 | proc_self_symlink->proc_iops = &proc_self_inode_operations; | ||
59 | } | ||
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 71eb7e25392..db940a9be04 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = { | |||
149 | .name = "sysfs", | 149 | .name = "sysfs", |
150 | .mount = sysfs_mount, | 150 | .mount = sysfs_mount, |
151 | .kill_sb = sysfs_kill_sb, | 151 | .kill_sb = sysfs_kill_sb, |
152 | .fs_flags = FS_USERNS_MOUNT, | ||
152 | }; | 153 | }; |
153 | 154 | ||
154 | int __init sysfs_init(void) | 155 | int __init sysfs_init(void) |
diff --git a/include/linux/cred.h b/include/linux/cred.h index 0142aacb70b..abb2cd50f6b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h | |||
@@ -344,10 +344,8 @@ static inline void put_cred(const struct cred *_cred) | |||
344 | extern struct user_namespace init_user_ns; | 344 | extern struct user_namespace init_user_ns; |
345 | #ifdef CONFIG_USER_NS | 345 | #ifdef CONFIG_USER_NS |
346 | #define current_user_ns() (current_cred_xxx(user_ns)) | 346 | #define current_user_ns() (current_cred_xxx(user_ns)) |
347 | #define task_user_ns(task) (task_cred_xxx((task), user_ns)) | ||
348 | #else | 347 | #else |
349 | #define current_user_ns() (&init_user_ns) | 348 | #define current_user_ns() (&init_user_ns) |
350 | #define task_user_ns(task) (&init_user_ns) | ||
351 | #endif | 349 | #endif |
352 | 350 | ||
353 | 351 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 408fb1e77a0..035521b4652 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1810,6 +1810,8 @@ struct file_system_type { | |||
1810 | #define FS_REQUIRES_DEV 1 | 1810 | #define FS_REQUIRES_DEV 1 |
1811 | #define FS_BINARY_MOUNTDATA 2 | 1811 | #define FS_BINARY_MOUNTDATA 2 |
1812 | #define FS_HAS_SUBTYPE 4 | 1812 | #define FS_HAS_SUBTYPE 4 |
1813 | #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ | ||
1814 | #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ | ||
1813 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ | 1815 | #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ |
1814 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ | 1816 | #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ |
1815 | struct dentry *(*mount) (struct file_system_type *, int, | 1817 | struct dentry *(*mount) (struct file_system_type *, int, |
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 5499c92a915..fe771978e87 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h | |||
@@ -67,6 +67,8 @@ struct ipc_namespace { | |||
67 | 67 | ||
68 | /* user_ns which owns the ipc ns */ | 68 | /* user_ns which owns the ipc ns */ |
69 | struct user_namespace *user_ns; | 69 | struct user_namespace *user_ns; |
70 | |||
71 | unsigned int proc_inum; | ||
70 | }; | 72 | }; |
71 | 73 | ||
72 | extern struct ipc_namespace init_ipc_ns; | 74 | extern struct ipc_namespace init_ipc_ns; |
@@ -133,7 +135,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } | |||
133 | 135 | ||
134 | #if defined(CONFIG_IPC_NS) | 136 | #if defined(CONFIG_IPC_NS) |
135 | extern struct ipc_namespace *copy_ipcs(unsigned long flags, | 137 | extern struct ipc_namespace *copy_ipcs(unsigned long flags, |
136 | struct task_struct *tsk); | 138 | struct user_namespace *user_ns, struct ipc_namespace *ns); |
139 | |||
137 | static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) | 140 | static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) |
138 | { | 141 | { |
139 | if (ns) | 142 | if (ns) |
@@ -144,12 +147,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) | |||
144 | extern void put_ipc_ns(struct ipc_namespace *ns); | 147 | extern void put_ipc_ns(struct ipc_namespace *ns); |
145 | #else | 148 | #else |
146 | static inline struct ipc_namespace *copy_ipcs(unsigned long flags, | 149 | static inline struct ipc_namespace *copy_ipcs(unsigned long flags, |
147 | struct task_struct *tsk) | 150 | struct user_namespace *user_ns, struct ipc_namespace *ns) |
148 | { | 151 | { |
149 | if (flags & CLONE_NEWIPC) | 152 | if (flags & CLONE_NEWIPC) |
150 | return ERR_PTR(-EINVAL); | 153 | return ERR_PTR(-EINVAL); |
151 | 154 | ||
152 | return tsk->nsproxy->ipc_ns; | 155 | return ns; |
153 | } | 156 | } |
154 | 157 | ||
155 | static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) | 158 | static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) |
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 5a8e3903d77..12b2ab51032 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h | |||
@@ -4,9 +4,10 @@ | |||
4 | 4 | ||
5 | struct mnt_namespace; | 5 | struct mnt_namespace; |
6 | struct fs_struct; | 6 | struct fs_struct; |
7 | struct user_namespace; | ||
7 | 8 | ||
8 | extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, | 9 | extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, |
9 | struct fs_struct *); | 10 | struct user_namespace *, struct fs_struct *); |
10 | extern void put_mnt_ns(struct mnt_namespace *ns); | 11 | extern void put_mnt_ns(struct mnt_namespace *ns); |
11 | 12 | ||
12 | extern const struct file_operations proc_mounts_operations; | 13 | extern const struct file_operations proc_mounts_operations; |
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cc37a55ad00..10e5947491c 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h | |||
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk); | |||
67 | void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); | 67 | void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); |
68 | void free_nsproxy(struct nsproxy *ns); | 68 | void free_nsproxy(struct nsproxy *ns); |
69 | int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, | 69 | int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, |
70 | struct fs_struct *); | 70 | struct cred *, struct fs_struct *); |
71 | int __init nsproxy_cache_init(void); | 71 | int __init nsproxy_cache_init(void); |
72 | 72 | ||
73 | static inline void put_nsproxy(struct nsproxy *ns) | 73 | static inline void put_nsproxy(struct nsproxy *ns) |
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 65e3e87eacc..bf285999273 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h | |||
@@ -21,6 +21,7 @@ struct pid_namespace { | |||
21 | struct kref kref; | 21 | struct kref kref; |
22 | struct pidmap pidmap[PIDMAP_ENTRIES]; | 22 | struct pidmap pidmap[PIDMAP_ENTRIES]; |
23 | int last_pid; | 23 | int last_pid; |
24 | int nr_hashed; | ||
24 | struct task_struct *child_reaper; | 25 | struct task_struct *child_reaper; |
25 | struct kmem_cache *pid_cachep; | 26 | struct kmem_cache *pid_cachep; |
26 | unsigned int level; | 27 | unsigned int level; |
@@ -31,9 +32,12 @@ struct pid_namespace { | |||
31 | #ifdef CONFIG_BSD_PROCESS_ACCT | 32 | #ifdef CONFIG_BSD_PROCESS_ACCT |
32 | struct bsd_acct_struct *bacct; | 33 | struct bsd_acct_struct *bacct; |
33 | #endif | 34 | #endif |
35 | struct user_namespace *user_ns; | ||
36 | struct work_struct proc_work; | ||
34 | kgid_t pid_gid; | 37 | kgid_t pid_gid; |
35 | int hide_pid; | 38 | int hide_pid; |
36 | int reboot; /* group exit code if this pidns was rebooted */ | 39 | int reboot; /* group exit code if this pidns was rebooted */ |
40 | unsigned int proc_inum; | ||
37 | }; | 41 | }; |
38 | 42 | ||
39 | extern struct pid_namespace init_pid_ns; | 43 | extern struct pid_namespace init_pid_ns; |
@@ -46,7 +50,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) | |||
46 | return ns; | 50 | return ns; |
47 | } | 51 | } |
48 | 52 | ||
49 | extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); | 53 | extern struct pid_namespace *copy_pid_ns(unsigned long flags, |
54 | struct user_namespace *user_ns, struct pid_namespace *ns); | ||
50 | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); | 55 | extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); |
51 | extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); | 56 | extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); |
52 | extern void put_pid_ns(struct pid_namespace *ns); | 57 | extern void put_pid_ns(struct pid_namespace *ns); |
@@ -59,8 +64,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) | |||
59 | return ns; | 64 | return ns; |
60 | } | 65 | } |
61 | 66 | ||
62 | static inline struct pid_namespace * | 67 | static inline struct pid_namespace *copy_pid_ns(unsigned long flags, |
63 | copy_pid_ns(unsigned long flags, struct pid_namespace *ns) | 68 | struct user_namespace *user_ns, struct pid_namespace *ns) |
64 | { | 69 | { |
65 | if (flags & CLONE_NEWPID) | 70 | if (flags & CLONE_NEWPID) |
66 | ns = ERR_PTR(-EINVAL); | 71 | ns = ERR_PTR(-EINVAL); |
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 3fd2e871ff1..2e24018b7ce 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h | |||
@@ -28,7 +28,11 @@ struct mm_struct; | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | enum { | 30 | enum { |
31 | PROC_ROOT_INO = 1, | 31 | PROC_ROOT_INO = 1, |
32 | PROC_IPC_INIT_INO = 0xEFFFFFFFU, | ||
33 | PROC_UTS_INIT_INO = 0xEFFFFFFEU, | ||
34 | PROC_USER_INIT_INO = 0xEFFFFFFDU, | ||
35 | PROC_PID_INIT_INO = 0xEFFFFFFCU, | ||
32 | }; | 36 | }; |
33 | 37 | ||
34 | /* | 38 | /* |
@@ -174,7 +178,10 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | |||
174 | struct proc_dir_entry *parent); | 178 | struct proc_dir_entry *parent); |
175 | 179 | ||
176 | extern struct file *proc_ns_fget(int fd); | 180 | extern struct file *proc_ns_fget(int fd); |
181 | extern bool proc_ns_inode(struct inode *inode); | ||
177 | 182 | ||
183 | extern int proc_alloc_inum(unsigned int *pino); | ||
184 | extern void proc_free_inum(unsigned int inum); | ||
178 | #else | 185 | #else |
179 | 186 | ||
180 | #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) | 187 | #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) |
@@ -229,6 +236,19 @@ static inline struct file *proc_ns_fget(int fd) | |||
229 | return ERR_PTR(-EINVAL); | 236 | return ERR_PTR(-EINVAL); |
230 | } | 237 | } |
231 | 238 | ||
239 | static inline bool proc_ns_inode(struct inode *inode) | ||
240 | { | ||
241 | return false; | ||
242 | } | ||
243 | |||
244 | static inline int proc_alloc_inum(unsigned int *inum) | ||
245 | { | ||
246 | *inum = 1; | ||
247 | return 0; | ||
248 | } | ||
249 | static inline void proc_free_inum(unsigned int inum) | ||
250 | { | ||
251 | } | ||
232 | #endif /* CONFIG_PROC_FS */ | 252 | #endif /* CONFIG_PROC_FS */ |
233 | 253 | ||
234 | #if !defined(CONFIG_PROC_KCORE) | 254 | #if !defined(CONFIG_PROC_KCORE) |
@@ -247,10 +267,14 @@ struct proc_ns_operations { | |||
247 | void *(*get)(struct task_struct *task); | 267 | void *(*get)(struct task_struct *task); |
248 | void (*put)(void *ns); | 268 | void (*put)(void *ns); |
249 | int (*install)(struct nsproxy *nsproxy, void *ns); | 269 | int (*install)(struct nsproxy *nsproxy, void *ns); |
270 | unsigned int (*inum)(void *ns); | ||
250 | }; | 271 | }; |
251 | extern const struct proc_ns_operations netns_operations; | 272 | extern const struct proc_ns_operations netns_operations; |
252 | extern const struct proc_ns_operations utsns_operations; | 273 | extern const struct proc_ns_operations utsns_operations; |
253 | extern const struct proc_ns_operations ipcns_operations; | 274 | extern const struct proc_ns_operations ipcns_operations; |
275 | extern const struct proc_ns_operations pidns_operations; | ||
276 | extern const struct proc_ns_operations userns_operations; | ||
277 | extern const struct proc_ns_operations mntns_operations; | ||
254 | 278 | ||
255 | union proc_op { | 279 | union proc_op { |
256 | int (*proc_get_link)(struct dentry *, struct path *); | 280 | int (*proc_get_link)(struct dentry *, struct path *); |
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 95142cae446..b9bd2e6c73c 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h | |||
@@ -25,6 +25,7 @@ struct user_namespace { | |||
25 | struct user_namespace *parent; | 25 | struct user_namespace *parent; |
26 | kuid_t owner; | 26 | kuid_t owner; |
27 | kgid_t group; | 27 | kgid_t group; |
28 | unsigned int proc_inum; | ||
28 | }; | 29 | }; |
29 | 30 | ||
30 | extern struct user_namespace init_user_ns; | 31 | extern struct user_namespace init_user_ns; |
@@ -39,6 +40,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | |||
39 | } | 40 | } |
40 | 41 | ||
41 | extern int create_user_ns(struct cred *new); | 42 | extern int create_user_ns(struct cred *new); |
43 | extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); | ||
42 | extern void free_user_ns(struct kref *kref); | 44 | extern void free_user_ns(struct kref *kref); |
43 | 45 | ||
44 | static inline void put_user_ns(struct user_namespace *ns) | 46 | static inline void put_user_ns(struct user_namespace *ns) |
@@ -66,6 +68,14 @@ static inline int create_user_ns(struct cred *new) | |||
66 | return -EINVAL; | 68 | return -EINVAL; |
67 | } | 69 | } |
68 | 70 | ||
71 | static inline int unshare_userns(unsigned long unshare_flags, | ||
72 | struct cred **new_cred) | ||
73 | { | ||
74 | if (unshare_flags & CLONE_NEWUSER) | ||
75 | return -EINVAL; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
69 | static inline void put_user_ns(struct user_namespace *ns) | 79 | static inline void put_user_ns(struct user_namespace *ns) |
70 | { | 80 | { |
71 | } | 81 | } |
diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b345206722..239e27733d6 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h | |||
@@ -23,6 +23,7 @@ struct uts_namespace { | |||
23 | struct kref kref; | 23 | struct kref kref; |
24 | struct new_utsname name; | 24 | struct new_utsname name; |
25 | struct user_namespace *user_ns; | 25 | struct user_namespace *user_ns; |
26 | unsigned int proc_inum; | ||
26 | }; | 27 | }; |
27 | extern struct uts_namespace init_uts_ns; | 28 | extern struct uts_namespace init_uts_ns; |
28 | 29 | ||
@@ -33,7 +34,7 @@ static inline void get_uts_ns(struct uts_namespace *ns) | |||
33 | } | 34 | } |
34 | 35 | ||
35 | extern struct uts_namespace *copy_utsname(unsigned long flags, | 36 | extern struct uts_namespace *copy_utsname(unsigned long flags, |
36 | struct task_struct *tsk); | 37 | struct user_namespace *user_ns, struct uts_namespace *old_ns); |
37 | extern void free_uts_ns(struct kref *kref); | 38 | extern void free_uts_ns(struct kref *kref); |
38 | 39 | ||
39 | static inline void put_uts_ns(struct uts_namespace *ns) | 40 | static inline void put_uts_ns(struct uts_namespace *ns) |
@@ -50,12 +51,12 @@ static inline void put_uts_ns(struct uts_namespace *ns) | |||
50 | } | 51 | } |
51 | 52 | ||
52 | static inline struct uts_namespace *copy_utsname(unsigned long flags, | 53 | static inline struct uts_namespace *copy_utsname(unsigned long flags, |
53 | struct task_struct *tsk) | 54 | struct user_namespace *user_ns, struct uts_namespace *old_ns) |
54 | { | 55 | { |
55 | if (flags & CLONE_NEWUTS) | 56 | if (flags & CLONE_NEWUTS) |
56 | return ERR_PTR(-EINVAL); | 57 | return ERR_PTR(-EINVAL); |
57 | 58 | ||
58 | return tsk->nsproxy->uts_ns; | 59 | return old_ns; |
59 | } | 60 | } |
60 | #endif | 61 | #endif |
61 | 62 | ||
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index c5a43f56b79..de644bcd861 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h | |||
@@ -56,6 +56,8 @@ struct net { | |||
56 | 56 | ||
57 | struct user_namespace *user_ns; /* Owning user namespace */ | 57 | struct user_namespace *user_ns; /* Owning user namespace */ |
58 | 58 | ||
59 | unsigned int proc_inum; | ||
60 | |||
59 | struct proc_dir_entry *proc_net; | 61 | struct proc_dir_entry *proc_net; |
60 | struct proc_dir_entry *proc_net_stat; | 62 | struct proc_dir_entry *proc_net_stat; |
61 | 63 | ||
diff --git a/init/Kconfig b/init/Kconfig index 1a207efca59..675d8a2326c 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1069,11 +1069,9 @@ config UIDGID_CONVERTED | |||
1069 | # Filesystems | 1069 | # Filesystems |
1070 | depends on 9P_FS = n | 1070 | depends on 9P_FS = n |
1071 | depends on AFS_FS = n | 1071 | depends on AFS_FS = n |
1072 | depends on AUTOFS4_FS = n | ||
1073 | depends on CEPH_FS = n | 1072 | depends on CEPH_FS = n |
1074 | depends on CIFS = n | 1073 | depends on CIFS = n |
1075 | depends on CODA_FS = n | 1074 | depends on CODA_FS = n |
1076 | depends on FUSE_FS = n | ||
1077 | depends on GFS2_FS = n | 1075 | depends on GFS2_FS = n |
1078 | depends on NCP_FS = n | 1076 | depends on NCP_FS = n |
1079 | depends on NFSD = n | 1077 | depends on NFSD = n |
diff --git a/init/main.c b/init/main.c index 63ae904a99a..baf1f0f5c46 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -812,7 +812,6 @@ static int __ref kernel_init(void *unused) | |||
812 | system_state = SYSTEM_RUNNING; | 812 | system_state = SYSTEM_RUNNING; |
813 | numa_default_policy(); | 813 | numa_default_policy(); |
814 | 814 | ||
815 | current->signal->flags |= SIGNAL_UNKILLABLE; | ||
816 | flush_delayed_fput(); | 815 | flush_delayed_fput(); |
817 | 816 | ||
818 | if (ramdisk_execute_command) { | 817 | if (ramdisk_execute_command) { |
diff --git a/init/version.c b/init/version.c index 86fe0ccb997..58170f18912 100644 --- a/init/version.c +++ b/init/version.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/utsname.h> | 12 | #include <linux/utsname.h> |
13 | #include <generated/utsrelease.h> | 13 | #include <generated/utsrelease.h> |
14 | #include <linux/version.h> | 14 | #include <linux/version.h> |
15 | #include <linux/proc_fs.h> | ||
15 | 16 | ||
16 | #ifndef CONFIG_KALLSYMS | 17 | #ifndef CONFIG_KALLSYMS |
17 | #define version(a) Version_ ## a | 18 | #define version(a) Version_ ## a |
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = { | |||
34 | .domainname = UTS_DOMAINNAME, | 35 | .domainname = UTS_DOMAINNAME, |
35 | }, | 36 | }, |
36 | .user_ns = &init_user_ns, | 37 | .user_ns = &init_user_ns, |
38 | .proc_inum = PROC_UTS_INIT_INO, | ||
37 | }; | 39 | }; |
38 | EXPORT_SYMBOL_GPL(init_uts_ns); | 40 | EXPORT_SYMBOL_GPL(init_uts_ns); |
39 | 41 | ||
diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 26143d377c9..6471f1bdae9 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/msg.h> | 16 | #include <linux/msg.h> |
17 | #include <linux/ipc_namespace.h> | 17 | #include <linux/ipc_namespace.h> |
18 | #include <linux/utsname.h> | 18 | #include <linux/utsname.h> |
19 | #include <linux/proc_fs.h> | ||
19 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
20 | 21 | ||
21 | #include "util.h" | 22 | #include "util.h" |
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock); | |||
30 | struct ipc_namespace init_ipc_ns = { | 31 | struct ipc_namespace init_ipc_ns = { |
31 | .count = ATOMIC_INIT(1), | 32 | .count = ATOMIC_INIT(1), |
32 | .user_ns = &init_user_ns, | 33 | .user_ns = &init_user_ns, |
34 | .proc_inum = PROC_IPC_INIT_INO, | ||
33 | }; | 35 | }; |
34 | 36 | ||
35 | atomic_t nr_ipc_ns = ATOMIC_INIT(1); | 37 | atomic_t nr_ipc_ns = ATOMIC_INIT(1); |
diff --git a/ipc/namespace.c b/ipc/namespace.c index f362298c5ce..cf3386a51de 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c | |||
@@ -16,7 +16,7 @@ | |||
16 | 16 | ||
17 | #include "util.h" | 17 | #include "util.h" |
18 | 18 | ||
19 | static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, | 19 | static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, |
20 | struct ipc_namespace *old_ns) | 20 | struct ipc_namespace *old_ns) |
21 | { | 21 | { |
22 | struct ipc_namespace *ns; | 22 | struct ipc_namespace *ns; |
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, | |||
26 | if (ns == NULL) | 26 | if (ns == NULL) |
27 | return ERR_PTR(-ENOMEM); | 27 | return ERR_PTR(-ENOMEM); |
28 | 28 | ||
29 | err = proc_alloc_inum(&ns->proc_inum); | ||
30 | if (err) { | ||
31 | kfree(ns); | ||
32 | return ERR_PTR(err); | ||
33 | } | ||
34 | |||
29 | atomic_set(&ns->count, 1); | 35 | atomic_set(&ns->count, 1); |
30 | err = mq_init_ns(ns); | 36 | err = mq_init_ns(ns); |
31 | if (err) { | 37 | if (err) { |
38 | proc_free_inum(ns->proc_inum); | ||
32 | kfree(ns); | 39 | kfree(ns); |
33 | return ERR_PTR(err); | 40 | return ERR_PTR(err); |
34 | } | 41 | } |
@@ -46,19 +53,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk, | |||
46 | ipcns_notify(IPCNS_CREATED); | 53 | ipcns_notify(IPCNS_CREATED); |
47 | register_ipcns_notifier(ns); | 54 | register_ipcns_notifier(ns); |
48 | 55 | ||
49 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); | 56 | ns->user_ns = get_user_ns(user_ns); |
50 | 57 | ||
51 | return ns; | 58 | return ns; |
52 | } | 59 | } |
53 | 60 | ||
54 | struct ipc_namespace *copy_ipcs(unsigned long flags, | 61 | struct ipc_namespace *copy_ipcs(unsigned long flags, |
55 | struct task_struct *tsk) | 62 | struct user_namespace *user_ns, struct ipc_namespace *ns) |
56 | { | 63 | { |
57 | struct ipc_namespace *ns = tsk->nsproxy->ipc_ns; | ||
58 | |||
59 | if (!(flags & CLONE_NEWIPC)) | 64 | if (!(flags & CLONE_NEWIPC)) |
60 | return get_ipc_ns(ns); | 65 | return get_ipc_ns(ns); |
61 | return create_ipc_ns(tsk, ns); | 66 | return create_ipc_ns(user_ns, ns); |
62 | } | 67 | } |
63 | 68 | ||
64 | /* | 69 | /* |
@@ -113,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) | |||
113 | */ | 118 | */ |
114 | ipcns_notify(IPCNS_REMOVED); | 119 | ipcns_notify(IPCNS_REMOVED); |
115 | put_user_ns(ns->user_ns); | 120 | put_user_ns(ns->user_ns); |
121 | proc_free_inum(ns->proc_inum); | ||
116 | kfree(ns); | 122 | kfree(ns); |
117 | } | 123 | } |
118 | 124 | ||
@@ -161,8 +167,12 @@ static void ipcns_put(void *ns) | |||
161 | return put_ipc_ns(ns); | 167 | return put_ipc_ns(ns); |
162 | } | 168 | } |
163 | 169 | ||
164 | static int ipcns_install(struct nsproxy *nsproxy, void *ns) | 170 | static int ipcns_install(struct nsproxy *nsproxy, void *new) |
165 | { | 171 | { |
172 | struct ipc_namespace *ns = new; | ||
173 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) | ||
174 | return -EPERM; | ||
175 | |||
166 | /* Ditch state from the old ipc namespace */ | 176 | /* Ditch state from the old ipc namespace */ |
167 | exit_sem(current); | 177 | exit_sem(current); |
168 | put_ipc_ns(nsproxy->ipc_ns); | 178 | put_ipc_ns(nsproxy->ipc_ns); |
@@ -170,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns) | |||
170 | return 0; | 180 | return 0; |
171 | } | 181 | } |
172 | 182 | ||
183 | static unsigned int ipcns_inum(void *vp) | ||
184 | { | ||
185 | struct ipc_namespace *ns = vp; | ||
186 | |||
187 | return ns->proc_inum; | ||
188 | } | ||
189 | |||
173 | const struct proc_ns_operations ipcns_operations = { | 190 | const struct proc_ns_operations ipcns_operations = { |
174 | .name = "ipc", | 191 | .name = "ipc", |
175 | .type = CLONE_NEWIPC, | 192 | .type = CLONE_NEWIPC, |
176 | .get = ipcns_get, | 193 | .get = ipcns_get, |
177 | .put = ipcns_put, | 194 | .put = ipcns_put, |
178 | .install = ipcns_install, | 195 | .install = ipcns_install, |
196 | .inum = ipcns_inum, | ||
179 | }; | 197 | }; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f34c41bfaa3..9915ffe0137 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -3409,7 +3409,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
3409 | { | 3409 | { |
3410 | struct cgroup_pidlist *l; | 3410 | struct cgroup_pidlist *l; |
3411 | /* don't need task_nsproxy() if we're looking at ourself */ | 3411 | /* don't need task_nsproxy() if we're looking at ourself */ |
3412 | struct pid_namespace *ns = current->nsproxy->pid_ns; | 3412 | struct pid_namespace *ns = task_active_pid_ns(current); |
3413 | 3413 | ||
3414 | /* | 3414 | /* |
3415 | * We can't drop the pidlist_mutex before taking the l->mutex in case | 3415 | * We can't drop the pidlist_mutex before taking the l->mutex in case |
diff --git a/kernel/events/core.c b/kernel/events/core.c index f9ff5493171..301079d06f2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -6155,7 +6155,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
6155 | 6155 | ||
6156 | event->parent = parent_event; | 6156 | event->parent = parent_event; |
6157 | 6157 | ||
6158 | event->ns = get_pid_ns(current->nsproxy->pid_ns); | 6158 | event->ns = get_pid_ns(task_active_pid_ns(current)); |
6159 | event->id = atomic64_inc_return(&perf_event_id); | 6159 | event->id = atomic64_inc_return(&perf_event_id); |
6160 | 6160 | ||
6161 | event->state = PERF_EVENT_STATE_INACTIVE; | 6161 | event->state = PERF_EVENT_STATE_INACTIVE; |
diff --git a/kernel/exit.c b/kernel/exit.c index 50d2e93c36e..b4df2193721 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -72,18 +72,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead) | |||
72 | list_del_rcu(&p->tasks); | 72 | list_del_rcu(&p->tasks); |
73 | list_del_init(&p->sibling); | 73 | list_del_init(&p->sibling); |
74 | __this_cpu_dec(process_counts); | 74 | __this_cpu_dec(process_counts); |
75 | /* | ||
76 | * If we are the last child process in a pid namespace to be | ||
77 | * reaped, notify the reaper sleeping zap_pid_ns_processes(). | ||
78 | */ | ||
79 | if (IS_ENABLED(CONFIG_PID_NS)) { | ||
80 | struct task_struct *parent = p->real_parent; | ||
81 | |||
82 | if ((task_active_pid_ns(parent)->child_reaper == parent) && | ||
83 | list_empty(&parent->children) && | ||
84 | (parent->flags & PF_EXITING)) | ||
85 | wake_up_process(parent); | ||
86 | } | ||
87 | } | 75 | } |
88 | list_del_rcu(&p->thread_group); | 76 | list_del_rcu(&p->thread_group); |
89 | } | 77 | } |
diff --git a/kernel/fork.c b/kernel/fork.c index 115d6c2e4cc..c36c4e301ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1044 | atomic_set(&sig->live, 1); | 1044 | atomic_set(&sig->live, 1); |
1045 | atomic_set(&sig->sigcnt, 1); | 1045 | atomic_set(&sig->sigcnt, 1); |
1046 | init_waitqueue_head(&sig->wait_chldexit); | 1046 | init_waitqueue_head(&sig->wait_chldexit); |
1047 | if (clone_flags & CLONE_NEWPID) | ||
1048 | sig->flags |= SIGNAL_UNKILLABLE; | ||
1049 | sig->curr_target = tsk; | 1047 | sig->curr_target = tsk; |
1050 | init_sigpending(&sig->shared_pending); | 1048 | init_sigpending(&sig->shared_pending); |
1051 | INIT_LIST_HEAD(&sig->posix_timers); | 1049 | INIT_LIST_HEAD(&sig->posix_timers); |
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1438 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); | 1436 | ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); |
1439 | 1437 | ||
1440 | if (thread_group_leader(p)) { | 1438 | if (thread_group_leader(p)) { |
1441 | if (is_child_reaper(pid)) | 1439 | if (is_child_reaper(pid)) { |
1442 | p->nsproxy->pid_ns->child_reaper = p; | 1440 | ns_of_pid(pid)->child_reaper = p; |
1441 | p->signal->flags |= SIGNAL_UNKILLABLE; | ||
1442 | } | ||
1443 | 1443 | ||
1444 | p->signal->leader_pid = pid; | 1444 | p->signal->leader_pid = pid; |
1445 | p->signal->tty = tty_kref_get(current->signal->tty); | 1445 | p->signal->tty = tty_kref_get(current->signal->tty); |
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io: | |||
1473 | if (p->io_context) | 1473 | if (p->io_context) |
1474 | exit_io_context(p); | 1474 | exit_io_context(p); |
1475 | bad_fork_cleanup_namespaces: | 1475 | bad_fork_cleanup_namespaces: |
1476 | if (unlikely(clone_flags & CLONE_NEWPID)) | ||
1477 | pid_ns_release_proc(p->nsproxy->pid_ns); | ||
1478 | exit_task_namespaces(p); | 1476 | exit_task_namespaces(p); |
1479 | bad_fork_cleanup_mm: | 1477 | bad_fork_cleanup_mm: |
1480 | if (p->mm) | 1478 | if (p->mm) |
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags, | |||
1554 | * Do some preliminary argument and permissions checking before we | 1552 | * Do some preliminary argument and permissions checking before we |
1555 | * actually start allocating stuff | 1553 | * actually start allocating stuff |
1556 | */ | 1554 | */ |
1557 | if (clone_flags & CLONE_NEWUSER) { | 1555 | if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { |
1558 | if (clone_flags & CLONE_THREAD) | 1556 | if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) |
1559 | return -EINVAL; | 1557 | return -EINVAL; |
1560 | /* hopefully this check will go away when userns support is | ||
1561 | * complete | ||
1562 | */ | ||
1563 | if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || | ||
1564 | !capable(CAP_SETGID)) | ||
1565 | return -EPERM; | ||
1566 | } | 1558 | } |
1567 | 1559 | ||
1568 | /* | 1560 | /* |
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags) | |||
1724 | { | 1716 | { |
1725 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1717 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1726 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1718 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1727 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | 1719 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| |
1720 | CLONE_NEWUSER|CLONE_NEWPID)) | ||
1728 | return -EINVAL; | 1721 | return -EINVAL; |
1729 | /* | 1722 | /* |
1730 | * Not implemented, but pretend it works if there is nothing to | 1723 | * Not implemented, but pretend it works if there is nothing to |
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1791 | { | 1784 | { |
1792 | struct fs_struct *fs, *new_fs = NULL; | 1785 | struct fs_struct *fs, *new_fs = NULL; |
1793 | struct files_struct *fd, *new_fd = NULL; | 1786 | struct files_struct *fd, *new_fd = NULL; |
1787 | struct cred *new_cred = NULL; | ||
1794 | struct nsproxy *new_nsproxy = NULL; | 1788 | struct nsproxy *new_nsproxy = NULL; |
1795 | int do_sysvsem = 0; | 1789 | int do_sysvsem = 0; |
1796 | int err; | 1790 | int err; |
1797 | 1791 | ||
1798 | err = check_unshare_flags(unshare_flags); | 1792 | /* |
1799 | if (err) | 1793 | * If unsharing a user namespace must also unshare the thread. |
1800 | goto bad_unshare_out; | 1794 | */ |
1801 | 1795 | if (unshare_flags & CLONE_NEWUSER) | |
1796 | unshare_flags |= CLONE_THREAD; | ||
1797 | /* | ||
1798 | * If unsharing a pid namespace must also unshare the thread. | ||
1799 | */ | ||
1800 | if (unshare_flags & CLONE_NEWPID) | ||
1801 | unshare_flags |= CLONE_THREAD; | ||
1802 | /* | ||
1803 | * If unsharing a thread from a thread group, must also unshare vm. | ||
1804 | */ | ||
1805 | if (unshare_flags & CLONE_THREAD) | ||
1806 | unshare_flags |= CLONE_VM; | ||
1807 | /* | ||
1808 | * If unsharing vm, must also unshare signal handlers. | ||
1809 | */ | ||
1810 | if (unshare_flags & CLONE_VM) | ||
1811 | unshare_flags |= CLONE_SIGHAND; | ||
1802 | /* | 1812 | /* |
1803 | * If unsharing namespace, must also unshare filesystem information. | 1813 | * If unsharing namespace, must also unshare filesystem information. |
1804 | */ | 1814 | */ |
1805 | if (unshare_flags & CLONE_NEWNS) | 1815 | if (unshare_flags & CLONE_NEWNS) |
1806 | unshare_flags |= CLONE_FS; | 1816 | unshare_flags |= CLONE_FS; |
1817 | |||
1818 | err = check_unshare_flags(unshare_flags); | ||
1819 | if (err) | ||
1820 | goto bad_unshare_out; | ||
1807 | /* | 1821 | /* |
1808 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1822 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1809 | * to a new ipc namespace, the semaphore arrays from the old | 1823 | * to a new ipc namespace, the semaphore arrays from the old |
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1817 | err = unshare_fd(unshare_flags, &new_fd); | 1831 | err = unshare_fd(unshare_flags, &new_fd); |
1818 | if (err) | 1832 | if (err) |
1819 | goto bad_unshare_cleanup_fs; | 1833 | goto bad_unshare_cleanup_fs; |
1820 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); | 1834 | err = unshare_userns(unshare_flags, &new_cred); |
1821 | if (err) | 1835 | if (err) |
1822 | goto bad_unshare_cleanup_fd; | 1836 | goto bad_unshare_cleanup_fd; |
1837 | err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | ||
1838 | new_cred, new_fs); | ||
1839 | if (err) | ||
1840 | goto bad_unshare_cleanup_cred; | ||
1823 | 1841 | ||
1824 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { | 1842 | if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { |
1825 | if (do_sysvsem) { | 1843 | if (do_sysvsem) { |
1826 | /* | 1844 | /* |
1827 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1845 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1854 | } | 1872 | } |
1855 | 1873 | ||
1856 | task_unlock(current); | 1874 | task_unlock(current); |
1875 | |||
1876 | if (new_cred) { | ||
1877 | /* Install the new user namespace */ | ||
1878 | commit_creds(new_cred); | ||
1879 | new_cred = NULL; | ||
1880 | } | ||
1857 | } | 1881 | } |
1858 | 1882 | ||
1859 | if (new_nsproxy) | 1883 | if (new_nsproxy) |
1860 | put_nsproxy(new_nsproxy); | 1884 | put_nsproxy(new_nsproxy); |
1861 | 1885 | ||
1886 | bad_unshare_cleanup_cred: | ||
1887 | if (new_cred) | ||
1888 | put_cred(new_cred); | ||
1862 | bad_unshare_cleanup_fd: | 1889 | bad_unshare_cleanup_fd: |
1863 | if (new_fd) | 1890 | if (new_fd) |
1864 | put_files_struct(new_fd); | 1891 | put_files_struct(new_fd); |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 7e1c3de1ce4..78e2ecb2016 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void) | |||
57 | * leave it to the caller to do proper locking and attach it to task. | 57 | * leave it to the caller to do proper locking and attach it to task. |
58 | */ | 58 | */ |
59 | static struct nsproxy *create_new_namespaces(unsigned long flags, | 59 | static struct nsproxy *create_new_namespaces(unsigned long flags, |
60 | struct task_struct *tsk, struct fs_struct *new_fs) | 60 | struct task_struct *tsk, struct user_namespace *user_ns, |
61 | struct fs_struct *new_fs) | ||
61 | { | 62 | { |
62 | struct nsproxy *new_nsp; | 63 | struct nsproxy *new_nsp; |
63 | int err; | 64 | int err; |
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
66 | if (!new_nsp) | 67 | if (!new_nsp) |
67 | return ERR_PTR(-ENOMEM); | 68 | return ERR_PTR(-ENOMEM); |
68 | 69 | ||
69 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); | 70 | new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); |
70 | if (IS_ERR(new_nsp->mnt_ns)) { | 71 | if (IS_ERR(new_nsp->mnt_ns)) { |
71 | err = PTR_ERR(new_nsp->mnt_ns); | 72 | err = PTR_ERR(new_nsp->mnt_ns); |
72 | goto out_ns; | 73 | goto out_ns; |
73 | } | 74 | } |
74 | 75 | ||
75 | new_nsp->uts_ns = copy_utsname(flags, tsk); | 76 | new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns); |
76 | if (IS_ERR(new_nsp->uts_ns)) { | 77 | if (IS_ERR(new_nsp->uts_ns)) { |
77 | err = PTR_ERR(new_nsp->uts_ns); | 78 | err = PTR_ERR(new_nsp->uts_ns); |
78 | goto out_uts; | 79 | goto out_uts; |
79 | } | 80 | } |
80 | 81 | ||
81 | new_nsp->ipc_ns = copy_ipcs(flags, tsk); | 82 | new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns); |
82 | if (IS_ERR(new_nsp->ipc_ns)) { | 83 | if (IS_ERR(new_nsp->ipc_ns)) { |
83 | err = PTR_ERR(new_nsp->ipc_ns); | 84 | err = PTR_ERR(new_nsp->ipc_ns); |
84 | goto out_ipc; | 85 | goto out_ipc; |
85 | } | 86 | } |
86 | 87 | ||
87 | new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); | 88 | new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns); |
88 | if (IS_ERR(new_nsp->pid_ns)) { | 89 | if (IS_ERR(new_nsp->pid_ns)) { |
89 | err = PTR_ERR(new_nsp->pid_ns); | 90 | err = PTR_ERR(new_nsp->pid_ns); |
90 | goto out_pid; | 91 | goto out_pid; |
91 | } | 92 | } |
92 | 93 | ||
93 | new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); | 94 | new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); |
94 | if (IS_ERR(new_nsp->net_ns)) { | 95 | if (IS_ERR(new_nsp->net_ns)) { |
95 | err = PTR_ERR(new_nsp->net_ns); | 96 | err = PTR_ERR(new_nsp->net_ns); |
96 | goto out_net; | 97 | goto out_net; |
@@ -122,6 +123,7 @@ out_ns: | |||
122 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) | 123 | int copy_namespaces(unsigned long flags, struct task_struct *tsk) |
123 | { | 124 | { |
124 | struct nsproxy *old_ns = tsk->nsproxy; | 125 | struct nsproxy *old_ns = tsk->nsproxy; |
126 | struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); | ||
125 | struct nsproxy *new_ns; | 127 | struct nsproxy *new_ns; |
126 | int err = 0; | 128 | int err = 0; |
127 | 129 | ||
@@ -134,7 +136,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
134 | CLONE_NEWPID | CLONE_NEWNET))) | 136 | CLONE_NEWPID | CLONE_NEWNET))) |
135 | return 0; | 137 | return 0; |
136 | 138 | ||
137 | if (!capable(CAP_SYS_ADMIN)) { | 139 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) { |
138 | err = -EPERM; | 140 | err = -EPERM; |
139 | goto out; | 141 | goto out; |
140 | } | 142 | } |
@@ -151,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
151 | goto out; | 153 | goto out; |
152 | } | 154 | } |
153 | 155 | ||
154 | new_ns = create_new_namespaces(flags, tsk, tsk->fs); | 156 | new_ns = create_new_namespaces(flags, tsk, |
157 | task_cred_xxx(tsk, user_ns), tsk->fs); | ||
155 | if (IS_ERR(new_ns)) { | 158 | if (IS_ERR(new_ns)) { |
156 | err = PTR_ERR(new_ns); | 159 | err = PTR_ERR(new_ns); |
157 | goto out; | 160 | goto out; |
@@ -183,19 +186,21 @@ void free_nsproxy(struct nsproxy *ns) | |||
183 | * On success, returns the new nsproxy. | 186 | * On success, returns the new nsproxy. |
184 | */ | 187 | */ |
185 | int unshare_nsproxy_namespaces(unsigned long unshare_flags, | 188 | int unshare_nsproxy_namespaces(unsigned long unshare_flags, |
186 | struct nsproxy **new_nsp, struct fs_struct *new_fs) | 189 | struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) |
187 | { | 190 | { |
191 | struct user_namespace *user_ns; | ||
188 | int err = 0; | 192 | int err = 0; |
189 | 193 | ||
190 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | | 194 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
191 | CLONE_NEWNET))) | 195 | CLONE_NEWNET | CLONE_NEWPID))) |
192 | return 0; | 196 | return 0; |
193 | 197 | ||
194 | if (!capable(CAP_SYS_ADMIN)) | 198 | user_ns = new_cred ? new_cred->user_ns : current_user_ns(); |
199 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
195 | return -EPERM; | 200 | return -EPERM; |
196 | 201 | ||
197 | *new_nsp = create_new_namespaces(unshare_flags, current, | 202 | *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, |
198 | new_fs ? new_fs : current->fs); | 203 | new_fs ? new_fs : current->fs); |
199 | if (IS_ERR(*new_nsp)) { | 204 | if (IS_ERR(*new_nsp)) { |
200 | err = PTR_ERR(*new_nsp); | 205 | err = PTR_ERR(*new_nsp); |
201 | goto out; | 206 | goto out; |
@@ -241,9 +246,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) | |||
241 | struct file *file; | 246 | struct file *file; |
242 | int err; | 247 | int err; |
243 | 248 | ||
244 | if (!capable(CAP_SYS_ADMIN)) | ||
245 | return -EPERM; | ||
246 | |||
247 | file = proc_ns_fget(fd); | 249 | file = proc_ns_fget(fd); |
248 | if (IS_ERR(file)) | 250 | if (IS_ERR(file)) |
249 | return PTR_ERR(file); | 251 | return PTR_ERR(file); |
@@ -254,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype) | |||
254 | if (nstype && (ops->type != nstype)) | 256 | if (nstype && (ops->type != nstype)) |
255 | goto out; | 257 | goto out; |
256 | 258 | ||
257 | new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); | 259 | new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); |
258 | if (IS_ERR(new_nsproxy)) { | 260 | if (IS_ERR(new_nsproxy)) { |
259 | err = PTR_ERR(new_nsproxy); | 261 | err = PTR_ERR(new_nsproxy); |
260 | goto out; | 262 | goto out; |
diff --git a/kernel/pid.c b/kernel/pid.c index fd996c1ed9f..3e2cf8100ac 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/pid_namespace.h> | 36 | #include <linux/pid_namespace.h> |
37 | #include <linux/init_task.h> | 37 | #include <linux/init_task.h> |
38 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
39 | #include <linux/proc_fs.h> | ||
39 | 40 | ||
40 | #define pid_hashfn(nr, ns) \ | 41 | #define pid_hashfn(nr, ns) \ |
41 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) | 42 | hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) |
@@ -78,6 +79,8 @@ struct pid_namespace init_pid_ns = { | |||
78 | .last_pid = 0, | 79 | .last_pid = 0, |
79 | .level = 0, | 80 | .level = 0, |
80 | .child_reaper = &init_task, | 81 | .child_reaper = &init_task, |
82 | .user_ns = &init_user_ns, | ||
83 | .proc_inum = PROC_PID_INIT_INO, | ||
81 | }; | 84 | }; |
82 | EXPORT_SYMBOL_GPL(init_pid_ns); | 85 | EXPORT_SYMBOL_GPL(init_pid_ns); |
83 | 86 | ||
@@ -269,8 +272,24 @@ void free_pid(struct pid *pid) | |||
269 | unsigned long flags; | 272 | unsigned long flags; |
270 | 273 | ||
271 | spin_lock_irqsave(&pidmap_lock, flags); | 274 | spin_lock_irqsave(&pidmap_lock, flags); |
272 | for (i = 0; i <= pid->level; i++) | 275 | for (i = 0; i <= pid->level; i++) { |
273 | hlist_del_rcu(&pid->numbers[i].pid_chain); | 276 | struct upid *upid = pid->numbers + i; |
277 | struct pid_namespace *ns = upid->ns; | ||
278 | hlist_del_rcu(&upid->pid_chain); | ||
279 | switch(--ns->nr_hashed) { | ||
280 | case 1: | ||
281 | /* When all that is left in the pid namespace | ||
282 | * is the reaper wake up the reaper. The reaper | ||
283 | * may be sleeping in zap_pid_ns_processes(). | ||
284 | */ | ||
285 | wake_up_process(ns->child_reaper); | ||
286 | break; | ||
287 | case 0: | ||
288 | ns->nr_hashed = -1; | ||
289 | schedule_work(&ns->proc_work); | ||
290 | break; | ||
291 | } | ||
292 | } | ||
274 | spin_unlock_irqrestore(&pidmap_lock, flags); | 293 | spin_unlock_irqrestore(&pidmap_lock, flags); |
275 | 294 | ||
276 | for (i = 0; i <= pid->level; i++) | 295 | for (i = 0; i <= pid->level; i++) |
@@ -292,6 +311,7 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
292 | goto out; | 311 | goto out; |
293 | 312 | ||
294 | tmp = ns; | 313 | tmp = ns; |
314 | pid->level = ns->level; | ||
295 | for (i = ns->level; i >= 0; i--) { | 315 | for (i = ns->level; i >= 0; i--) { |
296 | nr = alloc_pidmap(tmp); | 316 | nr = alloc_pidmap(tmp); |
297 | if (nr < 0) | 317 | if (nr < 0) |
@@ -302,22 +322,32 @@ struct pid *alloc_pid(struct pid_namespace *ns) | |||
302 | tmp = tmp->parent; | 322 | tmp = tmp->parent; |
303 | } | 323 | } |
304 | 324 | ||
325 | if (unlikely(is_child_reaper(pid))) { | ||
326 | if (pid_ns_prepare_proc(ns)) | ||
327 | goto out_free; | ||
328 | } | ||
329 | |||
305 | get_pid_ns(ns); | 330 | get_pid_ns(ns); |
306 | pid->level = ns->level; | ||
307 | atomic_set(&pid->count, 1); | 331 | atomic_set(&pid->count, 1); |
308 | for (type = 0; type < PIDTYPE_MAX; ++type) | 332 | for (type = 0; type < PIDTYPE_MAX; ++type) |
309 | INIT_HLIST_HEAD(&pid->tasks[type]); | 333 | INIT_HLIST_HEAD(&pid->tasks[type]); |
310 | 334 | ||
311 | upid = pid->numbers + ns->level; | 335 | upid = pid->numbers + ns->level; |
312 | spin_lock_irq(&pidmap_lock); | 336 | spin_lock_irq(&pidmap_lock); |
313 | for ( ; upid >= pid->numbers; --upid) | 337 | if (ns->nr_hashed < 0) |
338 | goto out_unlock; | ||
339 | for ( ; upid >= pid->numbers; --upid) { | ||
314 | hlist_add_head_rcu(&upid->pid_chain, | 340 | hlist_add_head_rcu(&upid->pid_chain, |
315 | &pid_hash[pid_hashfn(upid->nr, upid->ns)]); | 341 | &pid_hash[pid_hashfn(upid->nr, upid->ns)]); |
342 | upid->ns->nr_hashed++; | ||
343 | } | ||
316 | spin_unlock_irq(&pidmap_lock); | 344 | spin_unlock_irq(&pidmap_lock); |
317 | 345 | ||
318 | out: | 346 | out: |
319 | return pid; | 347 | return pid; |
320 | 348 | ||
349 | out_unlock: | ||
350 | spin_unlock(&pidmap_lock); | ||
321 | out_free: | 351 | out_free: |
322 | while (++i <= ns->level) | 352 | while (++i <= ns->level) |
323 | free_pidmap(pid->numbers + i); | 353 | free_pidmap(pid->numbers + i); |
@@ -344,7 +374,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns); | |||
344 | 374 | ||
345 | struct pid *find_vpid(int nr) | 375 | struct pid *find_vpid(int nr) |
346 | { | 376 | { |
347 | return find_pid_ns(nr, current->nsproxy->pid_ns); | 377 | return find_pid_ns(nr, task_active_pid_ns(current)); |
348 | } | 378 | } |
349 | EXPORT_SYMBOL_GPL(find_vpid); | 379 | EXPORT_SYMBOL_GPL(find_vpid); |
350 | 380 | ||
@@ -428,7 +458,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) | |||
428 | 458 | ||
429 | struct task_struct *find_task_by_vpid(pid_t vnr) | 459 | struct task_struct *find_task_by_vpid(pid_t vnr) |
430 | { | 460 | { |
431 | return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns); | 461 | return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); |
432 | } | 462 | } |
433 | 463 | ||
434 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | 464 | struct pid *get_task_pid(struct task_struct *task, enum pid_type type) |
@@ -483,7 +513,7 @@ EXPORT_SYMBOL_GPL(pid_nr_ns); | |||
483 | 513 | ||
484 | pid_t pid_vnr(struct pid *pid) | 514 | pid_t pid_vnr(struct pid *pid) |
485 | { | 515 | { |
486 | return pid_nr_ns(pid, current->nsproxy->pid_ns); | 516 | return pid_nr_ns(pid, task_active_pid_ns(current)); |
487 | } | 517 | } |
488 | EXPORT_SYMBOL_GPL(pid_vnr); | 518 | EXPORT_SYMBOL_GPL(pid_vnr); |
489 | 519 | ||
@@ -494,7 +524,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, | |||
494 | 524 | ||
495 | rcu_read_lock(); | 525 | rcu_read_lock(); |
496 | if (!ns) | 526 | if (!ns) |
497 | ns = current->nsproxy->pid_ns; | 527 | ns = task_active_pid_ns(current); |
498 | if (likely(pid_alive(task))) { | 528 | if (likely(pid_alive(task))) { |
499 | if (type != PIDTYPE_PID) | 529 | if (type != PIDTYPE_PID) |
500 | task = task->group_leader; | 530 | task = task->group_leader; |
@@ -569,6 +599,7 @@ void __init pidmap_init(void) | |||
569 | /* Reserve PID 0. We never call free_pidmap(0) */ | 599 | /* Reserve PID 0. We never call free_pidmap(0) */ |
570 | set_bit(0, init_pid_ns.pidmap[0].page); | 600 | set_bit(0, init_pid_ns.pidmap[0].page); |
571 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); | 601 | atomic_dec(&init_pid_ns.pidmap[0].nr_free); |
602 | init_pid_ns.nr_hashed = 1; | ||
572 | 603 | ||
573 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, | 604 | init_pid_ns.pid_cachep = KMEM_CACHE(pid, |
574 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 605 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7b07cc0dfb7..560da0dab23 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/pid.h> | 11 | #include <linux/pid.h> |
12 | #include <linux/pid_namespace.h> | 12 | #include <linux/pid_namespace.h> |
13 | #include <linux/user_namespace.h> | ||
13 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
14 | #include <linux/err.h> | 15 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 16 | #include <linux/acct.h> |
@@ -71,10 +72,17 @@ err_alloc: | |||
71 | return NULL; | 72 | return NULL; |
72 | } | 73 | } |
73 | 74 | ||
75 | static void proc_cleanup_work(struct work_struct *work) | ||
76 | { | ||
77 | struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); | ||
78 | pid_ns_release_proc(ns); | ||
79 | } | ||
80 | |||
74 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ | 81 | /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ |
75 | #define MAX_PID_NS_LEVEL 32 | 82 | #define MAX_PID_NS_LEVEL 32 |
76 | 83 | ||
77 | static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns) | 84 | static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, |
85 | struct pid_namespace *parent_pid_ns) | ||
78 | { | 86 | { |
79 | struct pid_namespace *ns; | 87 | struct pid_namespace *ns; |
80 | unsigned int level = parent_pid_ns->level + 1; | 88 | unsigned int level = parent_pid_ns->level + 1; |
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
99 | if (ns->pid_cachep == NULL) | 107 | if (ns->pid_cachep == NULL) |
100 | goto out_free_map; | 108 | goto out_free_map; |
101 | 109 | ||
110 | err = proc_alloc_inum(&ns->proc_inum); | ||
111 | if (err) | ||
112 | goto out_free_map; | ||
113 | |||
102 | kref_init(&ns->kref); | 114 | kref_init(&ns->kref); |
103 | ns->level = level; | 115 | ns->level = level; |
104 | ns->parent = get_pid_ns(parent_pid_ns); | 116 | ns->parent = get_pid_ns(parent_pid_ns); |
117 | ns->user_ns = get_user_ns(user_ns); | ||
118 | INIT_WORK(&ns->proc_work, proc_cleanup_work); | ||
105 | 119 | ||
106 | set_bit(0, ns->pidmap[0].page); | 120 | set_bit(0, ns->pidmap[0].page); |
107 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); | 121 | atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); |
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
109 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 123 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
110 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 124 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
111 | 125 | ||
112 | err = pid_ns_prepare_proc(ns); | ||
113 | if (err) | ||
114 | goto out_put_parent_pid_ns; | ||
115 | |||
116 | return ns; | 126 | return ns; |
117 | 127 | ||
118 | out_put_parent_pid_ns: | ||
119 | put_pid_ns(parent_pid_ns); | ||
120 | out_free_map: | 128 | out_free_map: |
121 | kfree(ns->pidmap[0].page); | 129 | kfree(ns->pidmap[0].page); |
122 | out_free: | 130 | out_free: |
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns) | |||
129 | { | 137 | { |
130 | int i; | 138 | int i; |
131 | 139 | ||
140 | proc_free_inum(ns->proc_inum); | ||
132 | for (i = 0; i < PIDMAP_ENTRIES; i++) | 141 | for (i = 0; i < PIDMAP_ENTRIES; i++) |
133 | kfree(ns->pidmap[i].page); | 142 | kfree(ns->pidmap[i].page); |
143 | put_user_ns(ns->user_ns); | ||
134 | kmem_cache_free(pid_ns_cachep, ns); | 144 | kmem_cache_free(pid_ns_cachep, ns); |
135 | } | 145 | } |
136 | 146 | ||
137 | struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) | 147 | struct pid_namespace *copy_pid_ns(unsigned long flags, |
148 | struct user_namespace *user_ns, struct pid_namespace *old_ns) | ||
138 | { | 149 | { |
139 | if (!(flags & CLONE_NEWPID)) | 150 | if (!(flags & CLONE_NEWPID)) |
140 | return get_pid_ns(old_ns); | 151 | return get_pid_ns(old_ns); |
141 | if (flags & (CLONE_THREAD|CLONE_PARENT)) | 152 | if (task_active_pid_ns(current) != old_ns) |
142 | return ERR_PTR(-EINVAL); | 153 | return ERR_PTR(-EINVAL); |
143 | return create_pid_namespace(old_ns); | 154 | return create_pid_namespace(user_ns, old_ns); |
144 | } | 155 | } |
145 | 156 | ||
146 | static void free_pid_ns(struct kref *kref) | 157 | static void free_pid_ns(struct kref *kref) |
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
211 | 222 | ||
212 | /* | 223 | /* |
213 | * sys_wait4() above can't reap the TASK_DEAD children. | 224 | * sys_wait4() above can't reap the TASK_DEAD children. |
214 | * Make sure they all go away, see __unhash_process(). | 225 | * Make sure they all go away, see free_pid(). |
215 | */ | 226 | */ |
216 | for (;;) { | 227 | for (;;) { |
217 | bool need_wait = false; | 228 | set_current_state(TASK_UNINTERRUPTIBLE); |
218 | 229 | if (pid_ns->nr_hashed == 1) | |
219 | read_lock(&tasklist_lock); | ||
220 | if (!list_empty(&current->children)) { | ||
221 | __set_current_state(TASK_UNINTERRUPTIBLE); | ||
222 | need_wait = true; | ||
223 | } | ||
224 | read_unlock(&tasklist_lock); | ||
225 | |||
226 | if (!need_wait) | ||
227 | break; | 230 | break; |
228 | schedule(); | 231 | schedule(); |
229 | } | 232 | } |
233 | __set_current_state(TASK_RUNNING); | ||
230 | 234 | ||
231 | if (pid_ns->reboot) | 235 | if (pid_ns->reboot) |
232 | current->signal->group_exit_code = pid_ns->reboot; | 236 | current->signal->group_exit_code = pid_ns->reboot; |
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
239 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | 243 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
240 | void __user *buffer, size_t *lenp, loff_t *ppos) | 244 | void __user *buffer, size_t *lenp, loff_t *ppos) |
241 | { | 245 | { |
246 | struct pid_namespace *pid_ns = task_active_pid_ns(current); | ||
242 | struct ctl_table tmp = *table; | 247 | struct ctl_table tmp = *table; |
243 | 248 | ||
244 | if (write && !capable(CAP_SYS_ADMIN)) | 249 | if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) |
245 | return -EPERM; | 250 | return -EPERM; |
246 | 251 | ||
247 | /* | 252 | /* |
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, | |||
250 | * it should synchronize its usage with external means. | 255 | * it should synchronize its usage with external means. |
251 | */ | 256 | */ |
252 | 257 | ||
253 | tmp.data = &current->nsproxy->pid_ns->last_pid; | 258 | tmp.data = &pid_ns->last_pid; |
254 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | 259 | return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
255 | } | 260 | } |
256 | 261 | ||
@@ -299,6 +304,67 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | |||
299 | return 0; | 304 | return 0; |
300 | } | 305 | } |
301 | 306 | ||
307 | static void *pidns_get(struct task_struct *task) | ||
308 | { | ||
309 | struct pid_namespace *ns; | ||
310 | |||
311 | rcu_read_lock(); | ||
312 | ns = get_pid_ns(task_active_pid_ns(task)); | ||
313 | rcu_read_unlock(); | ||
314 | |||
315 | return ns; | ||
316 | } | ||
317 | |||
318 | static void pidns_put(void *ns) | ||
319 | { | ||
320 | put_pid_ns(ns); | ||
321 | } | ||
322 | |||
323 | static int pidns_install(struct nsproxy *nsproxy, void *ns) | ||
324 | { | ||
325 | struct pid_namespace *active = task_active_pid_ns(current); | ||
326 | struct pid_namespace *ancestor, *new = ns; | ||
327 | |||
328 | if (!ns_capable(new->user_ns, CAP_SYS_ADMIN)) | ||
329 | return -EPERM; | ||
330 | |||
331 | /* | ||
332 | * Only allow entering the current active pid namespace | ||
333 | * or a child of the current active pid namespace. | ||
334 | * | ||
335 | * This is required for fork to return a usable pid value and | ||
336 | * this maintains the property that processes and their | ||
337 | * children can not escape their current pid namespace. | ||
338 | */ | ||
339 | if (new->level < active->level) | ||
340 | return -EINVAL; | ||
341 | |||
342 | ancestor = new; | ||
343 | while (ancestor->level > active->level) | ||
344 | ancestor = ancestor->parent; | ||
345 | if (ancestor != active) | ||
346 | return -EINVAL; | ||
347 | |||
348 | put_pid_ns(nsproxy->pid_ns); | ||
349 | nsproxy->pid_ns = get_pid_ns(new); | ||
350 | return 0; | ||
351 | } | ||
352 | |||
353 | static unsigned int pidns_inum(void *ns) | ||
354 | { | ||
355 | struct pid_namespace *pid_ns = ns; | ||
356 | return pid_ns->proc_inum; | ||
357 | } | ||
358 | |||
359 | const struct proc_ns_operations pidns_operations = { | ||
360 | .name = "pid", | ||
361 | .type = CLONE_NEWPID, | ||
362 | .get = pidns_get, | ||
363 | .put = pidns_put, | ||
364 | .install = pidns_install, | ||
365 | .inum = pidns_inum, | ||
366 | }; | ||
367 | |||
302 | static __init int pid_namespaces_init(void) | 368 | static __init int pid_namespaces_init(void) |
303 | { | 369 | { |
304 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 370 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f5e55dda95..7b09b88862c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -215,8 +215,12 @@ ok: | |||
215 | smp_rmb(); | 215 | smp_rmb(); |
216 | if (task->mm) | 216 | if (task->mm) |
217 | dumpable = get_dumpable(task->mm); | 217 | dumpable = get_dumpable(task->mm); |
218 | if (!dumpable && !ptrace_has_cap(task_user_ns(task), mode)) | 218 | rcu_read_lock(); |
219 | if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { | ||
220 | rcu_read_unlock(); | ||
219 | return -EPERM; | 221 | return -EPERM; |
222 | } | ||
223 | rcu_read_unlock(); | ||
220 | 224 | ||
221 | return security_ptrace_access_check(task, mode); | 225 | return security_ptrace_access_check(task, mode); |
222 | } | 226 | } |
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request, | |||
280 | 284 | ||
281 | if (seize) | 285 | if (seize) |
282 | flags |= PT_SEIZED; | 286 | flags |= PT_SEIZED; |
283 | if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE)) | 287 | rcu_read_lock(); |
288 | if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) | ||
284 | flags |= PT_PTRACE_CAP; | 289 | flags |= PT_PTRACE_CAP; |
290 | rcu_read_unlock(); | ||
285 | task->ptrace = flags; | 291 | task->ptrace = flags; |
286 | 292 | ||
287 | __ptrace_link(task, current); | 293 | __ptrace_link(task, current); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c1fb82104bf..257002c13bb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -4097,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4097 | goto out_free_cpus_allowed; | 4097 | goto out_free_cpus_allowed; |
4098 | } | 4098 | } |
4099 | retval = -EPERM; | 4099 | retval = -EPERM; |
4100 | if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE)) | 4100 | if (!check_same_owner(p)) { |
4101 | goto out_unlock; | 4101 | rcu_read_lock(); |
4102 | if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { | ||
4103 | rcu_read_unlock(); | ||
4104 | goto out_unlock; | ||
4105 | } | ||
4106 | rcu_read_unlock(); | ||
4107 | } | ||
4102 | 4108 | ||
4103 | retval = security_task_setscheduler(p); | 4109 | retval = security_task_setscheduler(p); |
4104 | if (retval) | 4110 | if (retval) |
diff --git a/kernel/signal.c b/kernel/signal.c index a49c7f36ceb..580a91e6347 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1753,7 +1753,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, | |||
1753 | * see comment in do_notify_parent() about the following 4 lines | 1753 | * see comment in do_notify_parent() about the following 4 lines |
1754 | */ | 1754 | */ |
1755 | rcu_read_lock(); | 1755 | rcu_read_lock(); |
1756 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); | 1756 | info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent)); |
1757 | info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); | 1757 | info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); |
1758 | rcu_read_unlock(); | 1758 | rcu_read_unlock(); |
1759 | 1759 | ||
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 65bdcf198d4..5a638445050 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -1344,7 +1344,7 @@ static ssize_t binary_sysctl(const int *name, int nlen, | |||
1344 | goto out_putname; | 1344 | goto out_putname; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | mnt = current->nsproxy->pid_ns->proc_mnt; | 1347 | mnt = task_active_pid_ns(current)->proc_mnt; |
1348 | file = file_open_root(mnt->mnt_root, mnt, pathname, flags); | 1348 | file = file_open_root(mnt->mnt_root, mnt, pathname, flags); |
1349 | result = PTR_ERR(file); | 1349 | result = PTR_ERR(file); |
1350 | if (IS_ERR(file)) | 1350 | if (IS_ERR(file)) |
diff --git a/kernel/user.c b/kernel/user.c index 750acffbe9e..33acb5e53a5 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/export.h> | 17 | #include <linux/export.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | #include <linux/proc_fs.h> | ||
19 | 20 | ||
20 | /* | 21 | /* |
21 | * userns count is 1 for root user, 1 for init_uts_ns, | 22 | * userns count is 1 for root user, 1 for init_uts_ns, |
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = { | |||
51 | }, | 52 | }, |
52 | .owner = GLOBAL_ROOT_UID, | 53 | .owner = GLOBAL_ROOT_UID, |
53 | .group = GLOBAL_ROOT_GID, | 54 | .group = GLOBAL_ROOT_GID, |
55 | .proc_inum = PROC_USER_INIT_INO, | ||
54 | }; | 56 | }; |
55 | EXPORT_SYMBOL_GPL(init_user_ns); | 57 | EXPORT_SYMBOL_GPL(init_user_ns); |
56 | 58 | ||
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 456a6b9fba3..f5975ccf934 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/nsproxy.h> | 9 | #include <linux/nsproxy.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/user_namespace.h> | 11 | #include <linux/user_namespace.h> |
12 | #include <linux/proc_fs.h> | ||
12 | #include <linux/highuid.h> | 13 | #include <linux/highuid.h> |
13 | #include <linux/cred.h> | 14 | #include <linux/cred.h> |
14 | #include <linux/securebits.h> | 15 | #include <linux/securebits.h> |
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly; | |||
26 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | 27 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, |
27 | struct uid_gid_map *map); | 28 | struct uid_gid_map *map); |
28 | 29 | ||
30 | static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) | ||
31 | { | ||
32 | /* Start with the same capabilities as init but useless for doing | ||
33 | * anything as the capabilities are bound to the new user namespace. | ||
34 | */ | ||
35 | cred->securebits = SECUREBITS_DEFAULT; | ||
36 | cred->cap_inheritable = CAP_EMPTY_SET; | ||
37 | cred->cap_permitted = CAP_FULL_SET; | ||
38 | cred->cap_effective = CAP_FULL_SET; | ||
39 | cred->cap_bset = CAP_FULL_SET; | ||
40 | #ifdef CONFIG_KEYS | ||
41 | key_put(cred->request_key_auth); | ||
42 | cred->request_key_auth = NULL; | ||
43 | #endif | ||
44 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ | ||
45 | cred->user_ns = user_ns; | ||
46 | } | ||
47 | |||
29 | /* | 48 | /* |
30 | * Create a new user namespace, deriving the creator from the user in the | 49 | * Create a new user namespace, deriving the creator from the user in the |
31 | * passed credentials, and replacing that user with the new root user for the | 50 | * passed credentials, and replacing that user with the new root user for the |
@@ -39,6 +58,7 @@ int create_user_ns(struct cred *new) | |||
39 | struct user_namespace *ns, *parent_ns = new->user_ns; | 58 | struct user_namespace *ns, *parent_ns = new->user_ns; |
40 | kuid_t owner = new->euid; | 59 | kuid_t owner = new->euid; |
41 | kgid_t group = new->egid; | 60 | kgid_t group = new->egid; |
61 | int ret; | ||
42 | 62 | ||
43 | /* The creator needs a mapping in the parent user namespace | 63 | /* The creator needs a mapping in the parent user namespace |
44 | * or else we won't be able to reasonably tell userspace who | 64 | * or else we won't be able to reasonably tell userspace who |
@@ -52,38 +72,45 @@ int create_user_ns(struct cred *new) | |||
52 | if (!ns) | 72 | if (!ns) |
53 | return -ENOMEM; | 73 | return -ENOMEM; |
54 | 74 | ||
75 | ret = proc_alloc_inum(&ns->proc_inum); | ||
76 | if (ret) { | ||
77 | kmem_cache_free(user_ns_cachep, ns); | ||
78 | return ret; | ||
79 | } | ||
80 | |||
55 | kref_init(&ns->kref); | 81 | kref_init(&ns->kref); |
82 | /* Leave the new->user_ns reference with the new user namespace. */ | ||
56 | ns->parent = parent_ns; | 83 | ns->parent = parent_ns; |
57 | ns->owner = owner; | 84 | ns->owner = owner; |
58 | ns->group = group; | 85 | ns->group = group; |
59 | 86 | ||
60 | /* Start with the same capabilities as init but useless for doing | 87 | set_cred_user_ns(new, ns); |
61 | * anything as the capabilities are bound to the new user namespace. | ||
62 | */ | ||
63 | new->securebits = SECUREBITS_DEFAULT; | ||
64 | new->cap_inheritable = CAP_EMPTY_SET; | ||
65 | new->cap_permitted = CAP_FULL_SET; | ||
66 | new->cap_effective = CAP_FULL_SET; | ||
67 | new->cap_bset = CAP_FULL_SET; | ||
68 | #ifdef CONFIG_KEYS | ||
69 | key_put(new->request_key_auth); | ||
70 | new->request_key_auth = NULL; | ||
71 | #endif | ||
72 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ | ||
73 | |||
74 | /* Leave the new->user_ns reference with the new user namespace. */ | ||
75 | /* Leave the reference to our user_ns with the new cred. */ | ||
76 | new->user_ns = ns; | ||
77 | 88 | ||
78 | return 0; | 89 | return 0; |
79 | } | 90 | } |
80 | 91 | ||
92 | int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) | ||
93 | { | ||
94 | struct cred *cred; | ||
95 | |||
96 | if (!(unshare_flags & CLONE_NEWUSER)) | ||
97 | return 0; | ||
98 | |||
99 | cred = prepare_creds(); | ||
100 | if (!cred) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | *new_cred = cred; | ||
104 | return create_user_ns(cred); | ||
105 | } | ||
106 | |||
81 | void free_user_ns(struct kref *kref) | 107 | void free_user_ns(struct kref *kref) |
82 | { | 108 | { |
83 | struct user_namespace *parent, *ns = | 109 | struct user_namespace *parent, *ns = |
84 | container_of(kref, struct user_namespace, kref); | 110 | container_of(kref, struct user_namespace, kref); |
85 | 111 | ||
86 | parent = ns->parent; | 112 | parent = ns->parent; |
113 | proc_free_inum(ns->proc_inum); | ||
87 | kmem_cache_free(user_ns_cachep, ns); | 114 | kmem_cache_free(user_ns_cachep, ns); |
88 | put_user_ns(parent); | 115 | put_user_ns(parent); |
89 | } | 116 | } |
@@ -372,7 +399,7 @@ static int uid_m_show(struct seq_file *seq, void *v) | |||
372 | struct user_namespace *lower_ns; | 399 | struct user_namespace *lower_ns; |
373 | uid_t lower; | 400 | uid_t lower; |
374 | 401 | ||
375 | lower_ns = current_user_ns(); | 402 | lower_ns = seq_user_ns(seq); |
376 | if ((lower_ns == ns) && lower_ns->parent) | 403 | if ((lower_ns == ns) && lower_ns->parent) |
377 | lower_ns = lower_ns->parent; | 404 | lower_ns = lower_ns->parent; |
378 | 405 | ||
@@ -393,7 +420,7 @@ static int gid_m_show(struct seq_file *seq, void *v) | |||
393 | struct user_namespace *lower_ns; | 420 | struct user_namespace *lower_ns; |
394 | gid_t lower; | 421 | gid_t lower; |
395 | 422 | ||
396 | lower_ns = current_user_ns(); | 423 | lower_ns = seq_user_ns(seq); |
397 | if ((lower_ns == ns) && lower_ns->parent) | 424 | if ((lower_ns == ns) && lower_ns->parent) |
398 | lower_ns = lower_ns->parent; | 425 | lower_ns = lower_ns->parent; |
399 | 426 | ||
@@ -669,10 +696,14 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz | |||
669 | { | 696 | { |
670 | struct seq_file *seq = file->private_data; | 697 | struct seq_file *seq = file->private_data; |
671 | struct user_namespace *ns = seq->private; | 698 | struct user_namespace *ns = seq->private; |
699 | struct user_namespace *seq_ns = seq_user_ns(seq); | ||
672 | 700 | ||
673 | if (!ns->parent) | 701 | if (!ns->parent) |
674 | return -EPERM; | 702 | return -EPERM; |
675 | 703 | ||
704 | if ((seq_ns != ns) && (seq_ns != ns->parent)) | ||
705 | return -EPERM; | ||
706 | |||
676 | return map_write(file, buf, size, ppos, CAP_SETUID, | 707 | return map_write(file, buf, size, ppos, CAP_SETUID, |
677 | &ns->uid_map, &ns->parent->uid_map); | 708 | &ns->uid_map, &ns->parent->uid_map); |
678 | } | 709 | } |
@@ -681,10 +712,14 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz | |||
681 | { | 712 | { |
682 | struct seq_file *seq = file->private_data; | 713 | struct seq_file *seq = file->private_data; |
683 | struct user_namespace *ns = seq->private; | 714 | struct user_namespace *ns = seq->private; |
715 | struct user_namespace *seq_ns = seq_user_ns(seq); | ||
684 | 716 | ||
685 | if (!ns->parent) | 717 | if (!ns->parent) |
686 | return -EPERM; | 718 | return -EPERM; |
687 | 719 | ||
720 | if ((seq_ns != ns) && (seq_ns != ns->parent)) | ||
721 | return -EPERM; | ||
722 | |||
688 | return map_write(file, buf, size, ppos, CAP_SETGID, | 723 | return map_write(file, buf, size, ppos, CAP_SETGID, |
689 | &ns->gid_map, &ns->parent->gid_map); | 724 | &ns->gid_map, &ns->parent->gid_map); |
690 | } | 725 | } |
@@ -709,6 +744,21 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t | |||
709 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | 744 | static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, |
710 | struct uid_gid_map *new_map) | 745 | struct uid_gid_map *new_map) |
711 | { | 746 | { |
747 | /* Allow mapping to your own filesystem ids */ | ||
748 | if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { | ||
749 | u32 id = new_map->extent[0].lower_first; | ||
750 | if (cap_setid == CAP_SETUID) { | ||
751 | kuid_t uid = make_kuid(ns->parent, id); | ||
752 | if (uid_eq(uid, current_fsuid())) | ||
753 | return true; | ||
754 | } | ||
755 | else if (cap_setid == CAP_SETGID) { | ||
756 | kgid_t gid = make_kgid(ns->parent, id); | ||
757 | if (gid_eq(gid, current_fsgid())) | ||
758 | return true; | ||
759 | } | ||
760 | } | ||
761 | |||
712 | /* Allow anyone to set a mapping that doesn't require privilege */ | 762 | /* Allow anyone to set a mapping that doesn't require privilege */ |
713 | if (!cap_valid(cap_setid)) | 763 | if (!cap_valid(cap_setid)) |
714 | return true; | 764 | return true; |
@@ -722,6 +772,65 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, | |||
722 | return false; | 772 | return false; |
723 | } | 773 | } |
724 | 774 | ||
775 | static void *userns_get(struct task_struct *task) | ||
776 | { | ||
777 | struct user_namespace *user_ns; | ||
778 | |||
779 | rcu_read_lock(); | ||
780 | user_ns = get_user_ns(__task_cred(task)->user_ns); | ||
781 | rcu_read_unlock(); | ||
782 | |||
783 | return user_ns; | ||
784 | } | ||
785 | |||
786 | static void userns_put(void *ns) | ||
787 | { | ||
788 | put_user_ns(ns); | ||
789 | } | ||
790 | |||
791 | static int userns_install(struct nsproxy *nsproxy, void *ns) | ||
792 | { | ||
793 | struct user_namespace *user_ns = ns; | ||
794 | struct cred *cred; | ||
795 | |||
796 | /* Don't allow gaining capabilities by reentering | ||
797 | * the same user namespace. | ||
798 | */ | ||
799 | if (user_ns == current_user_ns()) | ||
800 | return -EINVAL; | ||
801 | |||
802 | /* Threaded processes may not enter a different user namespace */ | ||
803 | if (atomic_read(&current->mm->mm_users) > 1) | ||
804 | return -EINVAL; | ||
805 | |||
806 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
807 | return -EPERM; | ||
808 | |||
809 | cred = prepare_creds(); | ||
810 | if (!cred) | ||
811 | return -ENOMEM; | ||
812 | |||
813 | put_user_ns(cred->user_ns); | ||
814 | set_cred_user_ns(cred, get_user_ns(user_ns)); | ||
815 | |||
816 | return commit_creds(cred); | ||
817 | } | ||
818 | |||
819 | static unsigned int userns_inum(void *ns) | ||
820 | { | ||
821 | struct user_namespace *user_ns = ns; | ||
822 | return user_ns->proc_inum; | ||
823 | } | ||
824 | |||
825 | const struct proc_ns_operations userns_operations = { | ||
826 | .name = "user", | ||
827 | .type = CLONE_NEWUSER, | ||
828 | .get = userns_get, | ||
829 | .put = userns_put, | ||
830 | .install = userns_install, | ||
831 | .inum = userns_inum, | ||
832 | }; | ||
833 | |||
725 | static __init int user_namespaces_init(void) | 834 | static __init int user_namespaces_init(void) |
726 | { | 835 | { |
727 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); | 836 | user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC); |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 679d97a5d3f..f6336d51d64 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -32,18 +32,25 @@ static struct uts_namespace *create_uts_ns(void) | |||
32 | * @old_ns: namespace to clone | 32 | * @old_ns: namespace to clone |
33 | * Return NULL on error (failure to kmalloc), new ns otherwise | 33 | * Return NULL on error (failure to kmalloc), new ns otherwise |
34 | */ | 34 | */ |
35 | static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, | 35 | static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, |
36 | struct uts_namespace *old_ns) | 36 | struct uts_namespace *old_ns) |
37 | { | 37 | { |
38 | struct uts_namespace *ns; | 38 | struct uts_namespace *ns; |
39 | int err; | ||
39 | 40 | ||
40 | ns = create_uts_ns(); | 41 | ns = create_uts_ns(); |
41 | if (!ns) | 42 | if (!ns) |
42 | return ERR_PTR(-ENOMEM); | 43 | return ERR_PTR(-ENOMEM); |
43 | 44 | ||
45 | err = proc_alloc_inum(&ns->proc_inum); | ||
46 | if (err) { | ||
47 | kfree(ns); | ||
48 | return ERR_PTR(err); | ||
49 | } | ||
50 | |||
44 | down_read(&uts_sem); | 51 | down_read(&uts_sem); |
45 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 52 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
46 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns)); | 53 | ns->user_ns = get_user_ns(user_ns); |
47 | up_read(&uts_sem); | 54 | up_read(&uts_sem); |
48 | return ns; | 55 | return ns; |
49 | } | 56 | } |
@@ -55,9 +62,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, | |||
55 | * versa. | 62 | * versa. |
56 | */ | 63 | */ |
57 | struct uts_namespace *copy_utsname(unsigned long flags, | 64 | struct uts_namespace *copy_utsname(unsigned long flags, |
58 | struct task_struct *tsk) | 65 | struct user_namespace *user_ns, struct uts_namespace *old_ns) |
59 | { | 66 | { |
60 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
61 | struct uts_namespace *new_ns; | 67 | struct uts_namespace *new_ns; |
62 | 68 | ||
63 | BUG_ON(!old_ns); | 69 | BUG_ON(!old_ns); |
@@ -66,7 +72,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, | |||
66 | if (!(flags & CLONE_NEWUTS)) | 72 | if (!(flags & CLONE_NEWUTS)) |
67 | return old_ns; | 73 | return old_ns; |
68 | 74 | ||
69 | new_ns = clone_uts_ns(tsk, old_ns); | 75 | new_ns = clone_uts_ns(user_ns, old_ns); |
70 | 76 | ||
71 | put_uts_ns(old_ns); | 77 | put_uts_ns(old_ns); |
72 | return new_ns; | 78 | return new_ns; |
@@ -78,6 +84,7 @@ void free_uts_ns(struct kref *kref) | |||
78 | 84 | ||
79 | ns = container_of(kref, struct uts_namespace, kref); | 85 | ns = container_of(kref, struct uts_namespace, kref); |
80 | put_user_ns(ns->user_ns); | 86 | put_user_ns(ns->user_ns); |
87 | proc_free_inum(ns->proc_inum); | ||
81 | kfree(ns); | 88 | kfree(ns); |
82 | } | 89 | } |
83 | 90 | ||
@@ -102,19 +109,31 @@ static void utsns_put(void *ns) | |||
102 | put_uts_ns(ns); | 109 | put_uts_ns(ns); |
103 | } | 110 | } |
104 | 111 | ||
105 | static int utsns_install(struct nsproxy *nsproxy, void *ns) | 112 | static int utsns_install(struct nsproxy *nsproxy, void *new) |
106 | { | 113 | { |
114 | struct uts_namespace *ns = new; | ||
115 | |||
116 | if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) | ||
117 | return -EPERM; | ||
118 | |||
107 | get_uts_ns(ns); | 119 | get_uts_ns(ns); |
108 | put_uts_ns(nsproxy->uts_ns); | 120 | put_uts_ns(nsproxy->uts_ns); |
109 | nsproxy->uts_ns = ns; | 121 | nsproxy->uts_ns = ns; |
110 | return 0; | 122 | return 0; |
111 | } | 123 | } |
112 | 124 | ||
125 | static unsigned int utsns_inum(void *vp) | ||
126 | { | ||
127 | struct uts_namespace *ns = vp; | ||
128 | |||
129 | return ns->proc_inum; | ||
130 | } | ||
131 | |||
113 | const struct proc_ns_operations utsns_operations = { | 132 | const struct proc_ns_operations utsns_operations = { |
114 | .name = "uts", | 133 | .name = "uts", |
115 | .type = CLONE_NEWUTS, | 134 | .type = CLONE_NEWUTS, |
116 | .get = utsns_get, | 135 | .get = utsns_get, |
117 | .put = utsns_put, | 136 | .put = utsns_put, |
118 | .install = utsns_install, | 137 | .install = utsns_install, |
138 | .inum = utsns_inum, | ||
119 | }; | 139 | }; |
120 | |||
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6456439cbbd..2e9a3132b8d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid) | |||
381 | } | 381 | } |
382 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); | 382 | EXPORT_SYMBOL_GPL(get_net_ns_by_pid); |
383 | 383 | ||
384 | static __net_init int net_ns_net_init(struct net *net) | ||
385 | { | ||
386 | return proc_alloc_inum(&net->proc_inum); | ||
387 | } | ||
388 | |||
389 | static __net_exit void net_ns_net_exit(struct net *net) | ||
390 | { | ||
391 | proc_free_inum(net->proc_inum); | ||
392 | } | ||
393 | |||
394 | static struct pernet_operations __net_initdata net_ns_ops = { | ||
395 | .init = net_ns_net_init, | ||
396 | .exit = net_ns_net_exit, | ||
397 | }; | ||
398 | |||
384 | static int __init net_ns_init(void) | 399 | static int __init net_ns_init(void) |
385 | { | 400 | { |
386 | struct net_generic *ng; | 401 | struct net_generic *ng; |
@@ -412,6 +427,8 @@ static int __init net_ns_init(void) | |||
412 | 427 | ||
413 | mutex_unlock(&net_mutex); | 428 | mutex_unlock(&net_mutex); |
414 | 429 | ||
430 | register_pernet_subsys(&net_ns_ops); | ||
431 | |||
415 | return 0; | 432 | return 0; |
416 | } | 433 | } |
417 | 434 | ||
@@ -630,16 +647,28 @@ static void netns_put(void *ns) | |||
630 | 647 | ||
631 | static int netns_install(struct nsproxy *nsproxy, void *ns) | 648 | static int netns_install(struct nsproxy *nsproxy, void *ns) |
632 | { | 649 | { |
650 | struct net *net = ns; | ||
651 | |||
652 | if (!ns_capable(net->user_ns, CAP_SYS_ADMIN)) | ||
653 | return -EPERM; | ||
654 | |||
633 | put_net(nsproxy->net_ns); | 655 | put_net(nsproxy->net_ns); |
634 | nsproxy->net_ns = get_net(ns); | 656 | nsproxy->net_ns = get_net(net); |
635 | return 0; | 657 | return 0; |
636 | } | 658 | } |
637 | 659 | ||
660 | static unsigned int netns_inum(void *ns) | ||
661 | { | ||
662 | struct net *net = ns; | ||
663 | return net->proc_inum; | ||
664 | } | ||
665 | |||
638 | const struct proc_ns_operations netns_operations = { | 666 | const struct proc_ns_operations netns_operations = { |
639 | .name = "net", | 667 | .name = "net", |
640 | .type = CLONE_NEWNET, | 668 | .type = CLONE_NEWNET, |
641 | .get = netns_get, | 669 | .get = netns_get, |
642 | .put = netns_put, | 670 | .put = netns_put, |
643 | .install = netns_install, | 671 | .install = netns_install, |
672 | .inum = netns_inum, | ||
644 | }; | 673 | }; |
645 | #endif | 674 | #endif |
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 2663145d119..23414b93771 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c | |||
@@ -298,14 +298,18 @@ int yama_ptrace_access_check(struct task_struct *child, | |||
298 | /* No additional restrictions. */ | 298 | /* No additional restrictions. */ |
299 | break; | 299 | break; |
300 | case YAMA_SCOPE_RELATIONAL: | 300 | case YAMA_SCOPE_RELATIONAL: |
301 | rcu_read_lock(); | ||
301 | if (!task_is_descendant(current, child) && | 302 | if (!task_is_descendant(current, child) && |
302 | !ptracer_exception_found(current, child) && | 303 | !ptracer_exception_found(current, child) && |
303 | !ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) | 304 | !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) |
304 | rc = -EPERM; | 305 | rc = -EPERM; |
306 | rcu_read_unlock(); | ||
305 | break; | 307 | break; |
306 | case YAMA_SCOPE_CAPABILITY: | 308 | case YAMA_SCOPE_CAPABILITY: |
307 | if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE)) | 309 | rcu_read_lock(); |
310 | if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) | ||
308 | rc = -EPERM; | 311 | rc = -EPERM; |
312 | rcu_read_unlock(); | ||
309 | break; | 313 | break; |
310 | case YAMA_SCOPE_NO_ATTACH: | 314 | case YAMA_SCOPE_NO_ATTACH: |
311 | default: | 315 | default: |
@@ -343,8 +347,10 @@ int yama_ptrace_traceme(struct task_struct *parent) | |||
343 | /* Only disallow PTRACE_TRACEME on more aggressive settings. */ | 347 | /* Only disallow PTRACE_TRACEME on more aggressive settings. */ |
344 | switch (ptrace_scope) { | 348 | switch (ptrace_scope) { |
345 | case YAMA_SCOPE_CAPABILITY: | 349 | case YAMA_SCOPE_CAPABILITY: |
346 | if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE)) | 350 | rcu_read_lock(); |
351 | if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) | ||
347 | rc = -EPERM; | 352 | rc = -EPERM; |
353 | rcu_read_unlock(); | ||
348 | break; | 354 | break; |
349 | case YAMA_SCOPE_NO_ATTACH: | 355 | case YAMA_SCOPE_NO_ATTACH: |
350 | rc = -EPERM; | 356 | rc = -EPERM; |