diff options
author | Serge Hallyn <serue@us.ibm.com> | 2008-10-15 17:38:45 -0400 |
---|---|---|
committer | Serge E. Hallyn <serue@us.ibm.com> | 2008-11-24 18:57:41 -0500 |
commit | 18b6e0414e42d95183f07d8177e3ff0241abd825 (patch) | |
tree | 91ca2f2d442055e31eb7bb551bf7060f3f4c4cc7 | |
parent | 9789cfe22e5d7bc10cad841a4ea96ecedb34b267 (diff) |
User namespaces: set of cleanups (v2)
The user_ns is moved from nsproxy to user_struct, so that a struct
cred by itself is sufficient to determine access (which it otherwise
would not be). Corresponding ecryptfs fixes (by David Howells) are
here as well.
Fix refcounting. The following rules now apply:
1. The task pins the user struct.
2. The user struct pins its user namespace.
3. The user namespace pins the struct user which created it.
User namespaces are cloned during copy_creds(). Creating a new user_ns
through unshare() is no longer possible. (We could re-add that, but it
would cause code duplication and doesn't seem useful if PAM doesn't need
to clone user namespaces.)
When a user namespace is created, its first user (uid 0) gets empty
keyrings and a clean group_info.
This incorporates a previous patch by David Howells. Here
is his original patch description:
>I suggest adding the attached incremental patch. It makes the following
>changes:
>
> (1) Provides a current_user_ns() macro to wrap accesses to current's user
> namespace.
>
> (2) Fixes eCryptFS.
>
> (3) Renames create_new_userns() to create_user_ns() to be more consistent
> with the other associated functions and because the 'new' in the name is
> superfluous.
>
> (4) Moves the argument and permission checks made for CLONE_NEWUSER to the
> beginning of do_fork() so that they're done prior to making any attempts
> at allocation.
>
> (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds
> to fill in rather than have it return the new root user. I don't imagine
> the new root user being used for anything other than filling in a cred
> struct.
>
> This also permits me to get rid of a get_uid() and a free_uid(), as the
> reference the creds were holding on the old user_struct can just be
> transferred to the new namespace's creator pointer.
>
> (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under
> preparation rather than doing it in copy_creds().
>
>David
>Signed-off-by: David Howells <dhowells@redhat.com>
Changelog:
Oct 20: integrate dhowells' comments
1. leave thread_keyring alone
2. use current_user_ns() in set_user()
Signed-off-by: Serge Hallyn <serue@us.ibm.com>
-rw-r--r-- | fs/ecryptfs/messaging.c | 13 | ||||
-rw-r--r-- | fs/ecryptfs/miscdev.c | 19 | ||||
-rw-r--r-- | include/linux/cred.h | 2 | ||||
-rw-r--r-- | include/linux/init_task.h | 1 | ||||
-rw-r--r-- | include/linux/nsproxy.h | 1 | ||||
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | include/linux/user_namespace.h | 13 | ||||
-rw-r--r-- | kernel/cred.c | 15 | ||||
-rw-r--r-- | kernel/fork.c | 19 | ||||
-rw-r--r-- | kernel/nsproxy.c | 15 | ||||
-rw-r--r-- | kernel/sys.c | 4 | ||||
-rw-r--r-- | kernel/user.c | 47 | ||||
-rw-r--r-- | kernel/user_namespace.c | 75 |
13 files changed, 96 insertions, 129 deletions
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index e0b0a4e28b9b..6913f727624d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -360,7 +360,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | |||
360 | struct ecryptfs_msg_ctx *msg_ctx; | 360 | struct ecryptfs_msg_ctx *msg_ctx; |
361 | size_t msg_size; | 361 | size_t msg_size; |
362 | struct nsproxy *nsproxy; | 362 | struct nsproxy *nsproxy; |
363 | struct user_namespace *current_user_ns; | 363 | struct user_namespace *tsk_user_ns; |
364 | uid_t ctx_euid; | 364 | uid_t ctx_euid; |
365 | int rc; | 365 | int rc; |
366 | 366 | ||
@@ -385,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | |||
385 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 385 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
386 | goto wake_up; | 386 | goto wake_up; |
387 | } | 387 | } |
388 | current_user_ns = nsproxy->user_ns; | 388 | tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns; |
389 | ctx_euid = task_euid(msg_ctx->task); | 389 | ctx_euid = task_euid(msg_ctx->task); |
390 | rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, current_user_ns); | 390 | rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); |
391 | rcu_read_unlock(); | 391 | rcu_read_unlock(); |
392 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 392 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
393 | if (rc) { | 393 | if (rc) { |
@@ -405,11 +405,11 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | |||
405 | euid, ctx_euid); | 405 | euid, ctx_euid); |
406 | goto unlock; | 406 | goto unlock; |
407 | } | 407 | } |
408 | if (current_user_ns != user_ns) { | 408 | if (tsk_user_ns != user_ns) { |
409 | rc = -EBADMSG; | 409 | rc = -EBADMSG; |
410 | printk(KERN_WARNING "%s: Received message from user_ns " | 410 | printk(KERN_WARNING "%s: Received message from user_ns " |
411 | "[0x%p]; expected message from user_ns [0x%p]\n", | 411 | "[0x%p]; expected message from user_ns [0x%p]\n", |
412 | __func__, user_ns, nsproxy->user_ns); | 412 | __func__, user_ns, tsk_user_ns); |
413 | goto unlock; | 413 | goto unlock; |
414 | } | 414 | } |
415 | if (daemon->pid != pid) { | 415 | if (daemon->pid != pid) { |
@@ -468,8 +468,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, | |||
468 | uid_t euid = current_euid(); | 468 | uid_t euid = current_euid(); |
469 | int rc; | 469 | int rc; |
470 | 470 | ||
471 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, | 471 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); |
472 | current->nsproxy->user_ns); | ||
473 | if (rc || !daemon) { | 472 | if (rc || !daemon) { |
474 | rc = -ENOTCONN; | 473 | rc = -ENOTCONN; |
475 | printk(KERN_ERR "%s: User [%d] does not have a daemon " | 474 | printk(KERN_ERR "%s: User [%d] does not have a daemon " |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 047ac609695b..efd95a0ed1ea 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -47,8 +47,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) | |||
47 | 47 | ||
48 | mutex_lock(&ecryptfs_daemon_hash_mux); | 48 | mutex_lock(&ecryptfs_daemon_hash_mux); |
49 | /* TODO: Just use file->private_data? */ | 49 | /* TODO: Just use file->private_data? */ |
50 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, | 50 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); |
51 | current->nsproxy->user_ns); | ||
52 | BUG_ON(rc || !daemon); | 51 | BUG_ON(rc || !daemon); |
53 | mutex_lock(&daemon->mux); | 52 | mutex_lock(&daemon->mux); |
54 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 53 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
@@ -95,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) | |||
95 | "count; rc = [%d]\n", __func__, rc); | 94 | "count; rc = [%d]\n", __func__, rc); |
96 | goto out_unlock_daemon_list; | 95 | goto out_unlock_daemon_list; |
97 | } | 96 | } |
98 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, | 97 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); |
99 | current->nsproxy->user_ns); | ||
100 | if (rc || !daemon) { | 98 | if (rc || !daemon) { |
101 | rc = ecryptfs_spawn_daemon(&daemon, euid, | 99 | rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), |
102 | current->nsproxy->user_ns, | ||
103 | task_pid(current)); | 100 | task_pid(current)); |
104 | if (rc) { | 101 | if (rc) { |
105 | printk(KERN_ERR "%s: Error attempting to spawn daemon; " | 102 | printk(KERN_ERR "%s: Error attempting to spawn daemon; " |
@@ -153,8 +150,7 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) | |||
153 | int rc; | 150 | int rc; |
154 | 151 | ||
155 | mutex_lock(&ecryptfs_daemon_hash_mux); | 152 | mutex_lock(&ecryptfs_daemon_hash_mux); |
156 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, | 153 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); |
157 | current->nsproxy->user_ns); | ||
158 | BUG_ON(rc || !daemon); | 154 | BUG_ON(rc || !daemon); |
159 | mutex_lock(&daemon->mux); | 155 | mutex_lock(&daemon->mux); |
160 | BUG_ON(daemon->pid != task_pid(current)); | 156 | BUG_ON(daemon->pid != task_pid(current)); |
@@ -254,8 +250,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, | |||
254 | 250 | ||
255 | mutex_lock(&ecryptfs_daemon_hash_mux); | 251 | mutex_lock(&ecryptfs_daemon_hash_mux); |
256 | /* TODO: Just use file->private_data? */ | 252 | /* TODO: Just use file->private_data? */ |
257 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, | 253 | rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); |
258 | current->nsproxy->user_ns); | ||
259 | BUG_ON(rc || !daemon); | 254 | BUG_ON(rc || !daemon); |
260 | mutex_lock(&daemon->mux); | 255 | mutex_lock(&daemon->mux); |
261 | if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { | 256 | if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { |
@@ -295,7 +290,7 @@ check_list: | |||
295 | goto check_list; | 290 | goto check_list; |
296 | } | 291 | } |
297 | BUG_ON(euid != daemon->euid); | 292 | BUG_ON(euid != daemon->euid); |
298 | BUG_ON(current->nsproxy->user_ns != daemon->user_ns); | 293 | BUG_ON(current_user_ns() != daemon->user_ns); |
299 | BUG_ON(task_pid(current) != daemon->pid); | 294 | BUG_ON(task_pid(current) != daemon->pid); |
300 | msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, | 295 | msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, |
301 | struct ecryptfs_msg_ctx, daemon_out_list); | 296 | struct ecryptfs_msg_ctx, daemon_out_list); |
@@ -468,7 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
468 | goto out_free; | 463 | goto out_free; |
469 | } | 464 | } |
470 | rc = ecryptfs_miscdev_response(&data[i], packet_size, | 465 | rc = ecryptfs_miscdev_response(&data[i], packet_size, |
471 | euid, current->nsproxy->user_ns, | 466 | euid, current_user_ns(), |
472 | task_pid(current), seq); | 467 | task_pid(current), seq); |
473 | if (rc) | 468 | if (rc) |
474 | printk(KERN_WARNING "%s: Failed to deliver miscdev " | 469 | printk(KERN_WARNING "%s: Failed to deliver miscdev " |
diff --git a/include/linux/cred.h b/include/linux/cred.h index 26c1ab179946..3282ee4318e7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h | |||
@@ -60,6 +60,7 @@ do { \ | |||
60 | } while (0) | 60 | } while (0) |
61 | 61 | ||
62 | extern struct group_info *groups_alloc(int); | 62 | extern struct group_info *groups_alloc(int); |
63 | extern struct group_info init_groups; | ||
63 | extern void groups_free(struct group_info *); | 64 | extern void groups_free(struct group_info *); |
64 | extern int set_current_groups(struct group_info *); | 65 | extern int set_current_groups(struct group_info *); |
65 | extern int set_groups(struct cred *, struct group_info *); | 66 | extern int set_groups(struct cred *, struct group_info *); |
@@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred) | |||
315 | #define current_fsgid() (current_cred_xxx(fsgid)) | 316 | #define current_fsgid() (current_cred_xxx(fsgid)) |
316 | #define current_cap() (current_cred_xxx(cap_effective)) | 317 | #define current_cap() (current_cred_xxx(cap_effective)) |
317 | #define current_user() (current_cred_xxx(user)) | 318 | #define current_user() (current_cred_xxx(user)) |
319 | #define current_user_ns() (current_cred_xxx(user)->user_ns) | ||
318 | #define current_security() (current_cred_xxx(security)) | 320 | #define current_security() (current_cred_xxx(security)) |
319 | 321 | ||
320 | #define current_uid_gid(_uid, _gid) \ | 322 | #define current_uid_gid(_uid, _gid) \ |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2597858035cd..959f5522d10a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy; | |||
57 | .mnt_ns = NULL, \ | 57 | .mnt_ns = NULL, \ |
58 | INIT_NET_NS(net_ns) \ | 58 | INIT_NET_NS(net_ns) \ |
59 | INIT_IPC_NS(ipc_ns) \ | 59 | INIT_IPC_NS(ipc_ns) \ |
60 | .user_ns = &init_user_ns, \ | ||
61 | } | 60 | } |
62 | 61 | ||
63 | #define INIT_SIGHAND(sighand) { \ | 62 | #define INIT_SIGHAND(sighand) { \ |
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index c8a768e59640..afad7dec1b36 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h | |||
@@ -27,7 +27,6 @@ struct nsproxy { | |||
27 | struct ipc_namespace *ipc_ns; | 27 | struct ipc_namespace *ipc_ns; |
28 | struct mnt_namespace *mnt_ns; | 28 | struct mnt_namespace *mnt_ns; |
29 | struct pid_namespace *pid_ns; | 29 | struct pid_namespace *pid_ns; |
30 | struct user_namespace *user_ns; | ||
31 | struct net *net_ns; | 30 | struct net *net_ns; |
32 | }; | 31 | }; |
33 | extern struct nsproxy init_nsproxy; | 32 | extern struct nsproxy init_nsproxy; |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 2036e9f26020..7f8015a3082e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -638,6 +638,7 @@ struct user_struct { | |||
638 | /* Hash table maintenance information */ | 638 | /* Hash table maintenance information */ |
639 | struct hlist_node uidhash_node; | 639 | struct hlist_node uidhash_node; |
640 | uid_t uid; | 640 | uid_t uid; |
641 | struct user_namespace *user_ns; | ||
641 | 642 | ||
642 | #ifdef CONFIG_USER_SCHED | 643 | #ifdef CONFIG_USER_SCHED |
643 | struct task_group *tg; | 644 | struct task_group *tg; |
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4c2eec..315bcd375224 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h | |||
@@ -12,7 +12,7 @@ | |||
12 | struct user_namespace { | 12 | struct user_namespace { |
13 | struct kref kref; | 13 | struct kref kref; |
14 | struct hlist_head uidhash_table[UIDHASH_SZ]; | 14 | struct hlist_head uidhash_table[UIDHASH_SZ]; |
15 | struct user_struct *root_user; | 15 | struct user_struct *creator; |
16 | }; | 16 | }; |
17 | 17 | ||
18 | extern struct user_namespace init_user_ns; | 18 | extern struct user_namespace init_user_ns; |
@@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | |||
26 | return ns; | 26 | return ns; |
27 | } | 27 | } |
28 | 28 | ||
29 | extern struct user_namespace *copy_user_ns(int flags, | 29 | extern int create_user_ns(struct cred *new); |
30 | struct user_namespace *old_ns); | ||
31 | extern void free_user_ns(struct kref *kref); | 30 | extern void free_user_ns(struct kref *kref); |
32 | 31 | ||
33 | static inline void put_user_ns(struct user_namespace *ns) | 32 | static inline void put_user_ns(struct user_namespace *ns) |
@@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) | |||
43 | return &init_user_ns; | 42 | return &init_user_ns; |
44 | } | 43 | } |
45 | 44 | ||
46 | static inline struct user_namespace *copy_user_ns(int flags, | 45 | static inline int create_user_ns(struct cred *new) |
47 | struct user_namespace *old_ns) | ||
48 | { | 46 | { |
49 | if (flags & CLONE_NEWUSER) | 47 | return -EINVAL; |
50 | return ERR_PTR(-EINVAL); | ||
51 | |||
52 | return old_ns; | ||
53 | } | 48 | } |
54 | 49 | ||
55 | static inline void put_user_ns(struct user_namespace *ns) | 50 | static inline void put_user_ns(struct user_namespace *ns) |
diff --git a/kernel/cred.c b/kernel/cred.c index 13697ca2bb38..ff7bc071991c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
274 | struct thread_group_cred *tgcred; | 274 | struct thread_group_cred *tgcred; |
275 | #endif | 275 | #endif |
276 | struct cred *new; | 276 | struct cred *new; |
277 | int ret; | ||
277 | 278 | ||
278 | mutex_init(&p->cred_exec_mutex); | 279 | mutex_init(&p->cred_exec_mutex); |
279 | 280 | ||
@@ -293,6 +294,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
293 | if (!new) | 294 | if (!new) |
294 | return -ENOMEM; | 295 | return -ENOMEM; |
295 | 296 | ||
297 | if (clone_flags & CLONE_NEWUSER) { | ||
298 | ret = create_user_ns(new); | ||
299 | if (ret < 0) | ||
300 | goto error_put; | ||
301 | } | ||
302 | |||
296 | #ifdef CONFIG_KEYS | 303 | #ifdef CONFIG_KEYS |
297 | /* new threads get their own thread keyrings if their parent already | 304 | /* new threads get their own thread keyrings if their parent already |
298 | * had one */ | 305 | * had one */ |
@@ -309,8 +316,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
309 | if (!(clone_flags & CLONE_THREAD)) { | 316 | if (!(clone_flags & CLONE_THREAD)) { |
310 | tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); | 317 | tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); |
311 | if (!tgcred) { | 318 | if (!tgcred) { |
312 | put_cred(new); | 319 | ret = -ENOMEM; |
313 | return -ENOMEM; | 320 | goto error_put; |
314 | } | 321 | } |
315 | atomic_set(&tgcred->usage, 1); | 322 | atomic_set(&tgcred->usage, 1); |
316 | spin_lock_init(&tgcred->lock); | 323 | spin_lock_init(&tgcred->lock); |
@@ -325,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
325 | atomic_inc(&new->user->processes); | 332 | atomic_inc(&new->user->processes); |
326 | p->cred = p->real_cred = get_cred(new); | 333 | p->cred = p->real_cred = get_cred(new); |
327 | return 0; | 334 | return 0; |
335 | |||
336 | error_put: | ||
337 | put_cred(new); | ||
338 | return ret; | ||
328 | } | 339 | } |
329 | 340 | ||
330 | /** | 341 | /** |
diff --git a/kernel/fork.c b/kernel/fork.c index 29c18c14812d..1dd89451fae4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -976,7 +976,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
976 | if (atomic_read(&p->real_cred->user->processes) >= | 976 | if (atomic_read(&p->real_cred->user->processes) >= |
977 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { | 977 | p->signal->rlim[RLIMIT_NPROC].rlim_cur) { |
978 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && | 978 | if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && |
979 | p->real_cred->user != current->nsproxy->user_ns->root_user) | 979 | p->real_cred->user != INIT_USER) |
980 | goto bad_fork_free; | 980 | goto bad_fork_free; |
981 | } | 981 | } |
982 | 982 | ||
@@ -1335,6 +1335,20 @@ long do_fork(unsigned long clone_flags, | |||
1335 | long nr; | 1335 | long nr; |
1336 | 1336 | ||
1337 | /* | 1337 | /* |
1338 | * Do some preliminary argument and permissions checking before we | ||
1339 | * actually start allocating stuff | ||
1340 | */ | ||
1341 | if (clone_flags & CLONE_NEWUSER) { | ||
1342 | if (clone_flags & CLONE_THREAD) | ||
1343 | return -EINVAL; | ||
1344 | /* hopefully this check will go away when userns support is | ||
1345 | * complete | ||
1346 | */ | ||
1347 | if (!capable(CAP_SYS_ADMIN)) | ||
1348 | return -EPERM; | ||
1349 | } | ||
1350 | |||
1351 | /* | ||
1338 | * We hope to recycle these flags after 2.6.26 | 1352 | * We hope to recycle these flags after 2.6.26 |
1339 | */ | 1353 | */ |
1340 | if (unlikely(clone_flags & CLONE_STOPPED)) { | 1354 | if (unlikely(clone_flags & CLONE_STOPPED)) { |
@@ -1581,8 +1595,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) | |||
1581 | err = -EINVAL; | 1595 | err = -EINVAL; |
1582 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | 1596 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| |
1583 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | 1597 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| |
1584 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| | 1598 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) |
1585 | CLONE_NEWNET)) | ||
1586 | goto bad_unshare_out; | 1599 | goto bad_unshare_out; |
1587 | 1600 | ||
1588 | /* | 1601 | /* |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 1d3ef29a2583..63598dca2d0c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
80 | goto out_pid; | 80 | goto out_pid; |
81 | } | 81 | } |
82 | 82 | ||
83 | new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); | ||
84 | if (IS_ERR(new_nsp->user_ns)) { | ||
85 | err = PTR_ERR(new_nsp->user_ns); | ||
86 | goto out_user; | ||
87 | } | ||
88 | |||
89 | new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); | 83 | new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); |
90 | if (IS_ERR(new_nsp->net_ns)) { | 84 | if (IS_ERR(new_nsp->net_ns)) { |
91 | err = PTR_ERR(new_nsp->net_ns); | 85 | err = PTR_ERR(new_nsp->net_ns); |
@@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
95 | return new_nsp; | 89 | return new_nsp; |
96 | 90 | ||
97 | out_net: | 91 | out_net: |
98 | if (new_nsp->user_ns) | ||
99 | put_user_ns(new_nsp->user_ns); | ||
100 | out_user: | ||
101 | if (new_nsp->pid_ns) | 92 | if (new_nsp->pid_ns) |
102 | put_pid_ns(new_nsp->pid_ns); | 93 | put_pid_ns(new_nsp->pid_ns); |
103 | out_pid: | 94 | out_pid: |
@@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) | |||
130 | get_nsproxy(old_ns); | 121 | get_nsproxy(old_ns); |
131 | 122 | ||
132 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | | 123 | if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
133 | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET))) | 124 | CLONE_NEWPID | CLONE_NEWNET))) |
134 | return 0; | 125 | return 0; |
135 | 126 | ||
136 | if (!capable(CAP_SYS_ADMIN)) { | 127 | if (!capable(CAP_SYS_ADMIN)) { |
@@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns) | |||
173 | put_ipc_ns(ns->ipc_ns); | 164 | put_ipc_ns(ns->ipc_ns); |
174 | if (ns->pid_ns) | 165 | if (ns->pid_ns) |
175 | put_pid_ns(ns->pid_ns); | 166 | put_pid_ns(ns->pid_ns); |
176 | if (ns->user_ns) | ||
177 | put_user_ns(ns->user_ns); | ||
178 | put_net(ns->net_ns); | 167 | put_net(ns->net_ns); |
179 | kmem_cache_free(nsproxy_cachep, ns); | 168 | kmem_cache_free(nsproxy_cachep, ns); |
180 | } | 169 | } |
@@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, | |||
189 | int err = 0; | 178 | int err = 0; |
190 | 179 | ||
191 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | | 180 | if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | |
192 | CLONE_NEWUSER | CLONE_NEWNET))) | 181 | CLONE_NEWNET))) |
193 | return 0; | 182 | return 0; |
194 | 183 | ||
195 | if (!capable(CAP_SYS_ADMIN)) | 184 | if (!capable(CAP_SYS_ADMIN)) |
diff --git a/kernel/sys.c b/kernel/sys.c index ab735040468a..ebe65c2c9873 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -565,13 +565,13 @@ static int set_user(struct cred *new) | |||
565 | { | 565 | { |
566 | struct user_struct *new_user; | 566 | struct user_struct *new_user; |
567 | 567 | ||
568 | new_user = alloc_uid(current->nsproxy->user_ns, new->uid); | 568 | new_user = alloc_uid(current_user_ns(), new->uid); |
569 | if (!new_user) | 569 | if (!new_user) |
570 | return -EAGAIN; | 570 | return -EAGAIN; |
571 | 571 | ||
572 | if (atomic_read(&new_user->processes) >= | 572 | if (atomic_read(&new_user->processes) >= |
573 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && | 573 | current->signal->rlim[RLIMIT_NPROC].rlim_cur && |
574 | new_user != current->nsproxy->user_ns->root_user) { | 574 | new_user != INIT_USER) { |
575 | free_uid(new_user); | 575 | free_uid(new_user); |
576 | return -EAGAIN; | 576 | return -EAGAIN; |
577 | } | 577 | } |
diff --git a/kernel/user.c b/kernel/user.c index d476307dd4b0..c0ef3a464438 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -20,9 +20,9 @@ | |||
20 | 20 | ||
21 | struct user_namespace init_user_ns = { | 21 | struct user_namespace init_user_ns = { |
22 | .kref = { | 22 | .kref = { |
23 | .refcount = ATOMIC_INIT(2), | 23 | .refcount = ATOMIC_INIT(1), |
24 | }, | 24 | }, |
25 | .root_user = &root_user, | 25 | .creator = &root_user, |
26 | }; | 26 | }; |
27 | EXPORT_SYMBOL_GPL(init_user_ns); | 27 | EXPORT_SYMBOL_GPL(init_user_ns); |
28 | 28 | ||
@@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep; | |||
48 | */ | 48 | */ |
49 | static DEFINE_SPINLOCK(uidhash_lock); | 49 | static DEFINE_SPINLOCK(uidhash_lock); |
50 | 50 | ||
51 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ | ||
51 | struct user_struct root_user = { | 52 | struct user_struct root_user = { |
52 | .__count = ATOMIC_INIT(1), | 53 | .__count = ATOMIC_INIT(2), |
53 | .processes = ATOMIC_INIT(1), | 54 | .processes = ATOMIC_INIT(1), |
54 | .files = ATOMIC_INIT(0), | 55 | .files = ATOMIC_INIT(0), |
55 | .sigpending = ATOMIC_INIT(0), | 56 | .sigpending = ATOMIC_INIT(0), |
56 | .locked_shm = 0, | 57 | .locked_shm = 0, |
58 | .user_ns = &init_user_ns, | ||
57 | #ifdef CONFIG_USER_SCHED | 59 | #ifdef CONFIG_USER_SCHED |
58 | .tg = &init_task_group, | 60 | .tg = &init_task_group, |
59 | #endif | 61 | #endif |
@@ -314,12 +316,13 @@ done: | |||
314 | * IRQ state (as stored in flags) is restored and uidhash_lock released | 316 | * IRQ state (as stored in flags) is restored and uidhash_lock released |
315 | * upon function exit. | 317 | * upon function exit. |
316 | */ | 318 | */ |
317 | static inline void free_user(struct user_struct *up, unsigned long flags) | 319 | static void free_user(struct user_struct *up, unsigned long flags) |
318 | { | 320 | { |
319 | /* restore back the count */ | 321 | /* restore back the count */ |
320 | atomic_inc(&up->__count); | 322 | atomic_inc(&up->__count); |
321 | spin_unlock_irqrestore(&uidhash_lock, flags); | 323 | spin_unlock_irqrestore(&uidhash_lock, flags); |
322 | 324 | ||
325 | put_user_ns(up->user_ns); | ||
323 | INIT_WORK(&up->work, remove_user_sysfs_dir); | 326 | INIT_WORK(&up->work, remove_user_sysfs_dir); |
324 | schedule_work(&up->work); | 327 | schedule_work(&up->work); |
325 | } | 328 | } |
@@ -335,13 +338,14 @@ static inline void uids_mutex_unlock(void) { } | |||
335 | * IRQ state (as stored in flags) is restored and uidhash_lock released | 338 | * IRQ state (as stored in flags) is restored and uidhash_lock released |
336 | * upon function exit. | 339 | * upon function exit. |
337 | */ | 340 | */ |
338 | static inline void free_user(struct user_struct *up, unsigned long flags) | 341 | static void free_user(struct user_struct *up, unsigned long flags) |
339 | { | 342 | { |
340 | uid_hash_remove(up); | 343 | uid_hash_remove(up); |
341 | spin_unlock_irqrestore(&uidhash_lock, flags); | 344 | spin_unlock_irqrestore(&uidhash_lock, flags); |
342 | sched_destroy_user(up); | 345 | sched_destroy_user(up); |
343 | key_put(up->uid_keyring); | 346 | key_put(up->uid_keyring); |
344 | key_put(up->session_keyring); | 347 | key_put(up->session_keyring); |
348 | put_user_ns(up->user_ns); | ||
345 | kmem_cache_free(uid_cachep, up); | 349 | kmem_cache_free(uid_cachep, up); |
346 | } | 350 | } |
347 | 351 | ||
@@ -357,7 +361,7 @@ struct user_struct *find_user(uid_t uid) | |||
357 | { | 361 | { |
358 | struct user_struct *ret; | 362 | struct user_struct *ret; |
359 | unsigned long flags; | 363 | unsigned long flags; |
360 | struct user_namespace *ns = current->nsproxy->user_ns; | 364 | struct user_namespace *ns = current_user()->user_ns; |
361 | 365 | ||
362 | spin_lock_irqsave(&uidhash_lock, flags); | 366 | spin_lock_irqsave(&uidhash_lock, flags); |
363 | ret = uid_hash_find(uid, uidhashentry(ns, uid)); | 367 | ret = uid_hash_find(uid, uidhashentry(ns, uid)); |
@@ -404,6 +408,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) | |||
404 | if (sched_create_user(new) < 0) | 408 | if (sched_create_user(new) < 0) |
405 | goto out_free_user; | 409 | goto out_free_user; |
406 | 410 | ||
411 | new->user_ns = get_user_ns(ns); | ||
412 | |||
407 | if (uids_user_create(new)) | 413 | if (uids_user_create(new)) |
408 | goto out_destoy_sched; | 414 | goto out_destoy_sched; |
409 | 415 | ||
@@ -427,7 +433,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) | |||
427 | up = new; | 433 | up = new; |
428 | } | 434 | } |
429 | spin_unlock_irq(&uidhash_lock); | 435 | spin_unlock_irq(&uidhash_lock); |
430 | |||
431 | } | 436 | } |
432 | 437 | ||
433 | uids_mutex_unlock(); | 438 | uids_mutex_unlock(); |
@@ -436,6 +441,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) | |||
436 | 441 | ||
437 | out_destoy_sched: | 442 | out_destoy_sched: |
438 | sched_destroy_user(new); | 443 | sched_destroy_user(new); |
444 | put_user_ns(new->user_ns); | ||
439 | out_free_user: | 445 | out_free_user: |
440 | kmem_cache_free(uid_cachep, new); | 446 | kmem_cache_free(uid_cachep, new); |
441 | out_unlock: | 447 | out_unlock: |
@@ -443,33 +449,6 @@ out_unlock: | |||
443 | return NULL; | 449 | return NULL; |
444 | } | 450 | } |
445 | 451 | ||
446 | #ifdef CONFIG_USER_NS | ||
447 | void release_uids(struct user_namespace *ns) | ||
448 | { | ||
449 | int i; | ||
450 | unsigned long flags; | ||
451 | struct hlist_head *head; | ||
452 | struct hlist_node *nd; | ||
453 | |||
454 | spin_lock_irqsave(&uidhash_lock, flags); | ||
455 | /* | ||
456 | * collapse the chains so that the user_struct-s will | ||
457 | * be still alive, but not in hashes. subsequent free_uid() | ||
458 | * will free them. | ||
459 | */ | ||
460 | for (i = 0; i < UIDHASH_SZ; i++) { | ||
461 | head = ns->uidhash_table + i; | ||
462 | while (!hlist_empty(head)) { | ||
463 | nd = head->first; | ||
464 | hlist_del_init(nd); | ||
465 | } | ||
466 | } | ||
467 | spin_unlock_irqrestore(&uidhash_lock, flags); | ||
468 | |||
469 | free_uid(ns->root_user); | ||
470 | } | ||
471 | #endif | ||
472 | |||
473 | static int __init uid_cache_init(void) | 452 | static int __init uid_cache_init(void) |
474 | { | 453 | { |
475 | int n; | 454 | int n; |
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d9c51d67333..79084311ee57 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
@@ -9,70 +9,55 @@ | |||
9 | #include <linux/nsproxy.h> | 9 | #include <linux/nsproxy.h> |
10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
11 | #include <linux/user_namespace.h> | 11 | #include <linux/user_namespace.h> |
12 | #include <linux/cred.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * Clone a new ns copying an original user ns, setting refcount to 1 | 15 | * Create a new user namespace, deriving the creator from the user in the |
15 | * @old_ns: namespace to clone | 16 | * passed credentials, and replacing that user with the new root user for the |
16 | * Return NULL on error (failure to kmalloc), new ns otherwise | 17 | * new namespace. |
18 | * | ||
19 | * This is called by copy_creds(), which will finish setting the target task's | ||
20 | * credentials. | ||
17 | */ | 21 | */ |
18 | static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) | 22 | int create_user_ns(struct cred *new) |
19 | { | 23 | { |
20 | struct user_namespace *ns; | 24 | struct user_namespace *ns; |
21 | struct user_struct *new_user; | 25 | struct user_struct *root_user; |
22 | struct cred *new; | ||
23 | int n; | 26 | int n; |
24 | 27 | ||
25 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); | 28 | ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); |
26 | if (!ns) | 29 | if (!ns) |
27 | return ERR_PTR(-ENOMEM); | 30 | return -ENOMEM; |
28 | 31 | ||
29 | kref_init(&ns->kref); | 32 | kref_init(&ns->kref); |
30 | 33 | ||
31 | for (n = 0; n < UIDHASH_SZ; ++n) | 34 | for (n = 0; n < UIDHASH_SZ; ++n) |
32 | INIT_HLIST_HEAD(ns->uidhash_table + n); | 35 | INIT_HLIST_HEAD(ns->uidhash_table + n); |
33 | 36 | ||
34 | /* Insert new root user. */ | 37 | /* Alloc new root user. */ |
35 | ns->root_user = alloc_uid(ns, 0); | 38 | root_user = alloc_uid(ns, 0); |
36 | if (!ns->root_user) { | 39 | if (!root_user) { |
37 | kfree(ns); | 40 | kfree(ns); |
38 | return ERR_PTR(-ENOMEM); | 41 | return -ENOMEM; |
39 | } | 42 | } |
40 | 43 | ||
41 | /* Reset current->user with a new one */ | 44 | /* set the new root user in the credentials under preparation */ |
42 | new_user = alloc_uid(ns, current_uid()); | 45 | ns->creator = new->user; |
43 | if (!new_user) { | 46 | new->user = root_user; |
44 | free_uid(ns->root_user); | 47 | new->uid = new->euid = new->suid = new->fsuid = 0; |
45 | kfree(ns); | 48 | new->gid = new->egid = new->sgid = new->fsgid = 0; |
46 | return ERR_PTR(-ENOMEM); | 49 | put_group_info(new->group_info); |
47 | } | 50 | new->group_info = get_group_info(&init_groups); |
48 | 51 | #ifdef CONFIG_KEYS | |
49 | /* Install the new user */ | 52 | key_put(new->request_key_auth); |
50 | new = prepare_creds(); | 53 | new->request_key_auth = NULL; |
51 | if (!new) { | 54 | #endif |
52 | free_uid(new_user); | 55 | /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ |
53 | free_uid(ns->root_user); | ||
54 | kfree(ns); | ||
55 | } | ||
56 | free_uid(new->user); | ||
57 | new->user = new_user; | ||
58 | commit_creds(new); | ||
59 | return ns; | ||
60 | } | ||
61 | |||
62 | struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) | ||
63 | { | ||
64 | struct user_namespace *new_ns; | ||
65 | |||
66 | BUG_ON(!old_ns); | ||
67 | get_user_ns(old_ns); | ||
68 | |||
69 | if (!(flags & CLONE_NEWUSER)) | ||
70 | return old_ns; | ||
71 | 56 | ||
72 | new_ns = clone_user_ns(old_ns); | 57 | /* alloc_uid() incremented the userns refcount. Just set it to 1 */ |
58 | kref_set(&ns->kref, 1); | ||
73 | 59 | ||
74 | put_user_ns(old_ns); | 60 | return 0; |
75 | return new_ns; | ||
76 | } | 61 | } |
77 | 62 | ||
78 | void free_user_ns(struct kref *kref) | 63 | void free_user_ns(struct kref *kref) |
@@ -80,7 +65,7 @@ void free_user_ns(struct kref *kref) | |||
80 | struct user_namespace *ns; | 65 | struct user_namespace *ns; |
81 | 66 | ||
82 | ns = container_of(kref, struct user_namespace, kref); | 67 | ns = container_of(kref, struct user_namespace, kref); |
83 | release_uids(ns); | 68 | free_uid(ns->creator); |
84 | kfree(ns); | 69 | kfree(ns); |
85 | } | 70 | } |
86 | EXPORT_SYMBOL(free_user_ns); | 71 | EXPORT_SYMBOL(free_user_ns); |