aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 18:44:47 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 18:44:47 -0500
commit6a2b60b17b3e48a418695a94bd2420f6ab32e519 (patch)
tree54b7792fa68b8890f710fa6398b6ba8626a039a8 /kernel/fork.c
parent9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (diff)
parent98f842e675f96ffac96e6c50315790912b2812be (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace changes from Eric Biederman: "While small this set of changes is very significant with respect to containers in general and user namespaces in particular. The user space interface is now complete. This set of changes adds support for unprivileged users to create user namespaces and as a user namespace root to create other namespaces. The tyranny of supporting suid root preventing unprivileged users from using cool new kernel features is broken. This set of changes completes the work on setns, adding support for the pid, user, mount namespaces. This set of changes includes a bunch of basic pid namespace cleanups/simplifications. Of particular significance is the rework of the pid namespace cleanup so it no longer requires sending out tendrils into all kinds of unexpected cleanup paths for operation. At least one case of broken error handling is fixed by this cleanup. The files under /proc/<pid>/ns/ have been converted from regular files to magic symlinks which prevents incorrect caching by the VFS, ensuring the files always refer to the namespace the process is currently using and ensuring that the ptrace_mayaccess permission checks are always applied. The files under /proc/<pid>/ns/ have been given stable inode numbers so it is now possible to see if different processes share the same namespaces. Through the David Miller's net tree are changes to relax many of the permission checks in the networking stack to allowing the user namespace root to usefully use the networking stack. Similar changes for the mount namespace and the pid namespace are coming through my tree. Two small changes to add user namespace support were commited here adn in David Miller's -net tree so that I could complete the work on the /proc/<pid>/ns/ files in this tree. Work remains to make it safe to build user namespaces and 9p, afs, ceph, cifs, coda, gfs2, ncpfs, nfs, nfsd, ocfs2, and xfs so the Kconfig guard remains in place preventing that user namespaces from being built when any of those filesystems are enabled. Future design work remains to allow root users outside of the initial user namespace to mount more than just /proc and /sys." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (38 commits) proc: Usable inode numbers for the namespace file descriptors. proc: Fix the namespace inode permission checks. proc: Generalize proc inode allocation userns: Allow unprivilged mounts of proc and sysfs userns: For /proc/self/{uid,gid}_map derive the lower userns from the struct file procfs: Print task uids and gids in the userns that opened the proc file userns: Implement unshare of the user namespace userns: Implent proc namespace operations userns: Kill task_user_ns userns: Make create_new_namespaces take a user_ns parameter userns: Allow unprivileged use of setns. userns: Allow unprivileged users to create new namespaces userns: Allow setting a userns mapping to your current uid. userns: Allow chown and setgid preservation userns: Allow unprivileged users to create user namespaces. userns: Ignore suid and sgid on binaries if the uid or gid can not be mapped userns: fix return value on mntns_install() failure vfs: Allow unprivileged manipulation of the mount namespace. vfs: Only support slave subtrees across different user namespaces vfs: Add a user namespace reference from struct mnt_namespace ...
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c69
1 files changed, 48 insertions, 21 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 115d6c2e4cca..c36c4e301efe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
1044 atomic_set(&sig->live, 1); 1044 atomic_set(&sig->live, 1);
1045 atomic_set(&sig->sigcnt, 1); 1045 atomic_set(&sig->sigcnt, 1);
1046 init_waitqueue_head(&sig->wait_chldexit); 1046 init_waitqueue_head(&sig->wait_chldexit);
1047 if (clone_flags & CLONE_NEWPID)
1048 sig->flags |= SIGNAL_UNKILLABLE;
1049 sig->curr_target = tsk; 1047 sig->curr_target = tsk;
1050 init_sigpending(&sig->shared_pending); 1048 init_sigpending(&sig->shared_pending);
1051 INIT_LIST_HEAD(&sig->posix_timers); 1049 INIT_LIST_HEAD(&sig->posix_timers);
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1438 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 1436 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1439 1437
1440 if (thread_group_leader(p)) { 1438 if (thread_group_leader(p)) {
1441 if (is_child_reaper(pid)) 1439 if (is_child_reaper(pid)) {
1442 p->nsproxy->pid_ns->child_reaper = p; 1440 ns_of_pid(pid)->child_reaper = p;
1441 p->signal->flags |= SIGNAL_UNKILLABLE;
1442 }
1443 1443
1444 p->signal->leader_pid = pid; 1444 p->signal->leader_pid = pid;
1445 p->signal->tty = tty_kref_get(current->signal->tty); 1445 p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io:
1473 if (p->io_context) 1473 if (p->io_context)
1474 exit_io_context(p); 1474 exit_io_context(p);
1475bad_fork_cleanup_namespaces: 1475bad_fork_cleanup_namespaces:
1476 if (unlikely(clone_flags & CLONE_NEWPID))
1477 pid_ns_release_proc(p->nsproxy->pid_ns);
1478 exit_task_namespaces(p); 1476 exit_task_namespaces(p);
1479bad_fork_cleanup_mm: 1477bad_fork_cleanup_mm:
1480 if (p->mm) 1478 if (p->mm)
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags,
1554 * Do some preliminary argument and permissions checking before we 1552 * Do some preliminary argument and permissions checking before we
1555 * actually start allocating stuff 1553 * actually start allocating stuff
1556 */ 1554 */
1557 if (clone_flags & CLONE_NEWUSER) { 1555 if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
1558 if (clone_flags & CLONE_THREAD) 1556 if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
1559 return -EINVAL; 1557 return -EINVAL;
1560 /* hopefully this check will go away when userns support is
1561 * complete
1562 */
1563 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
1564 !capable(CAP_SETGID))
1565 return -EPERM;
1566 } 1558 }
1567 1559
1568 /* 1560 /*
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
1724{ 1716{
1725 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1717 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1726 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 1718 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1727 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) 1719 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
1720 CLONE_NEWUSER|CLONE_NEWPID))
1728 return -EINVAL; 1721 return -EINVAL;
1729 /* 1722 /*
1730 * Not implemented, but pretend it works if there is nothing to 1723 * Not implemented, but pretend it works if there is nothing to
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1791{ 1784{
1792 struct fs_struct *fs, *new_fs = NULL; 1785 struct fs_struct *fs, *new_fs = NULL;
1793 struct files_struct *fd, *new_fd = NULL; 1786 struct files_struct *fd, *new_fd = NULL;
1787 struct cred *new_cred = NULL;
1794 struct nsproxy *new_nsproxy = NULL; 1788 struct nsproxy *new_nsproxy = NULL;
1795 int do_sysvsem = 0; 1789 int do_sysvsem = 0;
1796 int err; 1790 int err;
1797 1791
1798 err = check_unshare_flags(unshare_flags); 1792 /*
1799 if (err) 1793 * If unsharing a user namespace must also unshare the thread.
1800 goto bad_unshare_out; 1794 */
1801 1795 if (unshare_flags & CLONE_NEWUSER)
1796 unshare_flags |= CLONE_THREAD;
1797 /*
1798 * If unsharing a pid namespace must also unshare the thread.
1799 */
1800 if (unshare_flags & CLONE_NEWPID)
1801 unshare_flags |= CLONE_THREAD;
1802 /*
1803 * If unsharing a thread from a thread group, must also unshare vm.
1804 */
1805 if (unshare_flags & CLONE_THREAD)
1806 unshare_flags |= CLONE_VM;
1807 /*
1808 * If unsharing vm, must also unshare signal handlers.
1809 */
1810 if (unshare_flags & CLONE_VM)
1811 unshare_flags |= CLONE_SIGHAND;
1802 /* 1812 /*
1803 * If unsharing namespace, must also unshare filesystem information. 1813 * If unsharing namespace, must also unshare filesystem information.
1804 */ 1814 */
1805 if (unshare_flags & CLONE_NEWNS) 1815 if (unshare_flags & CLONE_NEWNS)
1806 unshare_flags |= CLONE_FS; 1816 unshare_flags |= CLONE_FS;
1817
1818 err = check_unshare_flags(unshare_flags);
1819 if (err)
1820 goto bad_unshare_out;
1807 /* 1821 /*
1808 * CLONE_NEWIPC must also detach from the undolist: after switching 1822 * CLONE_NEWIPC must also detach from the undolist: after switching
1809 * to a new ipc namespace, the semaphore arrays from the old 1823 * to a new ipc namespace, the semaphore arrays from the old
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1817 err = unshare_fd(unshare_flags, &new_fd); 1831 err = unshare_fd(unshare_flags, &new_fd);
1818 if (err) 1832 if (err)
1819 goto bad_unshare_cleanup_fs; 1833 goto bad_unshare_cleanup_fs;
1820 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs); 1834 err = unshare_userns(unshare_flags, &new_cred);
1821 if (err) 1835 if (err)
1822 goto bad_unshare_cleanup_fd; 1836 goto bad_unshare_cleanup_fd;
1837 err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1838 new_cred, new_fs);
1839 if (err)
1840 goto bad_unshare_cleanup_cred;
1823 1841
1824 if (new_fs || new_fd || do_sysvsem || new_nsproxy) { 1842 if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
1825 if (do_sysvsem) { 1843 if (do_sysvsem) {
1826 /* 1844 /*
1827 * CLONE_SYSVSEM is equivalent to sys_exit(). 1845 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1854 } 1872 }
1855 1873
1856 task_unlock(current); 1874 task_unlock(current);
1875
1876 if (new_cred) {
1877 /* Install the new user namespace */
1878 commit_creds(new_cred);
1879 new_cred = NULL;
1880 }
1857 } 1881 }
1858 1882
1859 if (new_nsproxy) 1883 if (new_nsproxy)
1860 put_nsproxy(new_nsproxy); 1884 put_nsproxy(new_nsproxy);
1861 1885
1886bad_unshare_cleanup_cred:
1887 if (new_cred)
1888 put_cred(new_cred);
1862bad_unshare_cleanup_fd: 1889bad_unshare_cleanup_fd:
1863 if (new_fd) 1890 if (new_fd)
1864 put_files_struct(new_fd); 1891 put_files_struct(new_fd);