From 3898b1b4ebff8dcfbcf1807e0661585e06c9a91c Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Mon, 28 Apr 2008 02:13:40 -0700 Subject: capabilities: implement per-process securebits Filesystem capability support makes it possible to do away with (set)uid-0 based privilege and use capabilities instead. That is, with filesystem support for capabilities but without this present patch, it is (conceptually) possible to manage a system with capabilities alone and never need to obtain privilege via (set)uid-0. Of course, conceptually isn't quite the same as currently possible since few user applications, certainly not enough to run a viable system, are currently prepared to leverage capabilities to exercise privilege. Further, many applications exist that may never get upgraded in this way, and the kernel will continue to want to support their setuid-0 base privilege needs. Where pure-capability applications evolve and replace setuid-0 binaries, it is desirable that there be a mechanisms by which they can contain their privilege. In addition to leveraging the per-process bounding and inheritable sets, this should include suppressing the privilege of the uid-0 superuser from the process' tree of children. The feature added by this patch can be leveraged to suppress the privilege associated with (set)uid-0. This suppression requires CAP_SETPCAP to initiate, and only immediately affects the 'current' process (it is inherited through fork()/exec()). This reimplementation differs significantly from the historical support for securebits which was system-wide, unwieldy and which has ultimately withered to a dead relic in the source of the modern kernel. With this patch applied a process, that is capable(CAP_SETPCAP), can now drop all legacy privilege (through uid=0) for itself and all subsequently fork()'d/exec()'d children with: prctl(PR_SET_SECUREBITS, 0x2f); This patch represents a no-op unless CONFIG_SECURITY_FILE_CAPABILITIES is enabled at configure time. [akpm@linux-foundation.org: fix uninitialised var warning] [serue@us.ibm.com: capabilities: use cap_task_prctl when !CONFIG_SECURITY] Signed-off-by: Andrew G. Morgan Acked-by: Serge Hallyn Reviewed-by: James Morris Cc: Stephen Smalley Cc: Paul Moore Signed-off-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 6a0cc71ee88d..f2a451366953 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1632,10 +1632,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error; + long uninitialized_var(error); - error = security_task_prctl(option, arg2, arg3, arg4, arg5); - if (error) + if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; switch (option) { @@ -1688,17 +1687,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; - case PR_GET_KEEPCAPS: - if (current->keep_capabilities) - error = 1; - break; - case PR_SET_KEEPCAPS: - if (arg2 != 0 && arg2 != 1) { - error = -EINVAL; - break; - } - current->keep_capabilities = arg2; - break; case PR_SET_NAME: { struct task_struct *me = current; unsigned char ncomm[sizeof(me->comm)]; @@ -1732,17 +1720,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_SECCOMP: error = prctl_set_seccomp(arg2); break; - - case PR_CAPBSET_READ: - if (!cap_valid(arg2)) - return -EINVAL; - return !!cap_raised(current->cap_bset, arg2); - case PR_CAPBSET_DROP: -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES - return cap_prctl_drop(arg2); -#else - return -EINVAL; -#endif case PR_GET_TSC: error = GET_TSC_CTL(arg2); break; -- cgit v1.2.2 From 679c9cd4acc2cf2872171813752eab3320273339 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Tue, 29 Apr 2008 00:58:42 -0700 Subject: add RUSAGE_THREAD Add the RUSAGE_THREAD option for the getrusage system call. This is essentially Roland's patch from http://lkml.org/lkml/2008/1/18/589, but the line about RUSAGE_LWP line has been removed, as suggested by Ulrich and Christoph. Signed-off-by: Roland McGrath Signed-off-by: Sripathi Kodi Cc: Ingo Molnar Cc: Michael Kerrisk Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index f2a451366953..e423d0d9e6ff 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1545,6 +1545,19 @@ out: * */ +static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, + cputime_t *utimep, cputime_t *stimep) +{ + *utimep = cputime_add(*utimep, t->utime); + *stimep = cputime_add(*stimep, t->stime); + r->ru_nvcsw += t->nvcsw; + r->ru_nivcsw += t->nivcsw; + r->ru_minflt += t->min_flt; + r->ru_majflt += t->maj_flt; + r->ru_inblock += task_io_get_inblock(t); + r->ru_oublock += task_io_get_oublock(t); +} + static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; @@ -1554,6 +1567,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; + if (who == RUSAGE_THREAD) { + accumulate_thread_rusage(p, r, &utime, &stime); + goto out; + } + rcu_read_lock(); if (!lock_task_sighand(p, &flags)) { rcu_read_unlock(); @@ -1586,14 +1604,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += p->signal->oublock; t = p; do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - r->ru_nvcsw += t->nvcsw; - r->ru_nivcsw += t->nivcsw; - r->ru_minflt += t->min_flt; - r->ru_majflt += t->maj_flt; - r->ru_inblock += task_io_get_inblock(t); - r->ru_oublock += task_io_get_oublock(t); + accumulate_thread_rusage(t, r, &utime, &stime); t = next_thread(t); } while (t != p); break; @@ -1605,6 +1616,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); rcu_read_unlock(); +out: cputime_to_timeval(utime, &r->ru_utime); cputime_to_timeval(stime, &r->ru_stime); } @@ -1618,7 +1630,8 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) asmlinkage long sys_getrusage(int who, struct rusage __user *ru) { - if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && + who != RUSAGE_THREAD) return -EINVAL; return getrusage(current, who, ru); } -- cgit v1.2.2 From d6cf723a142f63ccb92272bc0e9bfffd3c3a5cac Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:38 -0700 Subject: k_getrusage: don't take rcu_read_lock() Just a trivial example, more to come. k_getrusage() holds rcu_read_lock() because it was previously required by lock_task_sighand(). Unneeded now. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: "Paul E. McKenney" Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index e423d0d9e6ff..47c30a20b554 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1572,11 +1572,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) goto out; } - rcu_read_lock(); - if (!lock_task_sighand(p, &flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(p, &flags)) return; - } switch (who) { case RUSAGE_BOTH: @@ -1612,9 +1609,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) default: BUG(); } - unlock_task_sighand(p, &flags); - rcu_read_unlock(); out: cputime_to_timeval(utime, &r->ru_utime); -- cgit v1.2.2 From 83beaf3c6c75b36b7c9be7f555c8cf7797842cc5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:27 -0700 Subject: pids: sys_setpgid: use change_pid() helper Use change_pid() instead of detach_pid() + attach_pid() in sys_setpgid(). This way task_pgrp() is not NULL in between. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 47c30a20b554..5d0b44cd435c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -978,8 +978,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) goto out; if (task_pgrp(p) != pgrp) { - detach_pid(p, PIDTYPE_PGID); - attach_pid(p, PIDTYPE_PGID, pgrp); + change_pid(p, PIDTYPE_PGID, pgrp); set_task_pgrp(p, pid_nr(pgrp)); } -- cgit v1.2.2 From 1dd768c0815334d2319d6377f0750ace075b6142 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:28 -0700 Subject: pids: sys_getsid: fix unsafe *pid usage, fix possible 0 instead of -ESRCH 1. sys_getsid() needs rcu_read_lock() to derive the session _nr, even if the task is current, otherwise we can race with another thread which does sys_setsid(). 2. The task can exit between find_task_by_vpid() and task_session_vnr(), in that unlikely case sys_getsid() returns 0 instead of -ESRCH. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 5d0b44cd435c..ddd28e261f3a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1022,23 +1022,30 @@ asmlinkage long sys_getpgrp(void) asmlinkage long sys_getsid(pid_t pid) { + struct task_struct *p; + struct pid *sid; + int retval; + + rcu_read_lock(); if (!pid) - return task_session_vnr(current); + sid = task_session(current); else { - int retval; - struct task_struct *p; - - rcu_read_lock(); - p = find_task_by_vpid(pid); retval = -ESRCH; - if (p) { - retval = security_task_getsid(p); - if (!retval) - retval = task_session_vnr(p); - } - rcu_read_unlock(); - return retval; + p = find_task_by_vpid(pid); + if (!p) + goto out; + sid = task_session(p); + if (!sid) + goto out; + + retval = security_task_getsid(p); + if (retval) + goto out; } + retval = pid_vnr(sid); +out: + rcu_read_unlock(); + return retval; } asmlinkage long sys_setsid(void) -- cgit v1.2.2 From 12a3de0a965826096d8adc593bcf4392a7d5b459 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:29 -0700 Subject: pids: sys_getpgid: fix unsafe *pid usage, s/tasklist/rcu/ 1. sys_getpgid() needs rcu_read_lock() to derive the pgrp _nr, even if the task is current, otherwise we can race with another thread which does sys_setpgid(). 2. Use rcu_read_lock() instead of tasklist_lock when pid != 0, make sure that we don't use the NULL pid if the task exits right after successful find_task_by_vpid(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index ddd28e261f3a..895d2d4c9493 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -991,31 +991,37 @@ out: asmlinkage long sys_getpgid(pid_t pid) { + struct task_struct *p; + struct pid *grp; + int retval; + + rcu_read_lock(); if (!pid) - return task_pgrp_vnr(current); + grp = task_pgrp(current); else { - int retval; - struct task_struct *p; - - read_lock(&tasklist_lock); - p = find_task_by_vpid(pid); retval = -ESRCH; - if (p) { - retval = security_task_getpgid(p); - if (!retval) - retval = task_pgrp_vnr(p); - } - read_unlock(&tasklist_lock); - return retval; + p = find_task_by_vpid(pid); + if (!p) + goto out; + grp = task_pgrp(p); + if (!grp) + goto out; + + retval = security_task_getpgid(p); + if (retval) + goto out; } + retval = pid_vnr(grp); +out: + rcu_read_unlock(); + return retval; } #ifdef __ARCH_WANT_SYS_GETPGRP asmlinkage long sys_getpgrp(void) { - /* SMP - assuming writes are word atomic this is fine */ - return task_pgrp_vnr(current); + return sys_getpgid(0); } #endif -- cgit v1.2.2