aboutsummaryrefslogtreecommitdiffstats
path: root/security/commoncap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-23 20:42:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-23 20:42:39 -0400
commit644473e9c60c1ff4f6351fed637a6e5551e3dce7 (patch)
tree10316518bedc735a2c6552886658d69dfd9f1eb0 /security/commoncap.c
parentfb827ec68446c83e9e8754fa9b55aed27ecc4661 (diff)
parent4b06a81f1daee668fbd6de85557bfb36dd36078f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace enhancements from Eric Biederman: "This is a course correction for the user namespace, so that we can reach an inexpensive, maintainable, and reasonably complete implementation. Highlights: - Config guards make it impossible to enable the user namespace and code that has not been converted to be user namespace safe. - Use of the new kuid_t type ensures the if you somehow get past the config guards the kernel will encounter type errors if you enable user namespaces and attempt to compile in code whose permission checks have not been updated to be user namespace safe. - All uids from child user namespaces are mapped into the initial user namespace before they are processed. Removing the need to add an additional check to see if the user namespace of the compared uids remains the same. - With the user namespaces compiled out the performance is as good or better than it is today. - For most operations absolutely nothing changes performance or operationally with the user namespace enabled. - The worst case performance I could come up with was timing 1 billion cache cold stat operations with the user namespace code enabled. This went from 156s to 164s on my laptop (or 156ns to 164ns per stat operation). - (uid_t)-1 and (gid_t)-1 are reserved as an internal error value. Most uid/gid setting system calls treat these value specially anyway so attempting to use -1 as a uid would likely cause entertaining failures in userspace. - If setuid is called with a uid that can not be mapped setuid fails. I have looked at sendmail, login, ssh and every other program I could think of that would call setuid and they all check for and handle the case where setuid fails. - If stat or a similar system call is called from a context in which we can not map a uid we lie and return overflowuid. The LFS experience suggests not lying and returning an error code might be better, but the historical precedent with uids is different and I can not think of anything that would break by lying about a uid we can't map. - Capabilities are localized to the current user namespace making it safe to give the initial user in a user namespace all capabilities. My git tree covers all of the modifications needed to convert the core kernel and enough changes to make a system bootable to runlevel 1." Fix up trivial conflicts due to nearby independent changes in fs/stat.c * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (46 commits) userns: Silence silly gcc warning. cred: use correct cred accessor with regards to rcu read lock userns: Convert the move_pages, and migrate_pages permission checks to use uid_eq userns: Convert cgroup permission checks to use uid_eq userns: Convert tmpfs to use kuid and kgid where appropriate userns: Convert sysfs to use kgid/kuid where appropriate userns: Convert sysctl permission checks to use kuid and kgids. userns: Convert proc to use kuid/kgid where appropriate userns: Convert ext4 to user kuid/kgid where appropriate userns: Convert ext3 to use kuid/kgid where appropriate userns: Convert ext2 to use kuid/kgid where appropriate. userns: Convert devpts to use kuid/kgid where appropriate userns: Convert binary formats to use kuid/kgid where appropriate userns: Add negative depends on entries to avoid building code that is userns unsafe userns: signal remove unnecessary map_cred_ns userns: Teach inode_capable to understand inodes whose uids map to other namespaces. userns: Fail exec for suid and sgid binaries with ids outside our user namespace. userns: Convert stat to return values mapped from kuids and kgids userns: Convert user specfied uids and gids in chown into kuids and kgid userns: Use uid_eq gid_eq helpers when comparing kuids and kgids in the vfs ...
Diffstat (limited to 'security/commoncap.c')
-rw-r--r--security/commoncap.c61
1 files changed, 36 insertions, 25 deletions
diff --git a/security/commoncap.c b/security/commoncap.c
index f80d11609391..e771cb1b2d79 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -77,12 +77,12 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
77 int cap, int audit) 77 int cap, int audit)
78{ 78{
79 for (;;) { 79 for (;;) {
80 /* The creator of the user namespace has all caps. */ 80 /* The owner of the user namespace has all caps. */
81 if (targ_ns != &init_user_ns && targ_ns->creator == cred->user) 81 if (targ_ns != &init_user_ns && uid_eq(targ_ns->owner, cred->euid))
82 return 0; 82 return 0;
83 83
84 /* Do we have the necessary capabilities? */ 84 /* Do we have the necessary capabilities? */
85 if (targ_ns == cred->user->user_ns) 85 if (targ_ns == cred->user_ns)
86 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; 86 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
87 87
88 /* Have we tried all of the parent namespaces? */ 88 /* Have we tried all of the parent namespaces? */
@@ -93,7 +93,7 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
93 *If you have a capability in a parent user ns, then you have 93 *If you have a capability in a parent user ns, then you have
94 * it over all children user namespaces as well. 94 * it over all children user namespaces as well.
95 */ 95 */
96 targ_ns = targ_ns->creator->user_ns; 96 targ_ns = targ_ns->parent;
97 } 97 }
98 98
99 /* We never get here */ 99 /* We never get here */
@@ -137,10 +137,10 @@ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
137 rcu_read_lock(); 137 rcu_read_lock();
138 cred = current_cred(); 138 cred = current_cred();
139 child_cred = __task_cred(child); 139 child_cred = __task_cred(child);
140 if (cred->user->user_ns == child_cred->user->user_ns && 140 if (cred->user_ns == child_cred->user_ns &&
141 cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) 141 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
142 goto out; 142 goto out;
143 if (ns_capable(child_cred->user->user_ns, CAP_SYS_PTRACE)) 143 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
144 goto out; 144 goto out;
145 ret = -EPERM; 145 ret = -EPERM;
146out: 146out:
@@ -169,10 +169,10 @@ int cap_ptrace_traceme(struct task_struct *parent)
169 rcu_read_lock(); 169 rcu_read_lock();
170 cred = __task_cred(parent); 170 cred = __task_cred(parent);
171 child_cred = current_cred(); 171 child_cred = current_cred();
172 if (cred->user->user_ns == child_cred->user->user_ns && 172 if (cred->user_ns == child_cred->user_ns &&
173 cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) 173 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
174 goto out; 174 goto out;
175 if (has_ns_capability(parent, child_cred->user->user_ns, CAP_SYS_PTRACE)) 175 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
176 goto out; 176 goto out;
177 ret = -EPERM; 177 ret = -EPERM;
178out: 178out:
@@ -215,7 +215,7 @@ static inline int cap_inh_is_capped(void)
215 /* they are so limited unless the current task has the CAP_SETPCAP 215 /* they are so limited unless the current task has the CAP_SETPCAP
216 * capability 216 * capability
217 */ 217 */
218 if (cap_capable(current_cred(), current_cred()->user->user_ns, 218 if (cap_capable(current_cred(), current_cred()->user_ns,
219 CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) 219 CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
220 return 0; 220 return 0;
221 return 1; 221 return 1;
@@ -473,19 +473,22 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
473 struct cred *new = bprm->cred; 473 struct cred *new = bprm->cred;
474 bool effective, has_cap = false; 474 bool effective, has_cap = false;
475 int ret; 475 int ret;
476 kuid_t root_uid;
476 477
477 effective = false; 478 effective = false;
478 ret = get_file_caps(bprm, &effective, &has_cap); 479 ret = get_file_caps(bprm, &effective, &has_cap);
479 if (ret < 0) 480 if (ret < 0)
480 return ret; 481 return ret;
481 482
483 root_uid = make_kuid(new->user_ns, 0);
484
482 if (!issecure(SECURE_NOROOT)) { 485 if (!issecure(SECURE_NOROOT)) {
483 /* 486 /*
484 * If the legacy file capability is set, then don't set privs 487 * If the legacy file capability is set, then don't set privs
485 * for a setuid root binary run by a non-root user. Do set it 488 * for a setuid root binary run by a non-root user. Do set it
486 * for a root user just to cause least surprise to an admin. 489 * for a root user just to cause least surprise to an admin.
487 */ 490 */
488 if (has_cap && new->uid != 0 && new->euid == 0) { 491 if (has_cap && !uid_eq(new->uid, root_uid) && uid_eq(new->euid, root_uid)) {
489 warn_setuid_and_fcaps_mixed(bprm->filename); 492 warn_setuid_and_fcaps_mixed(bprm->filename);
490 goto skip; 493 goto skip;
491 } 494 }
@@ -496,12 +499,12 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
496 * 499 *
497 * If only the real uid is 0, we do not set the effective bit. 500 * If only the real uid is 0, we do not set the effective bit.
498 */ 501 */
499 if (new->euid == 0 || new->uid == 0) { 502 if (uid_eq(new->euid, root_uid) || uid_eq(new->uid, root_uid)) {
500 /* pP' = (cap_bset & ~0) | (pI & ~0) */ 503 /* pP' = (cap_bset & ~0) | (pI & ~0) */
501 new->cap_permitted = cap_combine(old->cap_bset, 504 new->cap_permitted = cap_combine(old->cap_bset,
502 old->cap_inheritable); 505 old->cap_inheritable);
503 } 506 }
504 if (new->euid == 0) 507 if (uid_eq(new->euid, root_uid))
505 effective = true; 508 effective = true;
506 } 509 }
507skip: 510skip:
@@ -516,8 +519,8 @@ skip:
516 * 519 *
517 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs. 520 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
518 */ 521 */
519 if ((new->euid != old->uid || 522 if ((!uid_eq(new->euid, old->uid) ||
520 new->egid != old->gid || 523 !gid_eq(new->egid, old->gid) ||
521 !cap_issubset(new->cap_permitted, old->cap_permitted)) && 524 !cap_issubset(new->cap_permitted, old->cap_permitted)) &&
522 bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 525 bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
523 /* downgrade; they get no more than they had, and maybe less */ 526 /* downgrade; they get no more than they had, and maybe less */
@@ -553,7 +556,7 @@ skip:
553 */ 556 */
554 if (!cap_isclear(new->cap_effective)) { 557 if (!cap_isclear(new->cap_effective)) {
555 if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || 558 if (!cap_issubset(CAP_FULL_SET, new->cap_effective) ||
556 new->euid != 0 || new->uid != 0 || 559 !uid_eq(new->euid, root_uid) || !uid_eq(new->uid, root_uid) ||
557 issecure(SECURE_NOROOT)) { 560 issecure(SECURE_NOROOT)) {
558 ret = audit_log_bprm_fcaps(bprm, new, old); 561 ret = audit_log_bprm_fcaps(bprm, new, old);
559 if (ret < 0) 562 if (ret < 0)
@@ -578,16 +581,17 @@ skip:
578int cap_bprm_secureexec(struct linux_binprm *bprm) 581int cap_bprm_secureexec(struct linux_binprm *bprm)
579{ 582{
580 const struct cred *cred = current_cred(); 583 const struct cred *cred = current_cred();
584 kuid_t root_uid = make_kuid(cred->user_ns, 0);
581 585
582 if (cred->uid != 0) { 586 if (!uid_eq(cred->uid, root_uid)) {
583 if (bprm->cap_effective) 587 if (bprm->cap_effective)
584 return 1; 588 return 1;
585 if (!cap_isclear(cred->cap_permitted)) 589 if (!cap_isclear(cred->cap_permitted))
586 return 1; 590 return 1;
587 } 591 }
588 592
589 return (cred->euid != cred->uid || 593 return (!uid_eq(cred->euid, cred->uid) ||
590 cred->egid != cred->gid); 594 !gid_eq(cred->egid, cred->gid));
591} 595}
592 596
593/** 597/**
@@ -677,15 +681,21 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
677 */ 681 */
678static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) 682static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
679{ 683{
680 if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && 684 kuid_t root_uid = make_kuid(old->user_ns, 0);
681 (new->uid != 0 && new->euid != 0 && new->suid != 0) && 685
686 if ((uid_eq(old->uid, root_uid) ||
687 uid_eq(old->euid, root_uid) ||
688 uid_eq(old->suid, root_uid)) &&
689 (!uid_eq(new->uid, root_uid) &&
690 !uid_eq(new->euid, root_uid) &&
691 !uid_eq(new->suid, root_uid)) &&
682 !issecure(SECURE_KEEP_CAPS)) { 692 !issecure(SECURE_KEEP_CAPS)) {
683 cap_clear(new->cap_permitted); 693 cap_clear(new->cap_permitted);
684 cap_clear(new->cap_effective); 694 cap_clear(new->cap_effective);
685 } 695 }
686 if (old->euid == 0 && new->euid != 0) 696 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
687 cap_clear(new->cap_effective); 697 cap_clear(new->cap_effective);
688 if (old->euid != 0 && new->euid == 0) 698 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
689 new->cap_effective = new->cap_permitted; 699 new->cap_effective = new->cap_permitted;
690} 700}
691 701
@@ -718,11 +728,12 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
718 * if not, we might be a bit too harsh here. 728 * if not, we might be a bit too harsh here.
719 */ 729 */
720 if (!issecure(SECURE_NO_SETUID_FIXUP)) { 730 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
721 if (old->fsuid == 0 && new->fsuid != 0) 731 kuid_t root_uid = make_kuid(old->user_ns, 0);
732 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
722 new->cap_effective = 733 new->cap_effective =
723 cap_drop_fs_set(new->cap_effective); 734 cap_drop_fs_set(new->cap_effective);
724 735
725 if (old->fsuid != 0 && new->fsuid == 0) 736 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
726 new->cap_effective = 737 new->cap_effective =
727 cap_raise_fs_set(new->cap_effective, 738 cap_raise_fs_set(new->cap_effective,
728 new->cap_permitted); 739 new->cap_permitted);
@@ -875,7 +886,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
875 || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ 886 || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
876 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 887 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
877 || (cap_capable(current_cred(), 888 || (cap_capable(current_cred(),
878 current_cred()->user->user_ns, CAP_SETPCAP, 889 current_cred()->user_ns, CAP_SETPCAP,
879 SECURITY_CAP_AUDIT) != 0) /*[4]*/ 890 SECURITY_CAP_AUDIT) != 0) /*[4]*/
880 /* 891 /*
881 * [1] no changing of bits that are locked 892 * [1] no changing of bits that are locked