aboutsummaryrefslogtreecommitdiffstats
path: root/security
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 21:34:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-11 21:34:47 -0400
commitdd198ce7141aa8dd9ffcc9549de422fb055508de (patch)
tree86ad8e0e74b0a1cb8cc62a621e2946334733a402 /security
parent89fd915c402113528750353ad6de9ea68a787e5c (diff)
parent076a9bcacfc7ccbc2b3fdf3bd490718f6b182419 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull namespace updates from Eric Biederman: "Life has been busy and I have not gotten half as much done this round as I would have liked. I delayed it so that a minor conflict resolution with the mips tree could spend a little time in linux-next before I sent this pull request. This includes two long delayed user namespace changes from Kirill Tkhai. It also includes a very useful change from Serge Hallyn that allows the security capability attribute to be used inside of user namespaces. The practical effect of this is people can now untar tarballs and install rpms in user namespaces. It had been suggested to generalize this and encode some of the namespace information in the xattr name. Upon close inspection that makes the things that should be hard easy and the things that should be easy more expensive. Then there is my bugfix/cleanup for signal injection that removes the magic encoding of the siginfo union member from the kernel internal si_code. The mips folks reported that the case where I had used FPE_FIXME is impossible, so I have removed FPE_FIXME from mips, while at the same time including a return statement in that case to keep gcc from complaining about uninitialized variables. I almost finished the work to make copy_siginfo_to_user a trivial copy to user. The code is available at: git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git neuter-copy_siginfo_to_user-v3 But I did not have time/energy to get the code posted and reviewed before the merge window opened. I was able to see that the security excuse for just copying fields that we know are initialized doesn't work in practice: there are buggy initializations that don't initialize the proper fields in siginfo.
So we still sometimes copy uninitialized data to userspace" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: Introduce v3 namespaced file capabilities mips/signal: In force_fcr31_sig return in the impossible case signal: Remove kernel interal si_code magic fcntl: Don't use ambiguous SIG_POLL si_codes prctl: Allow local CAP_SYS_ADMIN changing exe_file security: Use user_namespace::level to avoid redundant iterations in cap_capable() userns,pidns: Verify the userns for new pid namespaces signal/testing: Don't look for __SI_FAULT in userspace signal/mips: Document a conflict with SI_USER with SIGFPE signal/sparc: Document a conflict with SI_USER with SIGFPE signal/ia64: Document a conflict with SI_USER with SIGFPE signal/alpha: Document a conflict with SI_USER for SIGTRAP
Diffstat (limited to 'security')
-rw-r--r--security/commoncap.c277
1 files changed, 256 insertions, 21 deletions
diff --git a/security/commoncap.c b/security/commoncap.c
index d8e26fb9781d..6bf72b175b49 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -82,8 +82,11 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
82 if (ns == cred->user_ns) 82 if (ns == cred->user_ns)
83 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; 83 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
84 84
85 /* Have we tried all of the parent namespaces? */ 85 /*
86 if (ns == &init_user_ns) 86 * If we're already at a lower level than we're looking for,
87 * we're done searching.
88 */
89 if (ns->level <= cred->user_ns->level)
87 return -EPERM; 90 return -EPERM;
88 91
89 /* 92 /*
@@ -323,6 +326,209 @@ int cap_inode_killpriv(struct dentry *dentry)
323 return error; 326 return error;
324} 327}
325 328
329static bool rootid_owns_currentns(kuid_t kroot)
330{
331 struct user_namespace *ns;
332
333 if (!uid_valid(kroot))
334 return false;
335
336 for (ns = current_user_ns(); ; ns = ns->parent) {
337 if (from_kuid(ns, kroot) == 0)
338 return true;
339 if (ns == &init_user_ns)
340 break;
341 }
342
343 return false;
344}
345
346static __u32 sansflags(__u32 m)
347{
348 return m & ~VFS_CAP_FLAGS_EFFECTIVE;
349}
350
351static bool is_v2header(size_t size, __le32 magic)
352{
353 __u32 m = le32_to_cpu(magic);
354 if (size != XATTR_CAPS_SZ_2)
355 return false;
356 return sansflags(m) == VFS_CAP_REVISION_2;
357}
358
359static bool is_v3header(size_t size, __le32 magic)
360{
361 __u32 m = le32_to_cpu(magic);
362
363 if (size != XATTR_CAPS_SZ_3)
364 return false;
365 return sansflags(m) == VFS_CAP_REVISION_3;
366}
367
368/*
369 * getsecurity: We are called for security.* before any attempt to read the
370 * xattr from the inode itself.
371 *
372 * This gives us a chance to read the on-disk value and convert it. If we
373 * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
374 *
375 * Note we are not called by vfs_getxattr_alloc(), but that is only called
376 * by the integrity subsystem, which really wants the unconverted values -
377 * so that's good.
378 */
379int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
380 bool alloc)
381{
382 int size, ret;
383 kuid_t kroot;
384 uid_t root, mappedroot;
385 char *tmpbuf = NULL;
386 struct vfs_cap_data *cap;
387 struct vfs_ns_cap_data *nscap;
388 struct dentry *dentry;
389 struct user_namespace *fs_ns;
390
391 if (strcmp(name, "capability") != 0)
392 return -EOPNOTSUPP;
393
394 dentry = d_find_alias(inode);
395 if (!dentry)
396 return -EINVAL;
397
398 size = sizeof(struct vfs_ns_cap_data);
399 ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS,
400 &tmpbuf, size, GFP_NOFS);
401 dput(dentry);
402
403 if (ret < 0)
404 return ret;
405
406 fs_ns = inode->i_sb->s_user_ns;
407 cap = (struct vfs_cap_data *) tmpbuf;
408 if (is_v2header((size_t) ret, cap->magic_etc)) {
409 /* If this is sizeof(vfs_cap_data) then we're ok with the
410 * on-disk value, so return that. */
411 if (alloc)
412 *buffer = tmpbuf;
413 else
414 kfree(tmpbuf);
415 return ret;
416 } else if (!is_v3header((size_t) ret, cap->magic_etc)) {
417 kfree(tmpbuf);
418 return -EINVAL;
419 }
420
421 nscap = (struct vfs_ns_cap_data *) tmpbuf;
422 root = le32_to_cpu(nscap->rootid);
423 kroot = make_kuid(fs_ns, root);
424
425 /* If the root kuid maps to a valid uid in current ns, then return
426 * this as a nscap. */
427 mappedroot = from_kuid(current_user_ns(), kroot);
428 if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
429 if (alloc) {
430 *buffer = tmpbuf;
431 nscap->rootid = cpu_to_le32(mappedroot);
432 } else
433 kfree(tmpbuf);
434 return size;
435 }
436
437 if (!rootid_owns_currentns(kroot)) {
438 kfree(tmpbuf);
439 return -EOPNOTSUPP;
440 }
441
442 /* This comes from a parent namespace. Return as a v2 capability */
443 size = sizeof(struct vfs_cap_data);
444 if (alloc) {
445 *buffer = kmalloc(size, GFP_ATOMIC);
446 if (*buffer) {
447 struct vfs_cap_data *cap = *buffer;
448 __le32 nsmagic, magic;
449 magic = VFS_CAP_REVISION_2;
450 nsmagic = le32_to_cpu(nscap->magic_etc);
451 if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
452 magic |= VFS_CAP_FLAGS_EFFECTIVE;
453 memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
454 cap->magic_etc = cpu_to_le32(magic);
455 }
456 }
457 kfree(tmpbuf);
458 return size;
459}
460
461static kuid_t rootid_from_xattr(const void *value, size_t size,
462 struct user_namespace *task_ns)
463{
464 const struct vfs_ns_cap_data *nscap = value;
465 uid_t rootid = 0;
466
467 if (size == XATTR_CAPS_SZ_3)
468 rootid = le32_to_cpu(nscap->rootid);
469
470 return make_kuid(task_ns, rootid);
471}
472
473static bool validheader(size_t size, __le32 magic)
474{
475 return is_v2header(size, magic) || is_v3header(size, magic);
476}
477
478/*
479 * User requested a write of security.capability. If needed, update the
480 * xattr to change from v2 to v3, or to fixup the v3 rootid.
481 *
482 * If all is ok, we return the new size, on error return < 0.
483 */
484int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
485{
486 struct vfs_ns_cap_data *nscap;
487 uid_t nsrootid;
488 const struct vfs_cap_data *cap = *ivalue;
489 __u32 magic, nsmagic;
490 struct inode *inode = d_backing_inode(dentry);
491 struct user_namespace *task_ns = current_user_ns(),
492 *fs_ns = inode->i_sb->s_user_ns;
493 kuid_t rootid;
494 size_t newsize;
495
496 if (!*ivalue)
497 return -EINVAL;
498 if (!validheader(size, cap->magic_etc))
499 return -EINVAL;
500 if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
501 return -EPERM;
502 if (size == XATTR_CAPS_SZ_2)
503 if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
504 /* user is privileged, just write the v2 */
505 return size;
506
507 rootid = rootid_from_xattr(*ivalue, size, task_ns);
508 if (!uid_valid(rootid))
509 return -EINVAL;
510
511 nsrootid = from_kuid(fs_ns, rootid);
512 if (nsrootid == -1)
513 return -EINVAL;
514
515 newsize = sizeof(struct vfs_ns_cap_data);
516 nscap = kmalloc(newsize, GFP_ATOMIC);
517 if (!nscap)
518 return -ENOMEM;
519 nscap->rootid = cpu_to_le32(nsrootid);
520 nsmagic = VFS_CAP_REVISION_3;
521 magic = le32_to_cpu(cap->magic_etc);
522 if (magic & VFS_CAP_FLAGS_EFFECTIVE)
523 nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
524 nscap->magic_etc = cpu_to_le32(nsmagic);
525 memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
526
527 kvfree(*ivalue);
528 *ivalue = nscap;
529 return newsize;
530}
531
326/* 532/*
327 * Calculate the new process capability sets from the capability sets attached 533 * Calculate the new process capability sets from the capability sets attached
328 * to a file. 534 * to a file.
@@ -376,7 +582,10 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
376 __u32 magic_etc; 582 __u32 magic_etc;
377 unsigned tocopy, i; 583 unsigned tocopy, i;
378 int size; 584 int size;
379 struct vfs_cap_data caps; 585 struct vfs_ns_cap_data data, *nscaps = &data;
586 struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
587 kuid_t rootkuid;
588 struct user_namespace *fs_ns = inode->i_sb->s_user_ns;
380 589
381 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); 590 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
382 591
@@ -384,18 +593,20 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
384 return -ENODATA; 593 return -ENODATA;
385 594
386 size = __vfs_getxattr((struct dentry *)dentry, inode, 595 size = __vfs_getxattr((struct dentry *)dentry, inode,
387 XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); 596 XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
388 if (size == -ENODATA || size == -EOPNOTSUPP) 597 if (size == -ENODATA || size == -EOPNOTSUPP)
389 /* no data, that's ok */ 598 /* no data, that's ok */
390 return -ENODATA; 599 return -ENODATA;
600
391 if (size < 0) 601 if (size < 0)
392 return size; 602 return size;
393 603
394 if (size < sizeof(magic_etc)) 604 if (size < sizeof(magic_etc))
395 return -EINVAL; 605 return -EINVAL;
396 606
397 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); 607 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
398 608
609 rootkuid = make_kuid(fs_ns, 0);
399 switch (magic_etc & VFS_CAP_REVISION_MASK) { 610 switch (magic_etc & VFS_CAP_REVISION_MASK) {
400 case VFS_CAP_REVISION_1: 611 case VFS_CAP_REVISION_1:
401 if (size != XATTR_CAPS_SZ_1) 612 if (size != XATTR_CAPS_SZ_1)
@@ -407,15 +618,27 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
407 return -EINVAL; 618 return -EINVAL;
408 tocopy = VFS_CAP_U32_2; 619 tocopy = VFS_CAP_U32_2;
409 break; 620 break;
621 case VFS_CAP_REVISION_3:
622 if (size != XATTR_CAPS_SZ_3)
623 return -EINVAL;
624 tocopy = VFS_CAP_U32_3;
625 rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
626 break;
627
410 default: 628 default:
411 return -EINVAL; 629 return -EINVAL;
412 } 630 }
631 /* Limit the caps to the mounter of the filesystem
632 * or the more limited uid specified in the xattr.
633 */
634 if (!rootid_owns_currentns(rootkuid))
635 return -ENODATA;
413 636
414 CAP_FOR_EACH_U32(i) { 637 CAP_FOR_EACH_U32(i) {
415 if (i >= tocopy) 638 if (i >= tocopy)
416 break; 639 break;
417 cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); 640 cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
418 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); 641 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
419 } 642 }
420 643
421 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; 644 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
@@ -453,8 +676,8 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
453 rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); 676 rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
454 if (rc < 0) { 677 if (rc < 0) {
455 if (rc == -EINVAL) 678 if (rc == -EINVAL)
456 printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", 679 printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
457 __func__, rc, bprm->filename); 680 bprm->filename);
458 else if (rc == -ENODATA) 681 else if (rc == -ENODATA)
459 rc = 0; 682 rc = 0;
460 goto out; 683 goto out;
@@ -633,15 +856,19 @@ skip:
633int cap_inode_setxattr(struct dentry *dentry, const char *name, 856int cap_inode_setxattr(struct dentry *dentry, const char *name,
634 const void *value, size_t size, int flags) 857 const void *value, size_t size, int flags)
635{ 858{
636 if (!strcmp(name, XATTR_NAME_CAPS)) { 859 /* Ignore non-security xattrs */
637 if (!capable(CAP_SETFCAP)) 860 if (strncmp(name, XATTR_SECURITY_PREFIX,
638 return -EPERM; 861 sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
862 return 0;
863
864 /*
865 * For XATTR_NAME_CAPS the check will be done in
866 * cap_convert_nscap(), called by setxattr()
867 */
868 if (strcmp(name, XATTR_NAME_CAPS) == 0)
639 return 0; 869 return 0;
640 }
641 870
642 if (!strncmp(name, XATTR_SECURITY_PREFIX, 871 if (!capable(CAP_SYS_ADMIN))
643 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
644 !capable(CAP_SYS_ADMIN))
645 return -EPERM; 872 return -EPERM;
646 return 0; 873 return 0;
647} 874}
@@ -659,15 +886,22 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
659 */ 886 */
660int cap_inode_removexattr(struct dentry *dentry, const char *name) 887int cap_inode_removexattr(struct dentry *dentry, const char *name)
661{ 888{
662 if (!strcmp(name, XATTR_NAME_CAPS)) { 889 /* Ignore non-security xattrs */
663 if (!capable(CAP_SETFCAP)) 890 if (strncmp(name, XATTR_SECURITY_PREFIX,
891 sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
892 return 0;
893
894 if (strcmp(name, XATTR_NAME_CAPS) == 0) {
895 /* security.capability gets namespaced */
896 struct inode *inode = d_backing_inode(dentry);
897 if (!inode)
898 return -EINVAL;
899 if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
664 return -EPERM; 900 return -EPERM;
665 return 0; 901 return 0;
666 } 902 }
667 903
668 if (!strncmp(name, XATTR_SECURITY_PREFIX, 904 if (!capable(CAP_SYS_ADMIN))
669 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
670 !capable(CAP_SYS_ADMIN))
671 return -EPERM; 905 return -EPERM;
672 return 0; 906 return 0;
673} 907}
@@ -1054,6 +1288,7 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
1054 LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds), 1288 LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds),
1055 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), 1289 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1056 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), 1290 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1291 LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1057 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), 1292 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1058 LSM_HOOK_INIT(mmap_file, cap_mmap_file), 1293 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1059 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid), 1294 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),