aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSerge E. Hallyn <serue@us.ibm.com>2008-02-05 01:29:45 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-05 12:44:20 -0500
commit3b7391de67da515c91f48aa371de77cb6cc5c07e (patch)
tree22b9f5d9d1c36b374eb5765219aca3c7e1f23486 /kernel
parent46c383cc4530ccc438cb325e92e11eb21dd3d4fc (diff)
capabilities: introduce per-process capability bounding set
The capability bounding set is a set beyond which capabilities cannot grow. Currently cap_bset is per-system. It can be manipulated through sysctl, but only init can add capabilities. Root can remove capabilities. By default it includes all caps except CAP_SETPCAP. This patch makes the bounding set per-process when file capabilities are enabled. It is inherited at fork from the parent. No one can add elements; CAP_SETPCAP is required to remove them. One example use of this is to start a safer container. For instance, until device namespaces or per-container device whitelists are introduced, it is best to take CAP_MKNOD away from a container. The bounding set will not affect pP and pE immediately. It will only affect pP' and pE' after subsequent exec()s. It also does not affect pI, and exec() does not constrain pI'. So to really start a shell with no way of regaining CAP_MKNOD, you would do: prctl(PR_CAPBSET_DROP, CAP_MKNOD); cap_t cap = cap_get_proc(); cap_value_t caparray[1]; caparray[0] = CAP_MKNOD; cap_set_flag(cap, CAP_INHERITABLE, 1, caparray, CAP_CLEAR); cap_set_proc(cap); cap_free(cap); The following test program will get and set the bounding set (but not pI). 
For instance: ./bset get (lists capabilities in bset) ./bset drop cap_net_raw (starts shell with new bset) (use capset, setuid binary, or binary with file capabilities to try to increase caps) ************************************************************ cap_bound.c ************************************************************ #include <sys/prctl.h> #include <linux/capability.h> #include <sys/types.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #ifndef PR_CAPBSET_READ #define PR_CAPBSET_READ 23 #endif #ifndef PR_CAPBSET_DROP #define PR_CAPBSET_DROP 24 #endif int usage(char *me) { printf("Usage: %s get\n", me); printf(" %s drop <capability>\n", me); return 1; } #define numcaps 32 char *captable[numcaps] = { "cap_chown", "cap_dac_override", "cap_dac_read_search", "cap_fowner", "cap_fsetid", "cap_kill", "cap_setgid", "cap_setuid", "cap_setpcap", "cap_linux_immutable", "cap_net_bind_service", "cap_net_broadcast", "cap_net_admin", "cap_net_raw", "cap_ipc_lock", "cap_ipc_owner", "cap_sys_module", "cap_sys_rawio", "cap_sys_chroot", "cap_sys_ptrace", "cap_sys_pacct", "cap_sys_admin", "cap_sys_boot", "cap_sys_nice", "cap_sys_resource", "cap_sys_time", "cap_sys_tty_config", "cap_mknod", "cap_lease", "cap_audit_write", "cap_audit_control", "cap_setfcap" }; int getbcap(void) { int comma=0; unsigned long i; int ret; printf("i know of %d capabilities\n", numcaps); printf("capability bounding set:"); for (i=0; i<numcaps; i++) { ret = prctl(PR_CAPBSET_READ, i); if (ret < 0) perror("prctl"); else if (ret==1) printf("%s%s", (comma++) ? 
", " : " ", captable[i]); } printf("\n"); return 0; } int capdrop(char *str) { unsigned long i; int found=0; for (i=0; i<numcaps; i++) { if (strcmp(captable[i], str) == 0) { found=1; break; } } if (!found) return 1; if (prctl(PR_CAPBSET_DROP, i)) { perror("prctl"); return 1; } return 0; } int main(int argc, char *argv[]) { if (argc<2) return usage(argv[0]); if (strcmp(argv[1], "get")==0) return getbcap(); if (strcmp(argv[1], "drop")!=0 || argc<3) return usage(argv[0]); if (capdrop(argv[2])) { printf("unknown capability\n"); return 1; } return execl("/bin/bash", "/bin/bash", NULL); } ************************************************************ [serue@us.ibm.com: fix typo] Signed-off-by: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Andrew G. Morgan <morgan@kernel.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: James Morris <jmorris@namei.org> Cc: Chris Wright <chrisw@sous-sol.org> Cc: Casey Schaufler <casey@schaufler-ca.com> Signed-off-by: "Serge E. Hallyn" <serue@us.ibm.com> Tested-by: Jiri Slaby <jirislaby@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/sys.c13
-rw-r--r--kernel/sysctl.c35
-rw-r--r--kernel/sysctl_check.c7
4 files changed, 13 insertions, 43 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 1160f87ba700..2b55b74cd999 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1118,6 +1118,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1118#ifdef CONFIG_SECURITY 1118#ifdef CONFIG_SECURITY
1119 p->security = NULL; 1119 p->security = NULL;
1120#endif 1120#endif
1121 p->cap_bset = current->cap_bset;
1121 p->io_context = NULL; 1122 p->io_context = NULL;
1122 p->audit_context = NULL; 1123 p->audit_context = NULL;
1123 cgroup_fork(p); 1124 cgroup_fork(p);
diff --git a/kernel/sys.c b/kernel/sys.c
index d1fe71eb4546..4162d12390b6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1637,7 +1637,7 @@ asmlinkage long sys_umask(int mask)
1637 mask = xchg(&current->fs->umask, mask & S_IRWXUGO); 1637 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1638 return mask; 1638 return mask;
1639} 1639}
1640 1640
1641asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, 1641asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1642 unsigned long arg4, unsigned long arg5) 1642 unsigned long arg4, unsigned long arg5)
1643{ 1643{
@@ -1742,6 +1742,17 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1742 error = prctl_set_seccomp(arg2); 1742 error = prctl_set_seccomp(arg2);
1743 break; 1743 break;
1744 1744
1745 case PR_CAPBSET_READ:
1746 if (!cap_valid(arg2))
1747 return -EINVAL;
1748 return !!cap_raised(current->cap_bset, arg2);
1749 case PR_CAPBSET_DROP:
1750#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
1751 return cap_prctl_drop(arg2);
1752#else
1753 return -EINVAL;
1754#endif
1755
1745 default: 1756 default:
1746 error = -EINVAL; 1757 error = -EINVAL;
1747 break; 1758 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d0b47b859067..5e2ad5bf88e2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -419,15 +419,6 @@ static struct ctl_table kern_table[] = {
419 .proc_handler = &proc_dointvec, 419 .proc_handler = &proc_dointvec,
420 }, 420 },
421#endif 421#endif
422#ifdef CONFIG_SECURITY_CAPABILITIES
423 {
424 .procname = "cap-bound",
425 .data = &cap_bset,
426 .maxlen = sizeof(kernel_cap_t),
427 .mode = 0600,
428 .proc_handler = &proc_dointvec_bset,
429 },
430#endif /* def CONFIG_SECURITY_CAPABILITIES */
431#ifdef CONFIG_BLK_DEV_INITRD 422#ifdef CONFIG_BLK_DEV_INITRD
432 { 423 {
433 .ctl_name = KERN_REALROOTDEV, 424 .ctl_name = KERN_REALROOTDEV,
@@ -2096,26 +2087,6 @@ static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
2096 return 0; 2087 return 0;
2097} 2088}
2098 2089
2099#ifdef CONFIG_SECURITY_CAPABILITIES
2100/*
2101 * init may raise the set.
2102 */
2103
2104int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
2105 void __user *buffer, size_t *lenp, loff_t *ppos)
2106{
2107 int op;
2108
2109 if (write && !capable(CAP_SYS_MODULE)) {
2110 return -EPERM;
2111 }
2112
2113 op = is_global_init(current) ? OP_SET : OP_AND;
2114 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2115 do_proc_dointvec_bset_conv,&op);
2116}
2117#endif /* def CONFIG_SECURITY_CAPABILITIES */
2118
2119/* 2090/*
2120 * Taint values can only be increased 2091 * Taint values can only be increased
2121 */ 2092 */
@@ -2529,12 +2500,6 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2529 return -ENOSYS; 2500 return -ENOSYS;
2530} 2501}
2531 2502
2532int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
2533 void __user *buffer, size_t *lenp, loff_t *ppos)
2534{
2535 return -ENOSYS;
2536}
2537
2538int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, 2503int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2539 void __user *buffer, size_t *lenp, loff_t *ppos) 2504 void __user *buffer, size_t *lenp, loff_t *ppos)
2540{ 2505{
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index c3206fa50048..006365b69eaf 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -37,10 +37,6 @@ static struct trans_ctl_table trans_kern_table[] = {
37 { KERN_NODENAME, "hostname" }, 37 { KERN_NODENAME, "hostname" },
38 { KERN_DOMAINNAME, "domainname" }, 38 { KERN_DOMAINNAME, "domainname" },
39 39
40#ifdef CONFIG_SECURITY_CAPABILITIES
41 { KERN_CAP_BSET, "cap-bound" },
42#endif /* def CONFIG_SECURITY_CAPABILITIES */
43
44 { KERN_PANIC, "panic" }, 40 { KERN_PANIC, "panic" },
45 { KERN_REALROOTDEV, "real-root-dev" }, 41 { KERN_REALROOTDEV, "real-root-dev" },
46 42
@@ -1498,9 +1494,6 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
1498 (table->strategy == sysctl_ms_jiffies) || 1494 (table->strategy == sysctl_ms_jiffies) ||
1499 (table->proc_handler == proc_dostring) || 1495 (table->proc_handler == proc_dostring) ||
1500 (table->proc_handler == proc_dointvec) || 1496 (table->proc_handler == proc_dointvec) ||
1501#ifdef CONFIG_SECURITY_CAPABILITIES
1502 (table->proc_handler == proc_dointvec_bset) ||
1503#endif /* def CONFIG_SECURITY_CAPABILITIES */
1504 (table->proc_handler == proc_dointvec_minmax) || 1497 (table->proc_handler == proc_dointvec_minmax) ||
1505 (table->proc_handler == proc_dointvec_jiffies) || 1498 (table->proc_handler == proc_dointvec_jiffies) ||
1506 (table->proc_handler == proc_dointvec_userhz_jiffies) || 1499 (table->proc_handler == proc_dointvec_userhz_jiffies) ||