From ce6ada35bdf710d16582cc4869c26722547e6f11 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 25 Nov 2010 17:11:32 +0000 Subject: security: Define CAP_SYSLOG Privileged syslog operations currently require CAP_SYS_ADMIN. Split this off into a new CAP_SYSLOG privilege which we can sanely take away from a container through the capability bounding set. With this patch, an lxc container can be prevented from messing with the host's syslog (i.e. dmesg -c). Changelog: mar 12 2010: add selinux capability2:cap_syslog perm Changelog: nov 22 2010: . port to new kernel . add a WARN_ONCE if userspace isn't using CAP_SYSLOG Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Acked-By: Kees Cook Cc: James Morris Cc: Michael Kerrisk Cc: Stephen Smalley Cc: "Christopher J. PeBenito" Cc: Eric Paris Signed-off-by: James Morris --- include/linux/capability.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 90012b9ddbf3..fb16a3699b99 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -246,7 +246,6 @@ struct cpu_vfs_cap_data { /* Allow configuration of the secure attention key */ /* Allow administration of the random device */ /* Allow examination and configuration of disk quotas */ -/* Allow configuring the kernel's syslog (printk behaviour) */ /* Allow setting the domainname */ /* Allow setting the hostname */ /* Allow calling bdflush() */ @@ -352,7 +351,11 @@ struct cpu_vfs_cap_data { #define CAP_MAC_ADMIN 33 -#define CAP_LAST_CAP CAP_MAC_ADMIN +/* Allow configuring the kernel's syslog (printk behaviour) */ + +#define CAP_SYSLOG 34 + +#define CAP_LAST_CAP CAP_SYSLOG #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) -- cgit v1.2.2 From 3486740a4f32a6a466f5ac931654d154790ba648 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:17 -0700 Subject: userns: security: make capabilities relative to the user namespace - Introduce ns_capable to test for a capability in a non-default user namespace. - Teach cap_capable to handle capabilities in a non-default user namespace. The motivation is to get to the unprivileged creation of new namespaces. It looks like this gets us 90% of the way there, with only potential uid confusion issues left. I still need to handle getting all caps after creation but otherwise I think I have a good starter patch that achieves all of your goals. Changelog: 11/05/2010: [serge] add apparmor 12/14/2010: [serge] fix capabilities to created user namespaces Without this, if user serge creates a user_ns, he won't have capabilities to the user_ns he created. THis is because we were first checking whether his effective caps had the caps he needed and returning -EPERM if not, and THEN checking whether he was the creator. Reverse those checks. 12/16/2010: [serge] security_real_capable needs ns argument in !security case 01/11/2011: [serge] add task_ns_capable helper 01/11/2011: [serge] add nsown_capable() helper per Bastian Blank suggestion 02/16/2011: [serge] fix a logic bug: the root user is always creator of init_user_ns, but should not always have capabilities to it! Fix the check in cap_capable(). 02/21/2011: Add the required user_ns parameter to security_capable, fixing a compile failure. 02/23/2011: Convert some macros to functions as per akpm comments. Some couldn't be converted because we can't easily forward-declare them (they are inline if !SECURITY, extern if SECURITY). Add a current_user_ns function so we can use it in capability.h without #including cred.h. Move all forward declarations together to the top of the #ifdef __KERNEL__ section, and use kernel-doc format. 02/23/2011: Per dhowells, clean up comment in cap_capable(). 02/23/2011: Per akpm, remove unreachable 'return -EPERM' in cap_capable. (Original written and signed off by Eric; latest, modified version acked by him) [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: export current_user_ns() for ecryptfs] [serge.hallyn@canonical.com: remove unneeded extra argument in selinux's task_has_capability] Signed-off-by: Eric W. Biederman Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Serge E. Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index fb16a3699b99..7c9c82903012 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -368,6 +368,17 @@ struct cpu_vfs_cap_data { #ifdef __KERNEL__ +struct dentry; +struct user_namespace; + +extern struct user_namespace init_user_ns; + +struct user_namespace *current_user_ns(void); + +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; +extern const kernel_cap_t __cap_init_eff_set; + /* * Internal kernel functions only */ @@ -530,10 +541,6 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, cap_intersect(permitted, __cap_nfsd_set)); } -extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; -extern const kernel_cap_t __cap_init_eff_set; - /** * has_capability - Determine if a task has a superior capability available * @t: The task in question @@ -544,7 +551,7 @@ extern const kernel_cap_t __cap_init_eff_set; * * Note that this does not set PF_SUPERPRIV on the task. */ -#define has_capability(t, cap) (security_real_capable((t), (cap)) == 0) +#define has_capability(t, cap) (security_real_capable((t), &init_user_ns, (cap)) == 0) /** * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) @@ -558,12 +565,25 @@ extern const kernel_cap_t __cap_init_eff_set; * Note that this does not set PF_SUPERPRIV on the task. */ #define has_capability_noaudit(t, cap) \ - (security_real_capable_noaudit((t), (cap)) == 0) + (security_real_capable_noaudit((t), &init_user_ns, (cap)) == 0) -extern int capable(int cap); +extern bool capable(int cap); +extern bool ns_capable(struct user_namespace *ns, int cap); +extern bool task_ns_capable(struct task_struct *t, int cap); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +static inline bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} /* audit system wants to get cap info from files as well */ -struct dentry; extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); #endif /* __KERNEL__ */ -- cgit v1.2.2 From 8409cca7056113bee3236cb6a8e4d8d4d1eef102 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:20 -0700 Subject: userns: allow ptrace from non-init user namespaces ptrace is allowed to tasks in the same user namespace according to the usual rules (i.e. the same rules as for two tasks in the init user namespace). ptrace is also allowed to a user namespace to which the current task the has CAP_SYS_PTRACE capability. Changelog: Dec 31: Address feedback by Eric: . Correct ptrace uid check . Rename may_ptrace_ns to ptrace_capable . Also fix the cap_ptrace checks. Jan 1: Use const cred struct Jan 11: use task_ns_capable() in place of ptrace_capable(). Feb 23: same_or_ancestore_user_ns() was not an appropriate check to constrain cap_issubset. Rather, cap_issubset() only is meaningful when both capsets are in the same user_ns. Signed-off-by: Serge E. Hallyn Cc: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 7c9c82903012..2ec4a8cc86a5 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -553,6 +553,8 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, */ #define has_capability(t, cap) (security_real_capable((t), &init_user_ns, (cap)) == 0) +#define has_ns_capability(t, ns, cap) (security_real_capable((t), (ns), (cap)) == 0) + /** * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) * @t: The task in question -- cgit v1.2.2 From 3263245de48344ad7bdd0e7256bf1606d2592f88 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:21 -0700 Subject: userns: make has_capability* into real functions So we can let type safety keep things sane, and as a bonus we can remove the declaration of init_user_ns in capability.h. Signed-off-by: Serge E. Hallyn Cc: "Eric W. Biederman" Cc: Daniel Lezcano Cc: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 2ec4a8cc86a5..16ee8b49a200 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -371,8 +371,6 @@ struct cpu_vfs_cap_data { struct dentry; struct user_namespace; -extern struct user_namespace init_user_ns; - struct user_namespace *current_user_ns(void); extern const kernel_cap_t __cap_empty_set; @@ -541,34 +539,10 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, cap_intersect(permitted, __cap_nfsd_set)); } -/** - * has_capability - Determine if a task has a superior capability available - * @t: The task in question - * @cap: The capability to be tested for - * - * Return true if the specified task has the given superior capability - * currently in effect, false if not. - * - * Note that this does not set PF_SUPERPRIV on the task. - */ -#define has_capability(t, cap) (security_real_capable((t), &init_user_ns, (cap)) == 0) - -#define has_ns_capability(t, ns, cap) (security_real_capable((t), (ns), (cap)) == 0) - -/** - * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) - * @t: The task in question - * @cap: The capability to be tested for - * - * Return true if the specified task has the given superior capability - * currently in effect, false if not, but don't write an audit message for the - * check. - * - * Note that this does not set PF_SUPERPRIV on the task. - */ -#define has_capability_noaudit(t, cap) \ - (security_real_capable_noaudit((t), &init_user_ns, (cap)) == 0) - +extern bool has_capability(struct task_struct *t, int cap); +extern bool has_ns_capability(struct task_struct *t, + struct user_namespace *ns, int cap); +extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); -- cgit v1.2.2 From ffa8e59df047d57e812a04f7d6baf6a25c652c0c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:34 -0400 Subject: capabilities: do not drop CAP_SETPCAP from the initial task In olden' days of yore CAP_SETPCAP had special meaning for the init task. We actually have code to make sure that CAP_SETPCAP wasn't in pE of things using the init_cred. But CAP_SETPCAP isn't so special any more and we don't have a reason to special case dropping it for init or kthreads.... Signed-off-by: Eric Paris Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..11d562863e49 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -412,7 +412,6 @@ extern const kernel_cap_t __cap_init_eff_set; # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) -# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -423,10 +422,10 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET +#define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) # define cap_set_full(c) do { (c) = __cap_full_set; } while (0) -# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,6 +546,9 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; + /** * nsown_capable - Check superior capability to one's own user_ns * @cap: The capability in question -- cgit v1.2.2 From 5163b583a036b103c3cec7171d6731c125773ed6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:39 -0400 Subject: capabilities: delete unused cap_set_full unused code. Clean it up. Signed-off-by: Eric Paris Acked-by: David Howells Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 11d562863e49..8d0da30dad23 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -425,7 +425,6 @@ extern const kernel_cap_t __cap_init_eff_set; #define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) -# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,7 +546,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; /** * nsown_capable - Check superior capability to one's own user_ns -- cgit v1.2.2 From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d0da30dad23..04fed72809de 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -421,9 +421,6 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ -#define CAP_INIT_INH_SET CAP_EMPTY_SET -#define CAP_INIT_EFF_SET CAP_FULL_SET - # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -- cgit v1.2.2 From 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:54:33 -0800 Subject: timers: Posix interface for alarm-timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch exposes alarm-timers to userland via the posix clock and timers interface, using two new clockids: CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. Both clockids behave identically to CLOCK_REALTIME and CLOCK_BOOTTIME, respectively, but timers set against the _ALARM suffixed clockids will wake the system if it is suspended. Some background can be found here: https://lwn.net/Articles/429925/ The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve Hjønnevåg) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 While the in-kernel interface is pretty similar between alarm-timers and Android alarm driver, the user-space interface for the Android alarm driver is via ioctls to a new char device. As mentioned above, I've instead chosen to export this functionality via the posix interface, as it seemed a little simpler and avoids creating duplicate interfaces to things like CLOCK_REALTIME and CLOCK_MONOTONIC under alternate names (ie:ANDROID_ALARM_RTC and ANDROID_ALARM_SYSTEMTIME). The semantics of the Android alarm driver are different from what this posix interface provides. For instance, threads other then the thread waiting on the Android alarm driver are able to modify the alarm being waited on. Also this interface does not allow the same wakelock semantics that the Android driver provides (ie: kernel takes a wakelock on RTC alarm-interupt, and holds it through process wakeup, and while the process runs, until the process either closes the char device or calls back in to wait on a new alarm). One potential way to implement similar semantics may be via the timerfd infrastructure, but this needs more research. There may also need to be some sort of sysfs system level policy hooks that allow alarm timers to be disabled to keep them from firing at inappropriate times (ie: laptop in a well insulated bag, mid-flight). CC: Arve Hjønnevåg CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/capability.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..7cb23eae693d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -355,7 +355,12 @@ struct cpu_vfs_cap_data { #define CAP_SYSLOG 34 -#define CAP_LAST_CAP CAP_SYSLOG +/* Allow triggering something that will wake the system */ + +#define CAP_WAKE_ALARM 35 + + +#define CAP_LAST_CAP CAP_WAKE_ALARM #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) -- cgit v1.2.2 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -- cgit v1.2.2