aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/base.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-23 20:42:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-23 20:42:39 -0400
commit644473e9c60c1ff4f6351fed637a6e5551e3dce7 (patch)
tree10316518bedc735a2c6552886658d69dfd9f1eb0 /fs/proc/base.c
parentfb827ec68446c83e9e8754fa9b55aed27ecc4661 (diff)
parent4b06a81f1daee668fbd6de85557bfb36dd36078f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace enhancements from Eric Biederman: "This is a course correction for the user namespace, so that we can reach an inexpensive, maintainable, and reasonably complete implementation. Highlights: - Config guards make it impossible to enable the user namespace and code that has not been converted to be user namespace safe. - Use of the new kuid_t type ensures the if you somehow get past the config guards the kernel will encounter type errors if you enable user namespaces and attempt to compile in code whose permission checks have not been updated to be user namespace safe. - All uids from child user namespaces are mapped into the initial user namespace before they are processed. Removing the need to add an additional check to see if the user namespace of the compared uids remains the same. - With the user namespaces compiled out the performance is as good or better than it is today. - For most operations absolutely nothing changes performance or operationally with the user namespace enabled. - The worst case performance I could come up with was timing 1 billion cache cold stat operations with the user namespace code enabled. This went from 156s to 164s on my laptop (or 156ns to 164ns per stat operation). - (uid_t)-1 and (gid_t)-1 are reserved as an internal error value. Most uid/gid setting system calls treat these value specially anyway so attempting to use -1 as a uid would likely cause entertaining failures in userspace. - If setuid is called with a uid that can not be mapped setuid fails. I have looked at sendmail, login, ssh and every other program I could think of that would call setuid and they all check for and handle the case where setuid fails. - If stat or a similar system call is called from a context in which we can not map a uid we lie and return overflowuid. The LFS experience suggests not lying and returning an error code might be better, but the historical precedent with uids is different and I can not think of anything that would break by lying about a uid we can't map. - Capabilities are localized to the current user namespace making it safe to give the initial user in a user namespace all capabilities. My git tree covers all of the modifications needed to convert the core kernel and enough changes to make a system bootable to runlevel 1." Fix up trivial conflicts due to nearby independent changes in fs/stat.c * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (46 commits) userns: Silence silly gcc warning. cred: use correct cred accessor with regards to rcu read lock userns: Convert the move_pages, and migrate_pages permission checks to use uid_eq userns: Convert cgroup permission checks to use uid_eq userns: Convert tmpfs to use kuid and kgid where appropriate userns: Convert sysfs to use kgid/kuid where appropriate userns: Convert sysctl permission checks to use kuid and kgids. userns: Convert proc to use kuid/kgid where appropriate userns: Convert ext4 to user kuid/kgid where appropriate userns: Convert ext3 to use kuid/kgid where appropriate userns: Convert ext2 to use kuid/kgid where appropriate. userns: Convert devpts to use kuid/kgid where appropriate userns: Convert binary formats to use kuid/kgid where appropriate userns: Add negative depends on entries to avoid building code that is userns unsafe userns: signal remove unnecessary map_cred_ns userns: Teach inode_capable to understand inodes whose uids map to other namespaces. userns: Fail exec for suid and sgid binaries with ids outside our user namespace. userns: Convert stat to return values mapped from kuids and kgids userns: Convert user specfied uids and gids in chown into kuids and kgid userns: Use uid_eq gid_eq helpers when comparing kuids and kgids in the vfs ...
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r--fs/proc/base.c93
1 files changed, 85 insertions, 8 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 57b8159f26f3..d2d3108a611c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -81,6 +81,7 @@
81#include <linux/oom.h> 81#include <linux/oom.h>
82#include <linux/elf.h> 82#include <linux/elf.h>
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/user_namespace.h>
84#include <linux/fs_struct.h> 85#include <linux/fs_struct.h>
85#include <linux/slab.h> 86#include <linux/slab.h>
86#include <linux/flex_array.h> 87#include <linux/flex_array.h>
@@ -1561,8 +1562,8 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1561 generic_fillattr(inode, stat); 1562 generic_fillattr(inode, stat);
1562 1563
1563 rcu_read_lock(); 1564 rcu_read_lock();
1564 stat->uid = 0; 1565 stat->uid = GLOBAL_ROOT_UID;
1565 stat->gid = 0; 1566 stat->gid = GLOBAL_ROOT_GID;
1566 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1567 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1567 if (task) { 1568 if (task) {
1568 if (!has_pid_permissions(pid, task, 2)) { 1569 if (!has_pid_permissions(pid, task, 2)) {
@@ -1622,8 +1623,8 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1622 inode->i_gid = cred->egid; 1623 inode->i_gid = cred->egid;
1623 rcu_read_unlock(); 1624 rcu_read_unlock();
1624 } else { 1625 } else {
1625 inode->i_uid = 0; 1626 inode->i_uid = GLOBAL_ROOT_UID;
1626 inode->i_gid = 0; 1627 inode->i_gid = GLOBAL_ROOT_GID;
1627 } 1628 }
1628 inode->i_mode &= ~(S_ISUID | S_ISGID); 1629 inode->i_mode &= ~(S_ISUID | S_ISGID);
1629 security_task_to_inode(task, inode); 1630 security_task_to_inode(task, inode);
@@ -1815,8 +1816,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1815 inode->i_gid = cred->egid; 1816 inode->i_gid = cred->egid;
1816 rcu_read_unlock(); 1817 rcu_read_unlock();
1817 } else { 1818 } else {
1818 inode->i_uid = 0; 1819 inode->i_uid = GLOBAL_ROOT_UID;
1819 inode->i_gid = 0; 1820 inode->i_gid = GLOBAL_ROOT_GID;
1820 } 1821 }
1821 1822
1822 i_mode = S_IFLNK; 1823 i_mode = S_IFLNK;
@@ -2045,8 +2046,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2045 inode->i_gid = cred->egid; 2046 inode->i_gid = cred->egid;
2046 rcu_read_unlock(); 2047 rcu_read_unlock();
2047 } else { 2048 } else {
2048 inode->i_uid = 0; 2049 inode->i_uid = GLOBAL_ROOT_UID;
2049 inode->i_gid = 0; 2050 inode->i_gid = GLOBAL_ROOT_GID;
2050 } 2051 }
2051 security_task_to_inode(task, inode); 2052 security_task_to_inode(task, inode);
2052 status = 1; 2053 status = 1;
@@ -2924,6 +2925,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2924} 2925}
2925#endif /* CONFIG_TASK_IO_ACCOUNTING */ 2926#endif /* CONFIG_TASK_IO_ACCOUNTING */
2926 2927
2928#ifdef CONFIG_USER_NS
2929static int proc_id_map_open(struct inode *inode, struct file *file,
2930 struct seq_operations *seq_ops)
2931{
2932 struct user_namespace *ns = NULL;
2933 struct task_struct *task;
2934 struct seq_file *seq;
2935 int ret = -EINVAL;
2936
2937 task = get_proc_task(inode);
2938 if (task) {
2939 rcu_read_lock();
2940 ns = get_user_ns(task_cred_xxx(task, user_ns));
2941 rcu_read_unlock();
2942 put_task_struct(task);
2943 }
2944 if (!ns)
2945 goto err;
2946
2947 ret = seq_open(file, seq_ops);
2948 if (ret)
2949 goto err_put_ns;
2950
2951 seq = file->private_data;
2952 seq->private = ns;
2953
2954 return 0;
2955err_put_ns:
2956 put_user_ns(ns);
2957err:
2958 return ret;
2959}
2960
2961static int proc_id_map_release(struct inode *inode, struct file *file)
2962{
2963 struct seq_file *seq = file->private_data;
2964 struct user_namespace *ns = seq->private;
2965 put_user_ns(ns);
2966 return seq_release(inode, file);
2967}
2968
2969static int proc_uid_map_open(struct inode *inode, struct file *file)
2970{
2971 return proc_id_map_open(inode, file, &proc_uid_seq_operations);
2972}
2973
2974static int proc_gid_map_open(struct inode *inode, struct file *file)
2975{
2976 return proc_id_map_open(inode, file, &proc_gid_seq_operations);
2977}
2978
2979static const struct file_operations proc_uid_map_operations = {
2980 .open = proc_uid_map_open,
2981 .write = proc_uid_map_write,
2982 .read = seq_read,
2983 .llseek = seq_lseek,
2984 .release = proc_id_map_release,
2985};
2986
2987static const struct file_operations proc_gid_map_operations = {
2988 .open = proc_gid_map_open,
2989 .write = proc_gid_map_write,
2990 .read = seq_read,
2991 .llseek = seq_lseek,
2992 .release = proc_id_map_release,
2993};
2994#endif /* CONFIG_USER_NS */
2995
2927static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2996static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2928 struct pid *pid, struct task_struct *task) 2997 struct pid *pid, struct task_struct *task)
2929{ 2998{
@@ -3026,6 +3095,10 @@ static const struct pid_entry tgid_base_stuff[] = {
3026#ifdef CONFIG_HARDWALL 3095#ifdef CONFIG_HARDWALL
3027 INF("hardwall", S_IRUGO, proc_pid_hardwall), 3096 INF("hardwall", S_IRUGO, proc_pid_hardwall),
3028#endif 3097#endif
3098#ifdef CONFIG_USER_NS
3099 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3100 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3101#endif
3029}; 3102};
3030 3103
3031static int proc_tgid_base_readdir(struct file * filp, 3104static int proc_tgid_base_readdir(struct file * filp,
@@ -3381,6 +3454,10 @@ static const struct pid_entry tid_base_stuff[] = {
3381#ifdef CONFIG_HARDWALL 3454#ifdef CONFIG_HARDWALL
3382 INF("hardwall", S_IRUGO, proc_pid_hardwall), 3455 INF("hardwall", S_IRUGO, proc_pid_hardwall),
3383#endif 3456#endif
3457#ifdef CONFIG_USER_NS
3458 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3459 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3460#endif
3384}; 3461};
3385 3462
3386static int proc_tid_base_readdir(struct file * filp, 3463static int proc_tid_base_readdir(struct file * filp,