aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
author: Eric W. Biederman <ebiederm@xmission.com> 2011-11-17 03:11:58 -0500
committer: Eric W. Biederman <ebiederm@xmission.com> 2012-04-26 05:01:39 -0400
commit: 22d917d80e842829d0ca0a561967d728eb1d6303 (patch)
tree: b01e0566e136d3004fa9198e4cb1969fc6feff6c /fs/proc
parent: 783291e6900292521a3895583785e0c04a56c5b3 (diff)
userns: Rework the user_namespace adding uid/gid mapping support
- Convert the old uid mapping functions into compatibility wrappers - Add a uid/gid mapping layer from user space uids and gids to kernel internal uids and gids that is extent based for simplicity and speed. * Working with number space after mapping uids/gids into their kernel internal version adds only mapping complexity over what we have today, leaving the kernel code easy to understand and test. - Add proc files /proc/self/uid_map /proc/self/gid_map These files display the mapping and allow a mapping to be added if a mapping does not exist. - Allow entering the user namespace without a uid or gid mapping. Since we are starting with an existing user, our uids and gids still have global mappings, so they are still valid and useful; they just don't have local mappings. The requirement for things to work is a global uid and gid, so it is odd but perfectly fine not to have a local uid and gid mapping. Not requiring global uid and gid mappings greatly simplifies the logic of setting up the uid and gid mappings by allowing the mappings to be set after the namespace is created, which makes the slight weirdness worth it. - Make the mappings in the initial user namespace to the global uid/gid space explicit. Today it is an identity mapping but in the future we may want to twist this for debugging, similar to what we do with jiffies. - Document the memory ordering requirements of setting the uid and gid mappings. We only allow the mappings to be set once and there are no pointers involved, so the requirements are trivial but a little atypical. Performance: In this scheme for the permission checks the performance is expected to stay the same, as the actual machine instructions should remain the same. The worst case I could think of is ls -l on a large directory where all of the stat results need to be translated from kuids and kgids to uids and gids. So I benchmarked that case on my laptop with a dual core hyperthread Intel i5-2520M cpu with 3M of cpu cache.
My benchmark consisted of going to single user mode where nothing else was running. On an ext4 filesystem opening 1,000,000 files and looping through all of the files 1000 times and calling fstat on the individual files. This was to ensure I was benchmarking stat times where the inodes were in the kernel's cache, but the inode values were not in the processor's cache. My results: v3.4-rc1: ~= 156ns (unmodified v3.4-rc1 with user namespace support disabled) v3.4-rc1-userns-: ~= 155ns (v3.4-rc1 with my user namespace patches and user namespace support disabled) v3.4-rc1-userns+: ~= 164ns (v3.4-rc1 with my user namespace patches and user namespace support enabled) All of the configurations ran in roughly 120ns when I performed tests that ran in the cpu cache. So in summary the performance impact is: 1ns improvement in the worst case with user namespace support compiled out. 8ns aka 5% slowdown in the worst case with user namespace support compiled in. Acked-by: Serge Hallyn <serge.hallyn@canonical.com> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/base.c77
1 files changed, 77 insertions, 0 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1c8b280146d7..2ee514c7e64a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -81,6 +81,7 @@
81#include <linux/oom.h> 81#include <linux/oom.h>
82#include <linux/elf.h> 82#include <linux/elf.h>
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/user_namespace.h>
84#include <linux/fs_struct.h> 85#include <linux/fs_struct.h>
85#include <linux/slab.h> 86#include <linux/slab.h>
86#include <linux/flex_array.h> 87#include <linux/flex_array.h>
@@ -2943,6 +2944,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2943} 2944}
2944#endif /* CONFIG_TASK_IO_ACCOUNTING */ 2945#endif /* CONFIG_TASK_IO_ACCOUNTING */
2945 2946
2947#ifdef CONFIG_USER_NS
2948static int proc_id_map_open(struct inode *inode, struct file *file,
2949 struct seq_operations *seq_ops)
2950{
2951 struct user_namespace *ns = NULL;
2952 struct task_struct *task;
2953 struct seq_file *seq;
2954 int ret = -EINVAL;
2955
2956 task = get_proc_task(inode);
2957 if (task) {
2958 rcu_read_lock();
2959 ns = get_user_ns(task_cred_xxx(task, user_ns));
2960 rcu_read_unlock();
2961 put_task_struct(task);
2962 }
2963 if (!ns)
2964 goto err;
2965
2966 ret = seq_open(file, seq_ops);
2967 if (ret)
2968 goto err_put_ns;
2969
2970 seq = file->private_data;
2971 seq->private = ns;
2972
2973 return 0;
2974err_put_ns:
2975 put_user_ns(ns);
2976err:
2977 return ret;
2978}
2979
2980static int proc_id_map_release(struct inode *inode, struct file *file)
2981{
2982 struct seq_file *seq = file->private_data;
2983 struct user_namespace *ns = seq->private;
2984 put_user_ns(ns);
2985 return seq_release(inode, file);
2986}
2987
2988static int proc_uid_map_open(struct inode *inode, struct file *file)
2989{
2990 return proc_id_map_open(inode, file, &proc_uid_seq_operations);
2991}
2992
2993static int proc_gid_map_open(struct inode *inode, struct file *file)
2994{
2995 return proc_id_map_open(inode, file, &proc_gid_seq_operations);
2996}
2997
2998static const struct file_operations proc_uid_map_operations = {
2999 .open = proc_uid_map_open,
3000 .write = proc_uid_map_write,
3001 .read = seq_read,
3002 .llseek = seq_lseek,
3003 .release = proc_id_map_release,
3004};
3005
3006static const struct file_operations proc_gid_map_operations = {
3007 .open = proc_gid_map_open,
3008 .write = proc_gid_map_write,
3009 .read = seq_read,
3010 .llseek = seq_lseek,
3011 .release = proc_id_map_release,
3012};
3013#endif /* CONFIG_USER_NS */
3014
2946static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 3015static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2947 struct pid *pid, struct task_struct *task) 3016 struct pid *pid, struct task_struct *task)
2948{ 3017{
@@ -3045,6 +3114,10 @@ static const struct pid_entry tgid_base_stuff[] = {
3045#ifdef CONFIG_HARDWALL 3114#ifdef CONFIG_HARDWALL
3046 INF("hardwall", S_IRUGO, proc_pid_hardwall), 3115 INF("hardwall", S_IRUGO, proc_pid_hardwall),
3047#endif 3116#endif
3117#ifdef CONFIG_USER_NS
3118 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3119 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3120#endif
3048}; 3121};
3049 3122
3050static int proc_tgid_base_readdir(struct file * filp, 3123static int proc_tgid_base_readdir(struct file * filp,
@@ -3400,6 +3473,10 @@ static const struct pid_entry tid_base_stuff[] = {
3400#ifdef CONFIG_HARDWALL 3473#ifdef CONFIG_HARDWALL
3401 INF("hardwall", S_IRUGO, proc_pid_hardwall), 3474 INF("hardwall", S_IRUGO, proc_pid_hardwall),
3402#endif 3475#endif
3476#ifdef CONFIG_USER_NS
3477 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3478 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
3479#endif
3403}; 3480};
3404 3481
3405static int proc_tid_base_readdir(struct file * filp, 3482static int proc_tid_base_readdir(struct file * filp,