From b409e578d9a4ec95913e06d8fea2a33f1754ea69 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 May 2012 16:26:17 -0700 Subject: proc: clean up /proc//environ handling Similar to e268337dfe26 ("proc: clean up and fix /proc//mem handling"), move the check of permission to open(), this will simplify read() code. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Cong Wang Cc: Oleg Nesterov Cc: Alexey Dobriyan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index d7d711876b6a..e2d234243269 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -679,7 +679,7 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; -static int mem_open(struct inode* inode, struct file* file) +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); struct mm_struct *mm; @@ -687,7 +687,7 @@ static int mem_open(struct inode* inode, struct file* file) if (!task) return -ESRCH; - mm = mm_access(task, PTRACE_MODE_ATTACH); + mm = mm_access(task, mode); put_task_struct(task); if (IS_ERR(mm)) @@ -707,6 +707,11 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } +static int mem_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_ATTACH); +} + static ssize_t mem_rw(struct file *file, char __user *buf, size_t count, loff_t *ppos, int write) { @@ -803,30 +808,29 @@ static const struct file_operations proc_mem_operations = { .release = mem_release, }; +static int environ_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_READ); +} + static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); char *page; unsigned long src = *ppos; - int ret = -ESRCH; - struct mm_struct *mm; + int ret = 0; + struct mm_struct *mm = file->private_data; - if (!task) - goto out_no_task; + if (!mm) + return 0; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; - - - mm = mm_for_maps(task); - ret = PTR_ERR(mm); - if (!mm || IS_ERR(mm)) - goto out_free; + return -ENOMEM; ret = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; while (count > 0) { int this_len, retval, max_len; @@ -838,7 +842,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; this_len = (this_len > max_len) ? max_len : this_len; - retval = access_process_vm(task, (mm->env_start + src), + retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); if (retval <= 0) { @@ -857,19 +861,18 @@ static ssize_t environ_read(struct file *file, char __user *buf, count -= retval; } *ppos = src; - mmput(mm); -out_free: + +free: free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return ret; } static const struct file_operations proc_environ_operations = { + .open = environ_open, .read = environ_read, .llseek = generic_file_llseek, + .release = mem_release, }; static ssize_t oom_adjust_read(struct file *file, char __user *buf, -- cgit v1.2.2 From e7dcd9990e42ccfc798d4eb55e2dbf9d7d434c6b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 May 2012 16:26:17 -0700 Subject: proc: remove mm_for_maps() mm_for_maps() is a simple wrapper for mm_access(), and the name is misleading, so just remove it and use mm_access() directly. Signed-off-by: Cong Wang Cc: Oleg Nesterov Cc: Alexey Dobriyan Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index e2d234243269..cca635d252d4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -199,11 +199,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -struct mm_struct *mm_for_maps(struct task_struct *task) -{ - return mm_access(task, PTRACE_MODE_READ); -} - static int proc_pid_cmdline(struct task_struct *task, char * buffer) { int res = 0; @@ -243,7 +238,7 @@ out: static int proc_pid_auxv(struct task_struct *task, char *buffer) { - struct mm_struct *mm = mm_for_maps(task); + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); int res = PTR_ERR(mm); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; -- cgit v1.2.2 From 2344bec788b097b2d1198758bd29c583812b864e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 31 May 2012 16:26:18 -0700 Subject: proc: use mm_access() instead of ptrace_may_access() mm_access() handles this much better, and avoids some race conditions. Signed-off-by: Cong Wang Cc: Oleg Nesterov Cc: Alexey Dobriyan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index cca635d252d4..155dee600ed4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2022,11 +2022,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) if (!task) goto out_notask; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - - mm = get_task_mm(task); - if (!mm) + mm = mm_access(task, PTRACE_MODE_READ); + if (IS_ERR_OR_NULL(mm)) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { -- cgit v1.2.2 From f05ed3f1abefd37c08fbf08c766d2abd40607777 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 May 2012 16:26:18 -0700 Subject: proc: don't do dummy rcu_read_lock/rcu_read_unlock on error path rcu_read_lock()/rcu_read_unlock() is nop for TINY_RCU, but is not a nop for, say, PREEMPT_RCU. proc_fill_cache() is called without RCU lock, there is no need to lock/unlock on error path, simply jump out of the loop. Signed-off-by: Alexey Dobriyan Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 155dee600ed4..0f928cbba4a4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1928,21 +1928,22 @@ static int proc_readfd_common(struct file * filp, void * dirent, fd++, filp->f_pos++) { char name[PROC_NUMBUF]; int len; + int rv; if (!fcheck_files(files, fd)) continue; rcu_read_unlock(); len = snprintf(name, sizeof(name), "%d", fd); - if (proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, - p, &fd) < 0) { - rcu_read_lock(); - break; - } + rv = proc_fill_cache(filp, dirent, filldir, + name, len, instantiate, p, + &fd); + if (rv < 0) + goto out_fd_loop; rcu_read_lock(); } rcu_read_unlock(); +out_fd_loop: put_files_struct(files); } out: -- cgit v1.2.2 From af5e6171437c9d62d84459b24877c94c23782676 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 31 May 2012 16:26:18 -0700 Subject: proc: pass "fd" by value in /proc/*/{fd,fdinfo} code Pass "fd" directly, not via pointer -- one less memory read. Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 0f928cbba4a4..bd8b4ca6a610 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1848,7 +1848,7 @@ static const struct dentry_operations tid_fd_dentry_operations = static struct dentry *proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(const unsigned *)ptr; + unsigned fd = (unsigned long)ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); @@ -1885,7 +1885,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, if (fd == ~0U) goto out; - result = instantiate(dir, dentry, task, &fd); + result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); out: put_task_struct(task); out_no_task: @@ -1937,7 +1937,7 @@ static int proc_readfd_common(struct file * filp, void * dirent, len = snprintf(name, sizeof(name), "%d", fd); rv = proc_fill_cache(filp, dirent, filldir, name, len, instantiate, p, - &fd); + (void *)(unsigned long)fd); if (rv < 0) goto out_fd_loop; rcu_read_lock(); @@ -2353,7 +2353,7 @@ static const struct inode_operations proc_fd_inode_operations = { static struct dentry *proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - unsigned fd = *(unsigned *)ptr; + unsigned fd = (unsigned long)ptr; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); -- cgit v1.2.2 From 818411616baf46ceba0cff6f05af3a9b294734f7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 31 May 2012 16:26:43 -0700 Subject: fs, proc: introduce /proc//task//children entry When we do checkpoint of a task we need to know the list of children the task, has but there is no easy and fast way to generate reverse parent->children chain from arbitrary (while a parent pid is provided in "PPid" field of /proc//status). So instead of walking over all pids in the system (creating one big process tree in memory, just to figure out which children a task has) -- we add explicit /proc//task//children entry, because the kernel already has this kind of information but it is not yet exported. This is a first level children, not the whole process tree. Signed-off-by: Cyrill Gorcunov Reviewed-by: Oleg Nesterov Reviewed-by: Kees Cook Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index bd8b4ca6a610..616f41a7cde6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3400,6 +3400,9 @@ static const struct pid_entry tid_base_stuff[] = { ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + REG("children", S_IRUGO, proc_tid_children_operations), +#endif #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), #endif -- cgit v1.2.2