From 13f0feafa6b8aead57a2a328e2fca6a5828bf286 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jun 2009 21:25:32 +0200 Subject: mm_for_maps: simplify, use ptrace_may_access() It would be nice to kill __ptrace_may_access(). It requires task_lock(), but this lock is only needed to read mm->flags in the middle. Convert mm_for_maps() to use ptrace_may_access(), this also simplifies the code a little bit. Also, we do not need to take ->mmap_sem in advance. In fact I think mm_for_maps() should not play with ->mmap_sem at all, the caller should take this lock. With or without this patch, without ->cred_guard_mutex held we can race with exec() and get the new ->mm but check old creds. Signed-off-by: Oleg Nesterov Reviewed-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3ce5ae9e3d2d..917f338a6739 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -237,20 +237,19 @@ struct mm_struct *mm_for_maps(struct task_struct *task) struct mm_struct *mm = get_task_mm(task); if (!mm) return NULL; + if (mm != current->mm) { + /* + * task->mm can be changed before security check, + * in that case we must notice the change after. + */ + if (!ptrace_may_access(task, PTRACE_MODE_READ) || + mm != task->mm) { + mmput(mm); + return NULL; + } + } down_read(&mm->mmap_sem); - task_lock(task); - if (task->mm != mm) - goto out; - if (task->mm != current->mm && - __ptrace_may_access(task, PTRACE_MODE_READ) < 0) - goto out; - task_unlock(task); return mm; -out: - task_unlock(task); - up_read(&mm->mmap_sem); - mmput(mm); - return NULL; } static int proc_pid_cmdline(struct task_struct *task, char * buffer) -- cgit v1.2.2 From 00f89d218523b9bf6b522349c039d5ac80aa536d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:27:38 +0200 Subject: mm_for_maps: shift down_read(mmap_sem) to the caller mm_for_maps() takes ->mmap_sem after security checks, this looks strange and obfuscates the locking rules. Move this lock to its single caller, m_start(). Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 8 +++----- fs/proc/task_mmu.c | 1 + fs/proc/task_nommu.c | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 917f338a6739..f3c2e4085fed 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -235,9 +235,8 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm = get_task_mm(task); - if (!mm) - return NULL; - if (mm != current->mm) { + + if (mm && mm != current->mm) { /* * task->mm can be changed before security check, * in that case we must notice the change after. @@ -245,10 +244,9 @@ struct mm_struct *mm_for_maps(struct task_struct *task) if (!ptrace_may_access(task, PTRACE_MODE_READ) || mm != task->mm) { mmput(mm); - return NULL; + mm = NULL; } } - down_read(&mm->mmap_sem); return mm; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6f61b7cc32e0..9bd8be1d235c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) mm = mm_for_maps(priv->task); if (!mm) return NULL; + down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task); priv->tail_vma = tail_vma; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 64a72e2e7650..8f5c05d3dbd3 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = NULL; return NULL; } + down_read(&mm->mmap_sem); /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) -- cgit v1.2.2 From 704b836cbf19e885f8366bccb2e4b0474346c02d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:27:40 +0200 Subject: mm_for_maps: take ->cred_guard_mutex to fix the race with exec The problem is minor, but without ->cred_guard_mutex held we can race with exec() and get the new ->mm but check old creds. Now we do not need to re-check task->mm after ptrace_may_access(), it can't be changed to the new mm under us. Strictly speaking, this also fixes another very minor problem. Unless security check fails or the task exits mm_for_maps() should never return NULL, the caller should get either old or new ->mm. Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/base.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index f3c2e4085fed..175db258942f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -234,19 +234,19 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { - struct mm_struct *mm = get_task_mm(task); + struct mm_struct *mm; - if (mm && mm != current->mm) { - /* - * task->mm can be changed before security check, - * in that case we must notice the change after. - */ - if (!ptrace_may_access(task, PTRACE_MODE_READ) || - mm != task->mm) { - mmput(mm); - mm = NULL; - } + if (mutex_lock_killable(&task->cred_guard_mutex)) + return NULL; + + mm = get_task_mm(task); + if (mm && mm != current->mm && + !ptrace_may_access(task, PTRACE_MODE_READ)) { + mmput(mm); + mm = NULL; } + mutex_unlock(&task->cred_guard_mutex); + return mm; } -- cgit v1.2.2 From 0753ba01e126020bf0f8150934903b48935b697d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 18 Aug 2009 14:11:10 -0700 Subject: mm: revert "oom: move oom_adj value" The commit 2ff05b2b (oom: move oom_adj value) moveed the oom_adj value to the mm_struct. It was a very good first step for sanitize OOM. However Paul Menage reported the commit makes regression to his job scheduler. Current OOM logic can kill OOM_DISABLED process. Why? His program has the code of similar to the following. ... set_oom_adj(OOM_DISABLE); /* The job scheduler never killed by oom */ ... if (vfork() == 0) { set_oom_adj(0); /* Invoked child can be killed */ execve("foo-bar-cmd"); } .... vfork() parent and child are shared the same mm_struct. then above set_oom_adj(0) doesn't only change oom_adj for vfork() child, it's also change oom_adj for vfork() parent. Then, vfork() parent (job scheduler) lost OOM immune and it was killed. Actually, fork-setting-exec idiom is very frequently used in userland program. We must not break this assumption. Then, this patch revert commit 2ff05b2b and related commit. Reverted commit list --------------------- - commit 2ff05b2b4e (oom: move oom_adj value from task_struct to mm_struct) - commit 4d8b9135c3 (oom: avoid unnecessary mm locking and scanning for OOM_DISABLE) - commit 8123681022 (oom: only oom kill exiting tasks with attached memory) - commit 933b787b57 (mm: copy over oom_adj value at fork time) Signed-off-by: KOSAKI Motohiro Cc: Paul Menage Cc: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, if (!task) return -ESRCH; - task_lock(task); - if (task->mm) - oom_adjust = task->mm->oom_adj; - else - oom_adjust = OOM_DISABLE; - task_unlock(task); + oom_adjust = task->oomkilladj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); @@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; - task_lock(task); - if (!task->mm) { - task_unlock(task); - put_task_struct(task); - return -EINVAL; - } - if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { - task_unlock(task); + if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { put_task_struct(task); return -EACCES; } - task->mm->oom_adj = oom_adjust; - task_unlock(task); + task->oomkilladj = oom_adjust; put_task_struct(task); if (end - buffer == 0) return -EIO; -- cgit v1.2.2