diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2009-08-18 17:11:10 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-18 19:31:13 -0400 |
commit | 0753ba01e126020bf0f8150934903b48935b697d (patch) | |
tree | fbfd7e2d0abbe724a8c5e0e17fb9af522ed2e097 /fs/proc | |
parent | 89a4eb4b66e8f4d395e14a14d262dac4d6ca52f0 (diff) |
mm: revert "oom: move oom_adj value"
The commit 2ff05b2b (oom: move oom_adj value) moveed the oom_adj value to
the mm_struct. It was a very good first step for sanitize OOM.
However Paul Menage reported the commit makes regression to his job
scheduler. Current OOM logic can kill OOM_DISABLED process.
Why? His program has the code of similar to the following.
...
set_oom_adj(OOM_DISABLE); /* The job scheduler never killed by oom */
...
if (vfork() == 0) {
set_oom_adj(0); /* Invoked child can be killed */
execve("foo-bar-cmd");
}
....
vfork() parent and child are shared the same mm_struct. then above
set_oom_adj(0) doesn't only change oom_adj for vfork() child, it's also
change oom_adj for vfork() parent. Then, vfork() parent (job scheduler)
lost OOM immune and it was killed.
Actually, fork-setting-exec idiom is very frequently used in userland program.
We must not break this assumption.
Then, this patch revert commit 2ff05b2b and related commit.
Reverted commit list
---------------------
- commit 2ff05b2b4e (oom: move oom_adj value from task_struct to mm_struct)
- commit 4d8b9135c3 (oom: avoid unnecessary mm locking and scanning for OOM_DISABLE)
- commit 8123681022 (oom: only oom kill exiting tasks with attached memory)
- commit 933b787b57 (mm: copy over oom_adj value at fork time)
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/base.c | 19 |
1 files changed, 3 insertions, 16 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
1003 | 1003 | ||
1004 | if (!task) | 1004 | if (!task) |
1005 | return -ESRCH; | 1005 | return -ESRCH; |
1006 | task_lock(task); | 1006 | oom_adjust = task->oomkilladj; |
1007 | if (task->mm) | ||
1008 | oom_adjust = task->mm->oom_adj; | ||
1009 | else | ||
1010 | oom_adjust = OOM_DISABLE; | ||
1011 | task_unlock(task); | ||
1012 | put_task_struct(task); | 1007 | put_task_struct(task); |
1013 | 1008 | ||
1014 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1009 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1037 | task = get_proc_task(file->f_path.dentry->d_inode); | 1032 | task = get_proc_task(file->f_path.dentry->d_inode); |
1038 | if (!task) | 1033 | if (!task) |
1039 | return -ESRCH; | 1034 | return -ESRCH; |
1040 | task_lock(task); | 1035 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { |
1041 | if (!task->mm) { | ||
1042 | task_unlock(task); | ||
1043 | put_task_struct(task); | ||
1044 | return -EINVAL; | ||
1045 | } | ||
1046 | if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1047 | task_unlock(task); | ||
1048 | put_task_struct(task); | 1036 | put_task_struct(task); |
1049 | return -EACCES; | 1037 | return -EACCES; |
1050 | } | 1038 | } |
1051 | task->mm->oom_adj = oom_adjust; | 1039 | task->oomkilladj = oom_adjust; |
1052 | task_unlock(task); | ||
1053 | put_task_struct(task); | 1040 | put_task_struct(task); |
1054 | if (end - buffer == 0) | 1041 | if (end - buffer == 0) |
1055 | return -EIO; | 1042 | return -EIO; |