diff options
author | David Rientjes <rientjes@google.com> | 2009-06-16 18:32:56 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-16 22:47:43 -0400 |
commit | 2ff05b2b4eac2e63d345fc731ea151a060247f53 (patch) | |
tree | 1840bc2d3b381eca5d39869499339b0fcc6eabbf /fs/proc/base.c | |
parent | c9e444103b5e7a5a3519f9913f59767f92e33baf (diff) |
oom: move oom_adj value from task_struct to mm_struct
The per-task oom_adj value is a characteristic of its mm more than the
task itself since it's not possible to oom kill any thread that shares the
mm. If a task were to be killed while attached to an mm that could not be
freed because another thread sharing it was set to OOM_DISABLE, it would have
needlessly been terminated since there is no potential for future memory
freeing.
This patch moves oomkilladj (now more appropriately named oom_adj) from
struct task_struct to struct mm_struct. This requires task_lock() on a
task to check its oom_adj value to protect against exec, but it's already
necessary to take the lock when dereferencing the mm to find the total VM
size for the badness heuristic.
This fixes a livelock if the oom killer chooses a task and another thread
sharing the same memory has an oom_adj value of OOM_DISABLE. This occurs
because oom_kill_task() repeatedly returns 1 and refuses to kill the
chosen task while select_bad_process() will repeatedly choose the same
task during the next retry.
Taking task_lock() in select_bad_process() to check for OOM_DISABLE and in
oom_kill_task() to check for threads sharing the same memory will be
removed in the next patch in this series where it will no longer be
necessary.
Writing to /proc/pid/oom_adj for a kthread will now return -EINVAL since
these threads are immune from oom killing already. Reading the file for a
kthread simply reports an oom_adj value of OOM_DISABLE.
Cc: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 19 |
1 file changed, 16 insertions, 3 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1539e630c47d..3ce5ae9e3d2d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1006,7 +1006,12 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
1006 | 1006 | ||
1007 | if (!task) | 1007 | if (!task) |
1008 | return -ESRCH; | 1008 | return -ESRCH; |
1009 | oom_adjust = task->oomkilladj; | 1009 | task_lock(task); |
1010 | if (task->mm) | ||
1011 | oom_adjust = task->mm->oom_adj; | ||
1012 | else | ||
1013 | oom_adjust = OOM_DISABLE; | ||
1014 | task_unlock(task); | ||
1010 | put_task_struct(task); | 1015 | put_task_struct(task); |
1011 | 1016 | ||
1012 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1017 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1035,11 +1040,19 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1035 | task = get_proc_task(file->f_path.dentry->d_inode); | 1040 | task = get_proc_task(file->f_path.dentry->d_inode); |
1036 | if (!task) | 1041 | if (!task) |
1037 | return -ESRCH; | 1042 | return -ESRCH; |
1038 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { | 1043 | task_lock(task); |
1044 | if (!task->mm) { | ||
1045 | task_unlock(task); | ||
1046 | put_task_struct(task); | ||
1047 | return -EINVAL; | ||
1048 | } | ||
1049 | if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1050 | task_unlock(task); | ||
1039 | put_task_struct(task); | 1051 | put_task_struct(task); |
1040 | return -EACCES; | 1052 | return -EACCES; |
1041 | } | 1053 | } |
1042 | task->oomkilladj = oom_adjust; | 1054 | task->mm->oom_adj = oom_adjust; |
1055 | task_unlock(task); | ||
1043 | put_task_struct(task); | 1056 | put_task_struct(task); |
1044 | if (end - buffer == 0) | 1057 | if (end - buffer == 0) |
1045 | return -EIO; | 1058 | return -EIO; |