aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2009-06-16 18:32:56 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-06-16 22:47:43 -0400
commit2ff05b2b4eac2e63d345fc731ea151a060247f53 (patch)
tree1840bc2d3b381eca5d39869499339b0fcc6eabbf /fs/proc
parentc9e444103b5e7a5a3519f9913f59767f92e33baf (diff)
oom: move oom_adj value from task_struct to mm_struct
The per-task oom_adj value is a characteristic of its mm more than the task itself since it's not possible to oom kill any thread that shares the mm. If a task were to be killed while attached to an mm that could not be freed because another thread were set to OOM_DISABLE, it would have needlessly been terminated since there is no potential for future memory freeing. This patch moves oomkilladj (now more appropriately named oom_adj) from struct task_struct to struct mm_struct. This requires task_lock() on a task to check its oom_adj value to protect against exec, but it's already necessary to take the lock when dereferencing the mm to find the total VM size for the badness heuristic. This fixes a livelock if the oom killer chooses a task and another thread sharing the same memory has an oom_adj value of OOM_DISABLE. This occurs because oom_kill_task() repeatedly returns 1 and refuses to kill the chosen task while select_bad_process() will repeatedly choose the same task during the next retry. Taking task_lock() in select_bad_process() to check for OOM_DISABLE and in oom_kill_task() to check for threads sharing the same memory will be removed in the next patch in this series where it will no longer be necessary. Writing to /proc/pid/oom_adj for a kthread will now return -EINVAL since these threads are immune from oom killing already. They simply report an oom_adj value of OOM_DISABLE. Cc: Nick Piggin <npiggin@suse.de> Cc: Rik van Riel <riel@redhat.com> Cc: Mel Gorman <mel@csn.ul.ie> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/base.c19
1 files changed, 16 insertions, 3 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1539e630c47d..3ce5ae9e3d2d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1006,7 +1006,12 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
1006 1006
1007 if (!task) 1007 if (!task)
1008 return -ESRCH; 1008 return -ESRCH;
1009 oom_adjust = task->oomkilladj; 1009 task_lock(task);
1010 if (task->mm)
1011 oom_adjust = task->mm->oom_adj;
1012 else
1013 oom_adjust = OOM_DISABLE;
1014 task_unlock(task);
1010 put_task_struct(task); 1015 put_task_struct(task);
1011 1016
1012 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1017 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1035,11 +1040,19 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1035 task = get_proc_task(file->f_path.dentry->d_inode); 1040 task = get_proc_task(file->f_path.dentry->d_inode);
1036 if (!task) 1041 if (!task)
1037 return -ESRCH; 1042 return -ESRCH;
1038 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1043 task_lock(task);
1044 if (!task->mm) {
1045 task_unlock(task);
1046 put_task_struct(task);
1047 return -EINVAL;
1048 }
1049 if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1050 task_unlock(task);
1039 put_task_struct(task); 1051 put_task_struct(task);
1040 return -EACCES; 1052 return -EACCES;
1041 } 1053 }
1042 task->oomkilladj = oom_adjust; 1054 task->mm->oom_adj = oom_adjust;
1055 task_unlock(task);
1043 put_task_struct(task); 1056 put_task_struct(task);
1044 if (end - buffer == 0) 1057 if (end - buffer == 0)
1045 return -EIO; 1058 return -EIO;