aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/base.c
diff options
context:
space:
mode:
author KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2009-09-21 20:03:13 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-22 10:17:39 -0400
commit 28b83c5193e7ab951e402252278f2cc79dc4d298 (patch)
tree 10080e8d3957c2a03f8419ab44c9ecb0ffcdaee0 /fs/proc/base.c
parent f168e1b6390e2d79cf57e48e6ae6d9b0a9e2851a (diff)
oom: move oom_adj value from task_struct to signal_struct
Currently, the OOM logic call flow is as follows:

    __out_of_memory()
        select_bad_process()    for each task
            badness()           calculate badness of one task
        oom_kill_process()      search child
            oom_kill_task()     kill target task and mm shared tasks with it

For example, process-A has two threads, thread-A and thread-B, and it uses a very large amount of memory. Each thread has the following oom_adj and oom_score:

    thread-A: oom_adj = OOM_DISABLE, oom_score = 0
    thread-B: oom_adj = 0,           oom_score = very-high

Then select_bad_process() selects thread-B, but oom_kill_task() refuses to kill the task because thread-A has OOM_DISABLE. Thus __out_of_memory() calls select_bad_process() again, but select_bad_process() selects the same task. This means the kernel falls into a livelock.

The fact is that select_bad_process() must select a killable task; otherwise the OOM logic goes into a livelock.

The root cause is that oom_adj shouldn't be a per-thread value. It should be a per-process value, because the OOM killer kills a process, not a thread.

Thus this patch moves oomkilladj (now more appropriately named oom_adj) from struct task_struct to struct signal_struct. This naturally prevents select_bad_process() from choosing the wrong task.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- fs/proc/base.c 24
1 files changed, 20 insertions, 4 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6f742f6658a9..81cfff82875b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -999,11 +999,17 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf,
999 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 999 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
1000 char buffer[PROC_NUMBUF]; 1000 char buffer[PROC_NUMBUF];
1001 size_t len; 1001 size_t len;
1002 int oom_adjust; 1002 int oom_adjust = OOM_DISABLE;
1003 unsigned long flags;
1003 1004
1004 if (!task) 1005 if (!task)
1005 return -ESRCH; 1006 return -ESRCH;
1006 oom_adjust = task->oomkilladj; 1007
1008 if (lock_task_sighand(task, &flags)) {
1009 oom_adjust = task->signal->oom_adj;
1010 unlock_task_sighand(task, &flags);
1011 }
1012
1007 put_task_struct(task); 1013 put_task_struct(task);
1008 1014
1009 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1015 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
@@ -1017,6 +1023,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1017 struct task_struct *task; 1023 struct task_struct *task;
1018 char buffer[PROC_NUMBUF], *end; 1024 char buffer[PROC_NUMBUF], *end;
1019 int oom_adjust; 1025 int oom_adjust;
1026 unsigned long flags;
1020 1027
1021 memset(buffer, 0, sizeof(buffer)); 1028 memset(buffer, 0, sizeof(buffer));
1022 if (count > sizeof(buffer) - 1) 1029 if (count > sizeof(buffer) - 1)
@@ -1032,11 +1039,20 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1032 task = get_proc_task(file->f_path.dentry->d_inode); 1039 task = get_proc_task(file->f_path.dentry->d_inode);
1033 if (!task) 1040 if (!task)
1034 return -ESRCH; 1041 return -ESRCH;
1035 if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { 1042 if (!lock_task_sighand(task, &flags)) {
1043 put_task_struct(task);
1044 return -ESRCH;
1045 }
1046
1047 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1048 unlock_task_sighand(task, &flags);
1036 put_task_struct(task); 1049 put_task_struct(task);
1037 return -EACCES; 1050 return -EACCES;
1038 } 1051 }
1039 task->oomkilladj = oom_adjust; 1052
1053 task->signal->oom_adj = oom_adjust;
1054
1055 unlock_task_sighand(task, &flags);
1040 put_task_struct(task); 1056 put_task_struct(task);
1041 if (end - buffer == 0) 1057 if (end - buffer == 0)
1042 return -EIO; 1058 return -EIO;