aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mandeep Singh Baines <msb@chromium.org> 2011-01-13 18:46:05 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:35 -0500
commit: dabb16f639820267b3850d804571c70bd93d4e07 (patch)
tree: 7da59e6133cd2f820389574ac9206c56e046f5d4
parent: d0a21265dfb5fa8ae54e90d0fb6d1c215b10a28a (diff)
oom: allow a non-CAP_SYS_RESOURCE process to oom_score_adj down
We'd like to be able to oom_score_adj a process up/down as it enters/leaves the foreground. Currently, it is not possible to oom_adj down without CAP_SYS_RESOURCE. This patch allows a task to decrease its oom_score_adj back to the value that a CAP_SYS_RESOURCE thread set it to or its inherited value at fork. Assuming the thread that has forked it has oom_score_adj of 0, each process could decrease it back from 0 upon activation unless a CAP_SYS_RESOURCE thread elevated it to something higher.

Alternatives considered:
* a setuid binary
* a daemon with CAP_SYS_RESOURCE

Since you don't want all processes to be able to reduce their oom_adj, a setuid or daemon implementation would be complex. The alternatives also have much higher overhead.

This patch was updated from the original patch based on feedback from David Rientjes.

Signed-off-by: Mandeep Singh Baines <msb@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/filesystems/proc.txt 4
-rw-r--r-- fs/proc/base.c 4
-rw-r--r-- include/linux/sched.h 2
-rw-r--r-- kernel/fork.c 1
4 files changed, 10 insertions, 1 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index ef757fca470b..23cae6548d3a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1323,6 +1323,10 @@ scaled linearly with /proc/<pid>/oom_score_adj.
1323Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the 1323Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the
1324other with its scaled value. 1324other with its scaled value.
1325 1325
1326The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
1327value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
1328requires CAP_SYS_RESOURCE.
1329
1326NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see 1330NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see
1327Documentation/feature-removal-schedule.txt. 1331Documentation/feature-removal-schedule.txt.
1328 1332
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 93f1cdd5d3d7..9d096e82b201 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1151,7 +1151,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1151 goto err_task_lock; 1151 goto err_task_lock;
1152 } 1152 }
1153 1153
1154 if (oom_score_adj < task->signal->oom_score_adj && 1154 if (oom_score_adj < task->signal->oom_score_adj_min &&
1155 !capable(CAP_SYS_RESOURCE)) { 1155 !capable(CAP_SYS_RESOURCE)) {
1156 err = -EACCES; 1156 err = -EACCES;
1157 goto err_sighand; 1157 goto err_sighand;
@@ -1164,6 +1164,8 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1164 atomic_dec(&task->mm->oom_disable_count); 1164 atomic_dec(&task->mm->oom_disable_count);
1165 } 1165 }
1166 task->signal->oom_score_adj = oom_score_adj; 1166 task->signal->oom_score_adj = oom_score_adj;
1167 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1168 task->signal->oom_score_adj_min = oom_score_adj;
1167 /* 1169 /*
1168 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1170 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1169 * always attainable. 1171 * always attainable.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 07402530fc70..f23b5bb6f52e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -634,6 +634,8 @@ struct signal_struct {
634 634
635 int oom_adj; /* OOM kill score adjustment (bit shift) */ 635 int oom_adj; /* OOM kill score adjustment (bit shift) */
636 int oom_score_adj; /* OOM kill score adjustment */ 636 int oom_score_adj; /* OOM kill score adjustment */
637 int oom_score_adj_min; /* OOM kill score adjustment minimum value.
638 * Only settable by CAP_SYS_RESOURCE. */
637 639
638 struct mutex cred_guard_mutex; /* guard against foreign influences on 640 struct mutex cred_guard_mutex; /* guard against foreign influences on
639 * credential calculations 641 * credential calculations
diff --git a/kernel/fork.c b/kernel/fork.c
index 76a1fdd80bdf..1499607e4da2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -910,6 +910,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
910 910
911 sig->oom_adj = current->signal->oom_adj; 911 sig->oom_adj = current->signal->oom_adj;
912 sig->oom_score_adj = current->signal->oom_score_adj; 912 sig->oom_score_adj = current->signal->oom_score_adj;
913 sig->oom_score_adj_min = current->signal->oom_score_adj_min;
913 914
914 mutex_init(&sig->cred_guard_mutex); 915 mutex_init(&sig->cred_guard_mutex);
915 916