diff options
author | Mandeep Singh Baines <msb@chromium.org> | 2011-01-13 18:46:05 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:35 -0500 |
commit | dabb16f639820267b3850d804571c70bd93d4e07 (patch) | |
tree | 7da59e6133cd2f820389574ac9206c56e046f5d4 | |
parent | d0a21265dfb5fa8ae54e90d0fb6d1c215b10a28a (diff) |
oom: allow a non-CAP_SYS_RESOURCE process to oom_score_adj down
We'd like to be able to oom_score_adj a process up/down as it
enters/leaves the foreground. Currently, it is not possible to oom_adj
down without CAP_SYS_RESOURCE. This patch allows a task to decrease its
oom_score_adj back to the value that a CAP_SYS_RESOURCE thread set it to
or its inherited value at fork. Assuming the thread that has forked it
has oom_score_adj of 0, each process could decrease it back from 0 upon
activation unless a CAP_SYS_RESOURCE thread elevated it to something
higher.
Alternatives considered:
* a setuid binary
* a daemon with CAP_SYS_RESOURCE
Since you don't want all processes to be able to reduce their oom_adj, a
setuid or daemon implementation would be complex. The alternatives also
have much higher overhead.
This patch was updated from the original patch based on feedback from David
Rientjes.
Signed-off-by: Mandeep Singh Baines <msb@chromium.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/filesystems/proc.txt | 4 | ||||
-rw-r--r-- | fs/proc/base.c | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | kernel/fork.c | 1 |
4 files changed, 10 insertions, 1 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ef757fca470b..23cae6548d3a 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1323,6 +1323,10 @@ scaled linearly with /proc/<pid>/oom_score_adj. | |||
1323 | Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the | 1323 | Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the |
1324 | other with its scaled value. | 1324 | other with its scaled value. |
1325 | 1325 | ||
1326 | The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last | ||
1327 | value set by a CAP_SYS_RESOURCE process. To reduce the value any lower | ||
1328 | requires CAP_SYS_RESOURCE. | ||
1329 | |||
1326 | NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see | 1330 | NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see |
1327 | Documentation/feature-removal-schedule.txt. | 1331 | Documentation/feature-removal-schedule.txt. |
1328 | 1332 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index 93f1cdd5d3d7..9d096e82b201 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1151,7 +1151,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1151 | goto err_task_lock; | 1151 | goto err_task_lock; |
1152 | } | 1152 | } |
1153 | 1153 | ||
1154 | if (oom_score_adj < task->signal->oom_score_adj && | 1154 | if (oom_score_adj < task->signal->oom_score_adj_min && |
1155 | !capable(CAP_SYS_RESOURCE)) { | 1155 | !capable(CAP_SYS_RESOURCE)) { |
1156 | err = -EACCES; | 1156 | err = -EACCES; |
1157 | goto err_sighand; | 1157 | goto err_sighand; |
@@ -1164,6 +1164,8 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1164 | atomic_dec(&task->mm->oom_disable_count); | 1164 | atomic_dec(&task->mm->oom_disable_count); |
1165 | } | 1165 | } |
1166 | task->signal->oom_score_adj = oom_score_adj; | 1166 | task->signal->oom_score_adj = oom_score_adj; |
1167 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | ||
1168 | task->signal->oom_score_adj_min = oom_score_adj; | ||
1167 | /* | 1169 | /* |
1168 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1170 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
1169 | * always attainable. | 1171 | * always attainable. |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 07402530fc70..f23b5bb6f52e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -634,6 +634,8 @@ struct signal_struct { | |||
634 | 634 | ||
635 | int oom_adj; /* OOM kill score adjustment (bit shift) */ | 635 | int oom_adj; /* OOM kill score adjustment (bit shift) */ |
636 | int oom_score_adj; /* OOM kill score adjustment */ | 636 | int oom_score_adj; /* OOM kill score adjustment */ |
637 | int oom_score_adj_min; /* OOM kill score adjustment minimum value. | ||
638 | * Only settable by CAP_SYS_RESOURCE. */ | ||
637 | 639 | ||
638 | struct mutex cred_guard_mutex; /* guard against foreign influences on | 640 | struct mutex cred_guard_mutex; /* guard against foreign influences on |
639 | * credential calculations | 641 | * credential calculations |
diff --git a/kernel/fork.c b/kernel/fork.c index 76a1fdd80bdf..1499607e4da2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -910,6 +910,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
910 | 910 | ||
911 | sig->oom_adj = current->signal->oom_adj; | 911 | sig->oom_adj = current->signal->oom_adj; |
912 | sig->oom_score_adj = current->signal->oom_score_adj; | 912 | sig->oom_score_adj = current->signal->oom_score_adj; |
913 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | ||
913 | 914 | ||
914 | mutex_init(&sig->cred_guard_mutex); | 915 | mutex_init(&sig->cred_guard_mutex); |
915 | 916 | ||