aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2011-05-24 20:11:40 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-25 11:39:10 -0400
commit72788c385604523422592249c19cba0187021e9b (patch)
tree3552a4b6ae4adb77e723d8a8d1d2669bfb04a8bb
parentc6a140bf164829769499b5e50d380893da39b29e (diff)
oom: replace PF_OOM_ORIGIN with toggling oom_score_adj
There's a kernel-wide shortage of per-process flags, so it's always helpful to trim one when possible without incurring a significant penalty. It's even more important when you're planning on adding a per-process flag yourself, which I plan to do shortly for transparent hugepages. PF_OOM_ORIGIN is used by ksm and swapoff to prefer current since it has a tendency to allocate large amounts of memory and should be preferred for killing over other tasks. We'd rather immediately kill the task making the errant syscall rather than penalizing an innocent task. This patch removes PF_OOM_ORIGIN since its behavior is equivalent to setting the process's oom_score_adj to OOM_SCORE_ADJ_MAX. The process's old oom_score_adj is stored and then set to OOM_SCORE_ADJ_MAX during the time it used to have PF_OOM_ORIGIN. The old value is then reinstated when the process should no longer be considered a high priority for oom killing. Signed-off-by: David Rientjes <rientjes@google.com> Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Cc: Hugh Dickins <hughd@google.com> Cc: Izik Eidus <ieidus@redhat.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/oom.h2
-rw-r--r--include/linux/sched.h1
-rw-r--r--mm/ksm.c7
-rw-r--r--mm/oom_kill.c36
-rw-r--r--mm/swapfile.c6
5 files changed, 38 insertions, 14 deletions
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5e3aa8311c5e..4952fb874ad3 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -40,6 +40,8 @@ enum oom_constraint {
40 CONSTRAINT_MEMCG, 40 CONSTRAINT_MEMCG,
41}; 41};
42 42
43extern int test_set_oom_score_adj(int new_val);
44
43extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, 45extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
44 const nodemask_t *nodemask, unsigned long totalpages); 46 const nodemask_t *nodemask, unsigned long totalpages);
45extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); 47extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf71e08222c..44b8faaac7c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1753,7 +1753,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
1753#define PF_FROZEN 0x00010000 /* frozen for system suspend */ 1753#define PF_FROZEN 0x00010000 /* frozen for system suspend */
1754#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ 1754#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
1755#define PF_KSWAPD 0x00040000 /* I am kswapd */ 1755#define PF_KSWAPD 0x00040000 /* I am kswapd */
1756#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
1757#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ 1756#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1758#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ 1757#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1759#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ 1758#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
diff --git a/mm/ksm.c b/mm/ksm.c
index 942dfc73a2ff..d708b3ef2260 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -35,6 +35,7 @@
35#include <linux/ksm.h> 35#include <linux/ksm.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/freezer.h> 37#include <linux/freezer.h>
38#include <linux/oom.h>
38 39
39#include <asm/tlbflush.h> 40#include <asm/tlbflush.h>
40#include "internal.h" 41#include "internal.h"
@@ -1894,9 +1895,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1894 if (ksm_run != flags) { 1895 if (ksm_run != flags) {
1895 ksm_run = flags; 1896 ksm_run = flags;
1896 if (flags & KSM_RUN_UNMERGE) { 1897 if (flags & KSM_RUN_UNMERGE) {
1897 current->flags |= PF_OOM_ORIGIN; 1898 int oom_score_adj;
1899
1900 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1898 err = unmerge_and_remove_all_rmap_items(); 1901 err = unmerge_and_remove_all_rmap_items();
1899 current->flags &= ~PF_OOM_ORIGIN; 1902 test_set_oom_score_adj(oom_score_adj);
1900 if (err) { 1903 if (err) {
1901 ksm_run = KSM_RUN_STOP; 1904 ksm_run = KSM_RUN_STOP;
1902 count = err; 1905 count = err;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f52e85c80e8d..e4b0991ca351 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,6 +38,33 @@ int sysctl_oom_kill_allocating_task;
38int sysctl_oom_dump_tasks = 1; 38int sysctl_oom_dump_tasks = 1;
39static DEFINE_SPINLOCK(zone_scan_lock); 39static DEFINE_SPINLOCK(zone_scan_lock);
40 40
41/**
42 * test_set_oom_score_adj() - set current's oom_score_adj and return old value
43 * @new_val: new oom_score_adj value
44 *
45 * Sets current->signal->oom_score_adj to @new_val while holding current's
46 * sighand siglock and returns the previous value. Usually used to temporarily
47 * set a value, save the old value in the caller, and then reinstate it later.
48 */
49int test_set_oom_score_adj(int new_val)
50{
51 struct sighand_struct *sighand = current->sighand;
52 int old_val;
53
54 spin_lock_irq(&sighand->siglock);
55 old_val = current->signal->oom_score_adj;
56 if (new_val != old_val) { /* unchanged value: no store, no counter update */
57 if (new_val == OOM_SCORE_ADJ_MIN) /* moving to OOM_SCORE_ADJ_MIN: count it on current's mm */
58 atomic_inc(&current->mm->oom_disable_count);
59 else if (old_val == OOM_SCORE_ADJ_MIN) /* moving away from OOM_SCORE_ADJ_MIN: undo that count */
60 atomic_dec(&current->mm->oom_disable_count);
61 current->signal->oom_score_adj = new_val;
62 }
63 spin_unlock_irq(&sighand->siglock);
64
65 return old_val;
66}
67
41#ifdef CONFIG_NUMA 68#ifdef CONFIG_NUMA
42/** 69/**
43 * has_intersects_mems_allowed() - check task eligiblity for kill 70 * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -155,15 +182,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
155 } 182 }
156 183
157 /* 184 /*
158 * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
159 * priority for oom killing.
160 */
161 if (p->flags & PF_OOM_ORIGIN) {
162 task_unlock(p);
163 return 1000;
164 }
165
166 /*
167 * The memory controller may have a limit of 0 bytes, so avoid a divide 185 * The memory controller may have a limit of 0 bytes, so avoid a divide
168 * by zero, if necessary. 186 * by zero, if necessary.
169 */ 187 */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8c6b3ce38f09..d537d29e9b7b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,7 @@
31#include <linux/syscalls.h> 31#include <linux/syscalls.h>
32#include <linux/memcontrol.h> 32#include <linux/memcontrol.h>
33#include <linux/poll.h> 33#include <linux/poll.h>
34#include <linux/oom.h>
34 35
35#include <asm/pgtable.h> 36#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 37#include <asm/tlbflush.h>
@@ -1555,6 +1556,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1555 struct address_space *mapping; 1556 struct address_space *mapping;
1556 struct inode *inode; 1557 struct inode *inode;
1557 char *pathname; 1558 char *pathname;
1559 int oom_score_adj;
1558 int i, type, prev; 1560 int i, type, prev;
1559 int err; 1561 int err;
1560 1562
@@ -1613,9 +1615,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1613 p->flags &= ~SWP_WRITEOK; 1615 p->flags &= ~SWP_WRITEOK;
1614 spin_unlock(&swap_lock); 1616 spin_unlock(&swap_lock);
1615 1617
1616 current->flags |= PF_OOM_ORIGIN; 1618 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1617 err = try_to_unuse(type); 1619 err = try_to_unuse(type);
1618 current->flags &= ~PF_OOM_ORIGIN; 1620 test_set_oom_score_adj(oom_score_adj);
1619 1621
1620 if (err) { 1622 if (err) {
1621 /* 1623 /*