aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: David Rientjes <rientjes@google.com>, 2012-12-11 19:02:56 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2012-12-11 20:22:27 -0500
commit: e1e12d2f3104be886073ac6c5c4678f30b1b9e51 (patch)
tree: b08cba1dba28e18cf7c2ffd8d076ce744e368b5f /mm
parent: a9c58b907dbc6821533dfc295b63caf111ff1f16 (diff)
mm, oom: fix race when specifying a thread as the oom origin
test_set_oom_score_adj() and compare_swap_oom_score_adj() are used to specify that current should be killed first if an oom condition occurs in between the two calls.

The usage is

	short oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
	...
	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);

to store the thread's oom_score_adj, temporarily change it to the maximum score possible, and then restore the old value if it is still the same.

This happens to still be racy, however, if the user writes OOM_SCORE_ADJ_MAX to /proc/pid/oom_score_adj in between the two calls. The compare_swap_oom_score_adj() will then incorrectly reset the old value prior to the write of OOM_SCORE_ADJ_MAX.

To fix this, introduce a new oom_flags_t member in struct signal_struct that will be used for per-thread oom killer flags. KSM and swapoff can now use a bit in this member to specify that threads should be killed first in oom conditions without playing around with oom_score_adj.

This also allows the correct oom_score_adj to always be shown when reading /proc/pid/oom_score.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/ksm.c       7
-rw-r--r--  mm/oom_kill.c  49
-rw-r--r--  mm/swapfile.c  5
3 files changed, 11 insertions, 50 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index b4d5a9deb17f..382d930a0bf1 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1919,12 +1919,9 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
1919 if (ksm_run != flags) { 1919 if (ksm_run != flags) {
1920 ksm_run = flags; 1920 ksm_run = flags;
1921 if (flags & KSM_RUN_UNMERGE) { 1921 if (flags & KSM_RUN_UNMERGE) {
1922 short oom_score_adj; 1922 set_current_oom_origin();
1923
1924 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1925 err = unmerge_and_remove_all_rmap_items(); 1923 err = unmerge_and_remove_all_rmap_items();
1926 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, 1924 clear_current_oom_origin();
1927 oom_score_adj);
1928 if (err) { 1925 if (err) {
1929 ksm_run = KSM_RUN_STOP; 1926 ksm_run = KSM_RUN_STOP;
1930 count = err; 1927 count = err;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 37ab4c5ab6e8..18f1ae2b45de 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,48 +44,6 @@ int sysctl_oom_kill_allocating_task;
44int sysctl_oom_dump_tasks = 1; 44int sysctl_oom_dump_tasks = 1;
45static DEFINE_SPINLOCK(zone_scan_lock); 45static DEFINE_SPINLOCK(zone_scan_lock);
46 46
47/*
48 * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
49 * @old_val: old oom_score_adj for compare
50 * @new_val: new oom_score_adj for swap
51 *
52 * Sets the oom_score_adj value for current to @new_val iff its present value is
53 * @old_val. Usually used to reinstate a previous value to prevent racing with
54 * userspacing tuning the value in the interim.
55 */
56void compare_swap_oom_score_adj(short old_val, short new_val)
57{
58 struct sighand_struct *sighand = current->sighand;
59
60 spin_lock_irq(&sighand->siglock);
61 if (current->signal->oom_score_adj == old_val)
62 current->signal->oom_score_adj = new_val;
63 trace_oom_score_adj_update(current);
64 spin_unlock_irq(&sighand->siglock);
65}
66
67/**
68 * test_set_oom_score_adj() - set current's oom_score_adj and return old value
69 * @new_val: new oom_score_adj value
70 *
71 * Sets the oom_score_adj value for current to @new_val with proper
72 * synchronization and returns the old value. Usually used to temporarily
73 * set a value, save the old value in the caller, and then reinstate it later.
74 */
75short test_set_oom_score_adj(short new_val)
76{
77 struct sighand_struct *sighand = current->sighand;
78 int old_val;
79
80 spin_lock_irq(&sighand->siglock);
81 old_val = current->signal->oom_score_adj;
82 current->signal->oom_score_adj = new_val;
83 trace_oom_score_adj_update(current);
84 spin_unlock_irq(&sighand->siglock);
85
86 return old_val;
87}
88
89#ifdef CONFIG_NUMA 47#ifdef CONFIG_NUMA
90/** 48/**
91 * has_intersects_mems_allowed() - check task eligiblity for kill 49 * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -310,6 +268,13 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
310 if (!task->mm) 268 if (!task->mm)
311 return OOM_SCAN_CONTINUE; 269 return OOM_SCAN_CONTINUE;
312 270
271 /*
272 * If task is allocating a lot of memory and has been marked to be
273 * killed first if it triggers an oom, then select it.
274 */
275 if (oom_task_origin(task))
276 return OOM_SCAN_SELECT;
277
313 if (task->flags & PF_EXITING && !force_kill) { 278 if (task->flags & PF_EXITING && !force_kill) {
314 /* 279 /*
315 * If this task is not being ptraced on exit, then wait for it 280 * If this task is not being ptraced on exit, then wait for it
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bb6f6a04e92d..e97a0e5aea91 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1498,7 +1498,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1498 struct address_space *mapping; 1498 struct address_space *mapping;
1499 struct inode *inode; 1499 struct inode *inode;
1500 struct filename *pathname; 1500 struct filename *pathname;
1501 short oom_score_adj;
1502 int i, type, prev; 1501 int i, type, prev;
1503 int err; 1502 int err;
1504 1503
@@ -1557,9 +1556,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1557 p->flags &= ~SWP_WRITEOK; 1556 p->flags &= ~SWP_WRITEOK;
1558 spin_unlock(&swap_lock); 1557 spin_unlock(&swap_lock);
1559 1558
1560 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1559 set_current_oom_origin();
1561 err = try_to_unuse(type, false, 0); /* force all pages to be unused */ 1560 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1562 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1561 clear_current_oom_origin();
1563 1562
1564 if (err) { 1563 if (err) {
1565 /* re-insert swap space back into swap_list */ 1564 /* re-insert swap space back into swap_list */