author	Mauro Carvalho Chehab <mchehab@redhat.com>	2012-12-27 09:44:11 -0500
committer	Mauro Carvalho Chehab <mchehab@redhat.com>	2012-12-27 09:44:11 -0500
commit	a44dca1717ce2c2381339e21c07d1731a63a7888 (patch)
tree	3d0b3bd26492f9fa1f1f1c1ad838315b266da7c1 /mm/oom_kill.c
parent	30ebc5e44d057a1619ad63fe32c8c1670c37c4b8 (diff)
parent	a49f0d1ea3ec94fc7cf33a7c36a16343b74bd565 (diff)
Merge tag 'v3.8-rc1' into staging/for_v3.9
Linux 3.8-rc1

* tag 'v3.8-rc1': (10696 commits)
  Linux 3.8-rc1
  Revert "nfsd: warn on odd reply state in nfsd_vfs_read"
  ARM: dts: fix duplicated build target and alphabetical sort out for exynos
  dm stripe: add WRITE SAME support
  dm: remove map_info
  dm snapshot: do not use map_context
  dm thin: dont use map_context
  dm raid1: dont use map_context
  dm flakey: dont use map_context
  dm raid1: rename read_record to bio_record
  dm: move target request nr to dm_target_io
  dm snapshot: use per_bio_data
  dm verity: use per_bio_data
  dm raid1: use per_bio_data
  dm: introduce per_bio_data
  dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
  dm linear: add WRITE SAME support
  dm: add WRITE SAME support
  dm: prepare to support WRITE SAME
  dm ioctl: use kmalloc if possible
  ...

Conflicts:
	MAINTAINERS
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--	mm/oom_kill.c	138
1 file changed, 27 insertions(+), 111 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 79e0f3e24831..0399f146ae49 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,48 +44,6 @@ int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
-/*
- * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
- * @old_val: old oom_score_adj for compare
- * @new_val: new oom_score_adj for swap
- *
- * Sets the oom_score_adj value for current to @new_val iff its present value is
- * @old_val. Usually used to reinstate a previous value to prevent racing with
- * userspacing tuning the value in the interim.
- */
-void compare_swap_oom_score_adj(int old_val, int new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-
-	spin_lock_irq(&sighand->siglock);
-	if (current->signal->oom_score_adj == old_val)
-		current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-}
-
-/**
- * test_set_oom_score_adj() - set current's oom_score_adj and return old value
- * @new_val: new oom_score_adj value
- *
- * Sets the oom_score_adj value for current to @new_val with proper
- * synchronization and returns the old value. Usually used to temporarily
- * set a value, save the old value in the caller, and then reinstate it later.
- */
-int test_set_oom_score_adj(int new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-	int old_val;
-
-	spin_lock_irq(&sighand->siglock);
-	old_val = current->signal->oom_score_adj;
-	current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-
-	return old_val;
-}
-
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
@@ -193,7 +151,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	if (!p)
 		return 0;
 
-	adj = p->signal->oom_score_adj;
+	adj = (long)p->signal->oom_score_adj;
 	if (adj == OOM_SCORE_ADJ_MIN) {
 		task_unlock(p);
 		return 0;
@@ -257,7 +215,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	 * the page allocator means a mempolicy is in effect. Cpuset policy
 	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
+	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
 		*totalpages = total_swap_pages;
 		for_each_node_mask(nid, *nodemask)
 			*totalpages += node_spanned_pages(nid);
@@ -310,26 +268,20 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (!task->mm)
 		return OOM_SCAN_CONTINUE;
 
-	if (task->flags & PF_EXITING) {
+	/*
+	 * If task is allocating a lot of memory and has been marked to be
+	 * killed first if it triggers an oom, then select it.
+	 */
+	if (oom_task_origin(task))
+		return OOM_SCAN_SELECT;
+
+	if (task->flags & PF_EXITING && !force_kill) {
 		/*
-		 * If task is current and is in the process of releasing memory,
-		 * allow the "kill" to set TIF_MEMDIE, which will allow it to
-		 * access memory reserves. Otherwise, it may stall forever.
-		 *
-		 * The iteration isn't broken here, however, in case other
-		 * threads are found to have already been oom killed.
+		 * If this task is not being ptraced on exit, then wait for it
+		 * to finish before killing some other task unnecessarily.
 		 */
-		if (task == current)
-			return OOM_SCAN_SELECT;
-		else if (!force_kill) {
-			/*
-			 * If this task is not being ptraced on exit, then wait
-			 * for it to finish before killing some other task
-			 * unnecessarily.
-			 */
-			if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
-				return OOM_SCAN_ABORT;
-		}
+		if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
+			return OOM_SCAN_ABORT;
 	}
 	return OOM_SCAN_OK;
 }
@@ -412,7 +364,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5d %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
 			task->mm->nr_ptes,
@@ -428,7 +380,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
-		"oom_score_adj=%d\n",
+		"oom_score_adj=%hd\n",
 		current->comm, gfp_mask, order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
@@ -639,43 +591,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 	spin_unlock(&zone_scan_lock);
 }
 
-/*
- * Try to acquire the oom killer lock for all system zones. Returns zero if a
- * parallel oom killing is taking place, otherwise locks all zones and returns
- * non-zero.
- */
-static int try_set_system_oom(void)
-{
-	struct zone *zone;
-	int ret = 1;
-
-	spin_lock(&zone_scan_lock);
-	for_each_populated_zone(zone)
-		if (zone_is_oom_locked(zone)) {
-			ret = 0;
-			goto out;
-		}
-	for_each_populated_zone(zone)
-		zone_set_flag(zone, ZONE_OOM_LOCKED);
-out:
-	spin_unlock(&zone_scan_lock);
-	return ret;
-}
-
-/*
- * Clears ZONE_OOM_LOCKED for all system zones so that failed allocation
- * attempts or page faults may now recall the oom killer, if necessary.
- */
-static void clear_system_oom(void)
-{
-	struct zone *zone;
-
-	spin_lock(&zone_scan_lock);
-	for_each_populated_zone(zone)
-		zone_clear_flag(zone, ZONE_OOM_LOCKED);
-	spin_unlock(&zone_scan_lock);
-}
-
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
@@ -706,11 +621,11 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		return;
 
 	/*
-	 * If current has a pending SIGKILL, then automatically select it. The
-	 * goal is to allow it to allocate so that it may quickly exit and free
-	 * its memory.
+	 * If current has a pending SIGKILL or is exiting, then automatically
+	 * select it. The goal is to allow it to allocate so that it may
+	 * quickly exit and free its memory.
 	 */
-	if (fatal_signal_pending(current)) {
+	if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
 		set_thread_flag(TIF_MEMDIE);
 		return;
 	}
@@ -756,15 +671,16 @@ out:
 
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
- * memory-hogging task. If a populated zone has ZONE_OOM_LOCKED set, a parallel
- * oom killing is already in progress so do nothing. If a task is found with
- * TIF_MEMDIE set, it has been killed so do nothing and allow it to exit.
+ * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
+ * parallel oom killing is already in progress so do nothing.
  */
 void pagefault_out_of_memory(void)
 {
-	if (try_set_system_oom()) {
+	struct zonelist *zonelist = node_zonelist(first_online_node,
+						  GFP_KERNEL);
+
+	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
 		out_of_memory(NULL, 0, 0, NULL, false);
-		clear_system_oom();
+		clear_zonelist_oom(zonelist, GFP_KERNEL);
 	}
-	schedule_timeout_killable(1);
 }