author    Mauro Carvalho Chehab <mchehab@redhat.com>  2012-12-27 09:44:11 -0500
committer Mauro Carvalho Chehab <mchehab@redhat.com>  2012-12-27 09:44:11 -0500
commit    a44dca1717ce2c2381339e21c07d1731a63a7888 (patch)
tree      3d0b3bd26492f9fa1f1f1c1ad838315b266da7c1 /mm/oom_kill.c
parent    30ebc5e44d057a1619ad63fe32c8c1670c37c4b8 (diff)
parent    a49f0d1ea3ec94fc7cf33a7c36a16343b74bd565 (diff)
Merge tag 'v3.8-rc1' into staging/for_v3.9
Linux 3.8-rc1
* tag 'v3.8-rc1': (10696 commits)
Linux 3.8-rc1
Revert "nfsd: warn on odd reply state in nfsd_vfs_read"
ARM: dts: fix duplicated build target and alphabetical sort out for exynos
dm stripe: add WRITE SAME support
dm: remove map_info
dm snapshot: do not use map_context
dm thin: dont use map_context
dm raid1: dont use map_context
dm flakey: dont use map_context
dm raid1: rename read_record to bio_record
dm: move target request nr to dm_target_io
dm snapshot: use per_bio_data
dm verity: use per_bio_data
dm raid1: use per_bio_data
dm: introduce per_bio_data
dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
dm linear: add WRITE SAME support
dm: add WRITE SAME support
dm: prepare to support WRITE SAME
dm ioctl: use kmalloc if possible
...
Conflicts:
MAINTAINERS
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--	mm/oom_kill.c	138
1 file changed, 27 insertions(+), 111 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 79e0f3e24831..0399f146ae49 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -44,48 +44,6 @@ int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
 static DEFINE_SPINLOCK(zone_scan_lock);
 
-/*
- * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
- * @old_val: old oom_score_adj for compare
- * @new_val: new oom_score_adj for swap
- *
- * Sets the oom_score_adj value for current to @new_val iff its present value is
- * @old_val.  Usually used to reinstate a previous value to prevent racing with
- * userspacing tuning the value in the interim.
- */
-void compare_swap_oom_score_adj(int old_val, int new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-
-	spin_lock_irq(&sighand->siglock);
-	if (current->signal->oom_score_adj == old_val)
-		current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-}
-
-/**
- * test_set_oom_score_adj() - set current's oom_score_adj and return old value
- * @new_val: new oom_score_adj value
- *
- * Sets the oom_score_adj value for current to @new_val with proper
- * synchronization and returns the old value.  Usually used to temporarily
- * set a value, save the old value in the caller, and then reinstate it later.
- */
-int test_set_oom_score_adj(int new_val)
-{
-	struct sighand_struct *sighand = current->sighand;
-	int old_val;
-
-	spin_lock_irq(&sighand->siglock);
-	old_val = current->signal->oom_score_adj;
-	current->signal->oom_score_adj = new_val;
-	trace_oom_score_adj_update(current);
-	spin_unlock_irq(&sighand->siglock);
-
-	return old_val;
-}
-
 #ifdef CONFIG_NUMA
 /**
  * has_intersects_mems_allowed() - check task eligiblity for kill
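The two helpers deleted above existed so that a caller (e.g. swapoff) could
temporarily raise its own oom_score_adj and restore it afterwards. In v3.8
that job moves to a dedicated flag, tested via the oom_task_origin() call
added further down. A sketch of the replacement API (roughly as it appears
in include/linux/oom.h at this tag; types slightly simplified):

	static inline void set_current_oom_origin(void)
	{
		current->signal->oom_flags |= OOM_FLAG_ORIGIN;
	}

	static inline void clear_current_oom_origin(void)
	{
		current->signal->oom_flags &= ~OOM_FLAG_ORIGIN;
	}

	static inline bool oom_task_origin(const struct task_struct *p)
	{
		return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN);
	}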
@@ -193,7 +151,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 	if (!p)
 		return 0;
 
-	adj = p->signal->oom_score_adj;
+	adj = (long)p->signal->oom_score_adj;
 	if (adj == OOM_SCORE_ADJ_MIN) {
 		task_unlock(p);
 		return 0;
@@ -257,7 +215,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	 * the page allocator means a mempolicy is in effect.  Cpuset policy
 	 * is enforced in get_page_from_freelist().
 	 */
-	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
+	if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) {
 		*totalpages = total_swap_pages;
 		for_each_node_mask(nid, *nodemask)
 			*totalpages += node_spanned_pages(nid);
@@ -310,26 +268,20 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
 	if (!task->mm)
 		return OOM_SCAN_CONTINUE;
 
-	if (task->flags & PF_EXITING) {
+	/*
+	 * If task is allocating a lot of memory and has been marked to be
+	 * killed first if it triggers an oom, then select it.
+	 */
+	if (oom_task_origin(task))
+		return OOM_SCAN_SELECT;
+
+	if (task->flags & PF_EXITING && !force_kill) {
 		/*
-		 * If task is current and is in the process of releasing memory,
-		 * allow the "kill" to set TIF_MEMDIE, which will allow it to
-		 * access memory reserves.  Otherwise, it may stall forever.
-		 *
-		 * The iteration isn't broken here, however, in case other
-		 * threads are found to have already been oom killed.
+		 * If this task is not being ptraced on exit, then wait for it
+		 * to finish before killing some other task unnecessarily.
 		 */
-		if (task == current)
-			return OOM_SCAN_SELECT;
-		else if (!force_kill) {
-			/*
-			 * If this task is not being ptraced on exit, then wait
-			 * for it to finish before killing some other task
-			 * unnecessarily.
-			 */
-			if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
-				return OOM_SCAN_ABORT;
-		}
+		if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
+			return OOM_SCAN_ABORT;
 	}
 	return OOM_SCAN_OK;
 }
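The oom_task_origin() test above replaces the old "task == current" special
case: rather than the OOM killer guessing that the current task is the heavy
allocator, callers mark themselves explicitly before allocating and unmark
afterwards. A sketch of the caller-side pattern (modelled on try_to_unuse()
in mm/swapfile.c at this tag; do_heavy_allocation() is a hypothetical
stand-in for the real work loop):

	static int heavy_allocator(void)
	{
		int err;

		set_current_oom_origin();	/* select us first on OOM */
		err = do_heavy_allocation();	/* hypothetical; may trigger OOM */
		clear_current_oom_origin();

		return err;
	}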
@@ -412,7 +364,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas
 			continue;
 		}
 
-		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5d %s\n",
+		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5hd %s\n",
 			task->pid, from_kuid(&init_user_ns, task_uid(task)),
 			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
 			task->mm->nr_ptes,
@@ -428,7 +380,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
-		"oom_score_adj=%d\n",
+		"oom_score_adj=%hd\n",
 		current->comm, gfp_mask, order,
 		current->signal->oom_score_adj);
 	cpuset_print_task_mems_allowed(current);
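The "%5hd" and "%hd" format changes here, like the (long) cast in
oom_badness() earlier in this diff, all follow from oom_score_adj shrinking
from int to short in v3.8. A sketch of the relevant signal_struct fields
(neighbouring fields and comments approximate):

	struct signal_struct {
		/* ... */
		oom_flags_t oom_flags;		/* holds OOM_FLAG_ORIGIN */
		short oom_score_adj;		/* OOM kill score adjustment */
		short oom_score_adj_min;	/* permanent lower bound for
						 * oom_score_adj */
		/* ... */
	};

"%hd" is the printk length modifier for short; widening to long before the
badness arithmetic keeps the whole heuristic in one signed type.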
@@ -639,43 +591,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 	spin_unlock(&zone_scan_lock);
 }
 
-/*
- * Try to acquire the oom killer lock for all system zones.  Returns zero if a
- * parallel oom killing is taking place, otherwise locks all zones and returns
- * non-zero.
- */
-static int try_set_system_oom(void)
-{
-	struct zone *zone;
-	int ret = 1;
-
-	spin_lock(&zone_scan_lock);
-	for_each_populated_zone(zone)
-		if (zone_is_oom_locked(zone)) {
-			ret = 0;
-			goto out;
-		}
-	for_each_populated_zone(zone)
-		zone_set_flag(zone, ZONE_OOM_LOCKED);
-out:
-	spin_unlock(&zone_scan_lock);
-	return ret;
-}
-
-/*
- * Clears ZONE_OOM_LOCKED for all system zones so that failed allocation
- * attempts or page faults may now recall the oom killer, if necessary.
- */
-static void clear_system_oom(void)
-{
-	struct zone *zone;
-
-	spin_lock(&zone_scan_lock);
-	for_each_populated_zone(zone)
-		zone_clear_flag(zone, ZONE_OOM_LOCKED);
-	spin_unlock(&zone_scan_lock);
-}
-
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
@@ -706,11 +621,11 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		return;
 
 	/*
-	 * If current has a pending SIGKILL, then automatically select it. The
-	 * goal is to allow it to allocate so that it may quickly exit and free
-	 * its memory.
+	 * If current has a pending SIGKILL or is exiting, then automatically
+	 * select it.  The goal is to allow it to allocate so that it may
+	 * quickly exit and free its memory.
 	 */
-	if (fatal_signal_pending(current)) {
+	if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
 		set_thread_flag(TIF_MEMDIE);
 		return;
 	}
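With PF_EXITING added alongside fatal_signal_pending(), any task that enters
out_of_memory() while already exiting gets TIF_MEMDIE at once. The flag
matters on the allocator side; condensed from gfp_to_alloc_flags() in
mm/page_alloc.c of this era (a sketch; the wrapper name is hypothetical, the
real code sets ALLOC_NO_WATERMARKS inline):

	/*
	 * A task with TIF_MEMDIE (or PF_MEMALLOC) may ignore zone watermarks,
	 * so a dying task can allocate, release its memory and exit quickly
	 * instead of stalling in the allocator.
	 */
	static bool may_use_emergency_reserves(void)
	{
		return !in_interrupt() &&
		       ((current->flags & PF_MEMALLOC) ||
			unlikely(test_thread_flag(TIF_MEMDIE)));
	}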
@@ -756,15 +671,16 @@ out:
 
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
- * memory-hogging task.  If a populated zone has ZONE_OOM_LOCKED set, a parallel
- * oom killing is already in progress so do nothing.  If a task is found with
- * TIF_MEMDIE set, it has been killed so do nothing and allow it to exit.
+ * memory-hogging task.  If any populated zone has ZONE_OOM_LOCKED set, a
+ * parallel oom killing is already in progress so do nothing.
  */
 void pagefault_out_of_memory(void)
 {
-	if (try_set_system_oom()) {
+	struct zonelist *zonelist = node_zonelist(first_online_node,
+							GFP_KERNEL);
+
+	if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
 		out_of_memory(NULL, 0, 0, NULL, false);
-		clear_system_oom();
+		clear_zonelist_oom(zonelist, GFP_KERNEL);
 	}
-	schedule_timeout_killable(1);
 }
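The page-fault path now serializes through the same per-zonelist OOM lock as
the allocator path, instead of the system-wide try_set_system_oom() removed
above. For reference, the helper it now calls already lives in mm/oom_kill.c
and follows the same test-then-set pattern under zone_scan_lock, only scoped
to the zones of one zonelist (roughly, at this tag):

	int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
	{
		struct zoneref *z;
		struct zone *zone;
		int ret = 1;

		spin_lock(&zone_scan_lock);
		for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
			if (zone_is_oom_locked(zone)) {
				ret = 0;
				goto out;
			}

		/* mark every zone in this zonelist as OOM-locked */
		for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask))
			zone_set_flag(zone, ZONE_OOM_LOCKED);
	out:
		spin_unlock(&zone_scan_lock);
		return ret;
	}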