-rw-r--r--  include/linux/mempolicy.h |  13
-rw-r--r--  mm/mempolicy.c            |  44
-rw-r--r--  mm/oom_kill.c             | 104
3 files changed, 124 insertions(+), 37 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7b9ef6bf45aa..31ac26ca4acf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -210,6 +210,8 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask);
 extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
+extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+				const nodemask_t *mask);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -338,7 +340,16 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 	return node_zonelist(0, gfp_flags);
 }
 
-static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
+static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
+{
+	return false;
+}
+
+static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+			const nodemask_t *mask)
+{
+	return false;
+}
 
 static inline int do_migrate_pages(struct mm_struct *mm,
 			const nodemask_t *from_nodes,
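
The header change follows the common kernel pattern for optional features: a real declaration when CONFIG_NUMA is set, and a static inline stub returning a safe default otherwise, so callers need no #ifdefs of their own. A minimal userspace sketch of the pattern, assuming nothing from the kernel (HAVE_NUMA and the demo_ names are invented for illustration):

/*
 * Userspace sketch, not kernel code: the declare-or-stub idiom from the
 * header above.  HAVE_NUMA stands in for CONFIG_NUMA; with the feature
 * compiled out, callers transparently get a constant answer.
 */
#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_NUMA
/* the real definition would live in another translation unit */
bool demo_nodemask_intersects(unsigned long policy_nodes, unsigned long mask);
#else
static inline bool demo_nodemask_intersects(unsigned long policy_nodes,
					    unsigned long mask)
{
	return false;	/* mirrors the !CONFIG_NUMA stub */
}
#endif

int main(void)
{
	/* without -DHAVE_NUMA this compiles, runs, and prints 0 */
	printf("intersects: %d\n", demo_nodemask_intersects(0x3, 0x4));
	return 0;
}
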
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5bc0a96beb51..8a73708d59bb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1712,6 +1712,50 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 }
 #endif
 
+/*
+ * mempolicy_nodemask_intersects
+ *
+ * If tsk's mempolicy is "default" [NULL], return 'true' to indicate default
+ * policy.  Otherwise, check for intersection between mask and the policy
+ * nodemask for 'bind' or 'interleave' policy.  For 'preferred' or 'local'
+ * policy, always return true since it may allocate elsewhere on fallback.
+ *
+ * Takes task_lock(tsk) to prevent freeing of its mempolicy.
+ */
+bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+					const nodemask_t *mask)
+{
+	struct mempolicy *mempolicy;
+	bool ret = true;
+
+	if (!mask)
+		return ret;
+	task_lock(tsk);
+	mempolicy = tsk->mempolicy;
+	if (!mempolicy)
+		goto out;
+
+	switch (mempolicy->mode) {
+	case MPOL_PREFERRED:
+		/*
+		 * MPOL_PREFERRED and MPOL_F_LOCAL are only preferred nodes to
+		 * allocate from; they may fall back to other nodes when oom.
+		 * Thus, it's possible for tsk to have allocated memory from
+		 * nodes in mask.
+		 */
+		break;
+	case MPOL_BIND:
+	case MPOL_INTERLEAVE:
+		ret = nodes_intersects(mempolicy->v.nodes, *mask);
+		break;
+	default:
+		BUG();
+	}
+out:
+	task_unlock(tsk);
+	return ret;
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
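
The decision table implemented above is small: a NULL ("default") policy and MPOL_PREFERRED never disqualify a task, since such tasks may have fallen back to any node, while MPOL_BIND and MPOL_INTERLEAVE disqualify a task whose policy nodes are disjoint from the oom nodemask. A self-contained model of the same logic, using a plain unsigned long as a stand-in for nodemask_t (all demo_ names are invented):

#include <assert.h>
#include <stdbool.h>

enum demo_mpol_mode {
	DEMO_MPOL_DEFAULT,
	DEMO_MPOL_PREFERRED,
	DEMO_MPOL_BIND,
	DEMO_MPOL_INTERLEAVE,
};

static bool demo_nodemask_intersects(enum demo_mpol_mode mode,
				     unsigned long policy_nodes,
				     unsigned long oom_mask)
{
	switch (mode) {
	case DEMO_MPOL_DEFAULT:
	case DEMO_MPOL_PREFERRED:
		/* may fall back to any node, so always a kill candidate */
		return true;
	case DEMO_MPOL_BIND:
	case DEMO_MPOL_INTERLEAVE:
		/* hard binding: eligible only if the node sets overlap */
		return (policy_nodes & oom_mask) != 0;
	}
	return true;
}

int main(void)
{
	/* bound to nodes {0,1}, oom on node 2: ineligible for the kill */
	assert(!demo_nodemask_intersects(DEMO_MPOL_BIND, 0x3, 0x4));
	/* preferred policy can spill onto other nodes: always eligible */
	assert(demo_nodemask_intersects(DEMO_MPOL_PREFERRED, 0x1, 0x4));
	return 0;
}
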
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7c8488f6a3f5..13ceed78bc45 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/memcontrol.h>
+#include <linux/mempolicy.h>
 #include <linux/security.h>
 
 int sysctl_panic_on_oom;
@@ -35,23 +36,57 @@ int sysctl_oom_dump_tasks;
 static DEFINE_SPINLOCK(zone_scan_lock);
 /* #define DEBUG */
 
-/*
- * Is all threads of the target process nodes overlap ours?
- */
-static int has_intersects_mems_allowed(struct task_struct *tsk)
+#ifdef CONFIG_NUMA
+/**
+ * has_intersects_mems_allowed() - check task eligibility for kill
+ * @tsk: task struct of which task to consider
+ * @mask: nodemask passed to page allocator for mempolicy ooms
+ *
+ * Task eligibility is determined by whether or not a candidate task, @tsk,
+ * shares the same mempolicy nodes as current if it is bound by such a policy
+ * and whether or not it has the same set of allowed cpuset nodes.
+ */
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+					const nodemask_t *mask)
 {
-	struct task_struct *t;
+	struct task_struct *start = tsk;
 
-	t = tsk;
 	do {
-		if (cpuset_mems_allowed_intersects(current, t))
-			return 1;
-		t = next_thread(t);
-	} while (t != tsk);
-
-	return 0;
+		if (mask) {
+			/*
+			 * If this is a mempolicy constrained oom, tsk's
+			 * cpuset is irrelevant.  Only return true if its
+			 * mempolicy intersects current, otherwise it may be
+			 * needlessly killed.
+			 */
+			if (mempolicy_nodemask_intersects(tsk, mask))
+				return true;
+		} else {
+			/*
+			 * This is not a mempolicy constrained oom, so only
+			 * check the mems of tsk's cpuset.
+			 */
+			if (cpuset_mems_allowed_intersects(current, tsk))
+				return true;
+		}
+		tsk = next_thread(tsk);
+	} while (tsk != start);
+	return false;
+}
+#else
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+					const nodemask_t *mask)
+{
+	return true;
 }
+#endif /* CONFIG_NUMA */
 
+/*
+ * The process p may have detached its own ->mm while exiting or through
+ * use_mm(), but one or more of its subthreads may still have a valid
+ * pointer.  Return p, or any of its subthreads with a valid ->mm, with
+ * task_lock() held.
+ */
 static struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
 	struct task_struct *t = p;
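
The rewritten has_intersects_mems_allowed() walks every thread in the group, because any one thread may pass the check even when the group leader does not; next_thread() returns the next entry on a circular list, so the loop stops when it wraps back to the starting task. A runnable model of that traversal (struct demo_thread and its fields are invented for the example):

#include <stdbool.h>
#include <stdio.h>

struct demo_thread {
	unsigned long mems_allowed;	/* nodemask stand-in */
	struct demo_thread *next;	/* circular thread-group list */
};

static bool demo_group_intersects(struct demo_thread *tsk,
				  unsigned long our_mems)
{
	struct demo_thread *start = tsk;

	do {
		if (tsk->mems_allowed & our_mems)
			return true;	/* one matching thread is enough */
		tsk = tsk->next;
	} while (tsk != start);
	return false;
}

int main(void)
{
	struct demo_thread t2 = { .mems_allowed = 0x4, .next = NULL };
	struct demo_thread t1 = { .mems_allowed = 0x1, .next = &t2 };

	t2.next = &t1;	/* close the circle, like a thread group's list */
	printf("eligible: %d\n", demo_group_intersects(&t1, 0x4));
	return 0;
}
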
@@ -106,10 +141,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * The memory size of the process is the basis for the badness.
 	 */
 	points = p->mm->total_vm;
-
-	/*
-	 * After this unlock we can no longer dereference local variable `mm'
-	 */
 	task_unlock(p);
 
 	/*
@@ -253,7 +284,8 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned long *ppoints,
-						struct mem_cgroup *mem)
+		struct mem_cgroup *mem, enum oom_constraint constraint,
+		const nodemask_t *mask)
 {
 	struct task_struct *p;
 	struct task_struct *chosen = NULL;
@@ -269,7 +301,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			continue;
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
-		if (!has_intersects_mems_allowed(p))
+		if (!has_intersects_mems_allowed(p,
+				constraint == CONSTRAINT_MEMORY_POLICY ? mask :
+									 NULL))
 			continue;
 
 		/*
@@ -497,7 +531,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
497 panic("out of memory(memcg). panic_on_oom is selected.\n"); 531 panic("out of memory(memcg). panic_on_oom is selected.\n");
498 read_lock(&tasklist_lock); 532 read_lock(&tasklist_lock);
499retry: 533retry:
500 p = select_bad_process(&points, mem); 534 p = select_bad_process(&points, mem, CONSTRAINT_NONE, NULL);
501 if (!p || PTR_ERR(p) == -1UL) 535 if (!p || PTR_ERR(p) == -1UL)
502 goto out; 536 goto out;
503 537
@@ -576,7 +610,8 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 /*
  * Must be called with tasklist_lock held for read.
  */
-static void __out_of_memory(gfp_t gfp_mask, int order)
+static void __out_of_memory(gfp_t gfp_mask, int order,
+			enum oom_constraint constraint, const nodemask_t *mask)
 {
 	struct task_struct *p;
 	unsigned long points;
@@ -590,7 +625,7 @@ retry:
 	 * Rambo mode: Shoot down a process and hope it solves whatever
 	 * issues we may have.
 	 */
-	p = select_bad_process(&points, NULL);
+	p = select_bad_process(&points, NULL, constraint, mask);
 
 	if (PTR_ERR(p) == -1UL)
 		return;
@@ -624,7 +659,8 @@ void pagefault_out_of_memory(void)
624 panic("out of memory from page fault. panic_on_oom is selected.\n"); 659 panic("out of memory from page fault. panic_on_oom is selected.\n");
625 660
626 read_lock(&tasklist_lock); 661 read_lock(&tasklist_lock);
627 __out_of_memory(0, 0); /* unknown gfp_mask and order */ 662 /* unknown gfp_mask and order */
663 __out_of_memory(0, 0, CONSTRAINT_NONE, NULL);
628 read_unlock(&tasklist_lock); 664 read_unlock(&tasklist_lock);
629 665
630 /* 666 /*
@@ -640,6 +676,7 @@ void pagefault_out_of_memory(void)
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
@@ -678,24 +715,19 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 */
 	constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
 	read_lock(&tasklist_lock);
-
-	switch (constraint) {
-	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, gfp_mask, order, 0, NULL,
-				"No available memory (MPOL_BIND)");
-		break;
-
-	case CONSTRAINT_NONE:
-		if (sysctl_panic_on_oom) {
+	if (unlikely(sysctl_panic_on_oom)) {
+		/*
+		 * panic_on_oom only affects CONSTRAINT_NONE, the kernel
+		 * should not panic for cpuset or mempolicy induced memory
+		 * failures.
+		 */
+		if (constraint == CONSTRAINT_NONE) {
 			dump_header(NULL, gfp_mask, order, NULL);
-			panic("out of memory. panic_on_oom is selected\n");
+			read_unlock(&tasklist_lock);
+			panic("Out of memory: panic_on_oom is enabled\n");
 		}
-		/* Fall-through */
-	case CONSTRAINT_CPUSET:
-		__out_of_memory(gfp_mask, order);
-		break;
 	}
-
+	__out_of_memory(gfp_mask, order, constraint, nodemask);
 	read_unlock(&tasklist_lock);
 
 	/*
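
The final hunk also narrows when the kernel panics: with panic_on_oom set, only a system-wide (CONSTRAINT_NONE) oom panics, while cpuset- and mempolicy-constrained failures still fall through to the task killer. A small model of that gate, with exit(1) standing in for panic() and all demo_ names invented:

#include <stdio.h>
#include <stdlib.h>

enum demo_oom_constraint {
	DEMO_CONSTRAINT_NONE,
	DEMO_CONSTRAINT_CPUSET,
	DEMO_CONSTRAINT_MEMORY_POLICY,
};

static int demo_sysctl_panic_on_oom = 1;

static void demo_out_of_memory(enum demo_oom_constraint constraint)
{
	/* panic_on_oom only applies to a system-wide oom */
	if (demo_sysctl_panic_on_oom && constraint == DEMO_CONSTRAINT_NONE) {
		fputs("Out of memory: panic_on_oom is enabled\n", stderr);
		exit(1);
	}
	puts("constrained oom: select a task to kill instead of panicking");
}

int main(void)
{
	/* a mempolicy-constrained failure must not bring down the machine */
	demo_out_of_memory(DEMO_CONSTRAINT_MEMORY_POLICY);
	return 0;
}
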