 include/linux/mempolicy.h |  13
 mm/mempolicy.c            |  44
 mm/oom_kill.c             | 104
 3 files changed, 124 insertions(+), 37 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7b9ef6bf45aa..31ac26ca4acf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -210,6 +210,8 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask);
 extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
+extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+				const nodemask_t *mask);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -338,7 +340,16 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 	return node_zonelist(0, gfp_flags);
 }
 
-static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
+static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
+{
+	return false;
+}
+
+static inline bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+			const nodemask_t *mask)
+{
+	return false;
+}
 
 static inline int do_migrate_pages(struct mm_struct *mm,
 			const nodemask_t *from_nodes,
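The declaration above is the whole of the new header interface: one predicate a caller can use to ask whether a task's mempolicy could have placed pages on a given set of nodes. A minimal, hypothetical caller sketch (not part of this patch; p and oom_nodemask are illustrative names) looks like:

	/* Skip tasks whose mempolicy cannot hold memory on the nodes in question. */
	if (!mempolicy_nodemask_intersects(p, oom_nodemask))
		continue;

With CONFIG_NUMA disabled, the inline stub added above makes the same call compile but always report "no intersection", so a caller needs its own !CONFIG_NUMA behaviour, as mm/oom_kill.c provides below.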
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5bc0a96beb51..8a73708d59bb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1712,6 +1712,50 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
 }
 #endif
 
+/*
+ * mempolicy_nodemask_intersects
+ *
+ * If tsk's mempolicy is "default" [NULL], return 'true' to indicate default
+ * policy.  Otherwise, check for intersection between mask and the policy
+ * nodemask for 'bind' or 'interleave' policy.  For 'preferred' or 'local'
+ * policy, always return true since it may allocate elsewhere on fallback.
+ *
+ * Takes task_lock(tsk) to prevent freeing of its mempolicy.
+ */
+bool mempolicy_nodemask_intersects(struct task_struct *tsk,
+					const nodemask_t *mask)
+{
+	struct mempolicy *mempolicy;
+	bool ret = true;
+
+	if (!mask)
+		return ret;
+	task_lock(tsk);
+	mempolicy = tsk->mempolicy;
+	if (!mempolicy)
+		goto out;
+
+	switch (mempolicy->mode) {
+	case MPOL_PREFERRED:
+		/*
+		 * MPOL_PREFERRED and MPOL_F_LOCAL are only preferred nodes to
+		 * allocate from; they may fall back to other nodes when oom.
+		 * Thus, it's possible for tsk to have allocated memory from
+		 * nodes in mask.
+		 */
+		break;
+	case MPOL_BIND:
+	case MPOL_INTERLEAVE:
+		ret = nodes_intersects(mempolicy->v.nodes, *mask);
+		break;
+	default:
+		BUG();
+	}
+out:
+	task_unlock(tsk);
+	return ret;
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
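To make the bind/interleave branch concrete, here is a throwaway kernel-style fragment (illustrative only, not part of the patch; the node numbers are made up) showing how nodes_intersects() decides the result:

	/* MPOL_BIND over nodes {0,1}; OOM constrained to node 2. */
	nodemask_t policy_nodes = NODE_MASK_NONE;
	nodemask_t oom_nodes = NODE_MASK_NONE;

	node_set(0, policy_nodes);
	node_set(1, policy_nodes);
	node_set(2, oom_nodes);

	/* No common node: mempolicy_nodemask_intersects() would return false. */
	if (!nodes_intersects(policy_nodes, oom_nodes))
		pr_info("no overlap: task would be skipped as a kill candidate\n");

MPOL_PREFERRED deliberately bypasses this check: a preferred policy can fall back to any node, so the task may hold memory in the mask even when its policy nodemask does not intersect it.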
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7c8488f6a3f5..13ceed78bc45 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/memcontrol.h>
+#include <linux/mempolicy.h>
 #include <linux/security.h>
 
 int sysctl_panic_on_oom;
@@ -35,23 +36,57 @@ int sysctl_oom_dump_tasks;
 static DEFINE_SPINLOCK(zone_scan_lock);
 /* #define DEBUG */
 
-/*
- * Is all threads of the target process nodes overlap ours?
+#ifdef CONFIG_NUMA
+/**
+ * has_intersects_mems_allowed() - check task eligibility for kill
+ * @tsk: task struct of which task to consider
+ * @mask: nodemask passed to page allocator for mempolicy ooms
+ *
+ * Task eligibility is determined by whether or not a candidate task, @tsk,
+ * shares the same mempolicy nodes as current if it is bound by such a policy
+ * and whether or not it has the same set of allowed cpuset nodes.
  */
-static int has_intersects_mems_allowed(struct task_struct *tsk)
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+					const nodemask_t *mask)
 {
-	struct task_struct *t;
+	struct task_struct *start = tsk;
 
-	t = tsk;
 	do {
-		if (cpuset_mems_allowed_intersects(current, t))
-			return 1;
-		t = next_thread(t);
-	} while (t != tsk);
-
-	return 0;
+		if (mask) {
+			/*
+			 * If this is a mempolicy constrained oom, tsk's
+			 * cpuset is irrelevant.  Only return true if its
+			 * mempolicy intersects current, otherwise it may be
+			 * needlessly killed.
+			 */
+			if (mempolicy_nodemask_intersects(tsk, mask))
+				return true;
+		} else {
+			/*
+			 * This is not a mempolicy constrained oom, so only
+			 * check the mems of tsk's cpuset.
+			 */
+			if (cpuset_mems_allowed_intersects(current, tsk))
+				return true;
+		}
+		tsk = next_thread(tsk);
+	} while (tsk != start);
+	return false;
+}
+#else
+static bool has_intersects_mems_allowed(struct task_struct *tsk,
+					const nodemask_t *mask)
+{
+	return true;
 }
+#endif /* CONFIG_NUMA */
 
+/*
+ * The process p may have detached its own ->mm while exiting or through
+ * use_mm(), but one or more of its subthreads may still have a valid
+ * pointer.  Return p, or any of its subthreads with a valid ->mm, with
+ * task_lock() held.
+ */
 static struct task_struct *find_lock_task_mm(struct task_struct *p)
 {
 	struct task_struct *t = p;
@@ -106,10 +141,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * The memory size of the process is the basis for the badness.
 	 */
 	points = p->mm->total_vm;
-
-	/*
-	 * After this unlock we can no longer dereference local variable `mm'
-	 */
 	task_unlock(p);
 
 	/*
@@ -253,7 +284,8 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned long *ppoints,
-					struct mem_cgroup *mem)
+		struct mem_cgroup *mem, enum oom_constraint constraint,
+		const nodemask_t *mask)
 {
 	struct task_struct *p;
 	struct task_struct *chosen = NULL;
@@ -269,7 +301,9 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
 			continue;
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
-		if (!has_intersects_mems_allowed(p))
+		if (!has_intersects_mems_allowed(p,
+				constraint == CONSTRAINT_MEMORY_POLICY ? mask :
+									 NULL))
 			continue;
 
 		/*
@@ -497,7 +531,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 		panic("out of memory(memcg). panic_on_oom is selected.\n");
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process(&points, mem);
+	p = select_bad_process(&points, mem, CONSTRAINT_NONE, NULL);
 	if (!p || PTR_ERR(p) == -1UL)
 		goto out;
 
@@ -576,7 +610,8 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 /*
  * Must be called with tasklist_lock held for read.
  */
-static void __out_of_memory(gfp_t gfp_mask, int order)
+static void __out_of_memory(gfp_t gfp_mask, int order,
+			enum oom_constraint constraint, const nodemask_t *mask)
 {
 	struct task_struct *p;
 	unsigned long points;
@@ -590,7 +625,7 @@ retry:
 	 * Rambo mode: Shoot down a process and hope it solves whatever
 	 * issues we may have.
 	 */
-	p = select_bad_process(&points, NULL);
+	p = select_bad_process(&points, NULL, constraint, mask);
 
 	if (PTR_ERR(p) == -1UL)
 		return;
@@ -624,7 +659,8 @@ void pagefault_out_of_memory(void)
 		panic("out of memory from page fault. panic_on_oom is selected.\n");
 
 	read_lock(&tasklist_lock);
-	__out_of_memory(0, 0); /* unknown gfp_mask and order */
+	/* unknown gfp_mask and order */
+	__out_of_memory(0, 0, CONSTRAINT_NONE, NULL);
 	read_unlock(&tasklist_lock);
 
 	/*
@@ -640,6 +676,7 @@ void pagefault_out_of_memory(void)
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
  *
  * If we run out of memory, we have the choice between either
  * killing a random task (bad), letting the system crash (worse)
@@ -678,24 +715,19 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 	 */
 	constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
 	read_lock(&tasklist_lock);
-
-	switch (constraint) {
-	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, gfp_mask, order, 0, NULL,
-				"No available memory (MPOL_BIND)");
-		break;
-
-	case CONSTRAINT_NONE:
-		if (sysctl_panic_on_oom) {
+	if (unlikely(sysctl_panic_on_oom)) {
+		/*
+		 * panic_on_oom only affects CONSTRAINT_NONE, the kernel
+		 * should not panic for cpuset or mempolicy induced memory
+		 * failures.
+		 */
+		if (constraint == CONSTRAINT_NONE) {
 			dump_header(NULL, gfp_mask, order, NULL);
-			panic("out of memory. panic_on_oom is selected\n");
+			read_unlock(&tasklist_lock);
+			panic("Out of memory: panic_on_oom is enabled\n");
 		}
-		/* Fall-through */
-	case CONSTRAINT_CPUSET:
-		__out_of_memory(gfp_mask, order);
-		break;
 	}
+	__out_of_memory(gfp_mask, order, constraint, nodemask);
 	read_unlock(&tasklist_lock);
 
 	/*
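Taken together, the oom_kill.c changes replace the old per-constraint switch with a single path: panic_on_oom is honoured only for unconstrained ooms, and the nodemask is threaded down to victim selection, where it is consulted only for mempolicy-constrained ooms. A compressed userspace analogue of that control flow (illustrative only; this is not kernel code and the helper names merely echo the functions above):

	#include <stdbool.h>
	#include <stdio.h>

	enum constraint { CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY };

	/*
	 * Stand-in for has_intersects_mems_allowed(): the mempolicy overlap
	 * matters only for mempolicy-constrained ooms; otherwise the cpuset
	 * overlap decides, as before the patch.
	 */
	static bool eligible(enum constraint c, bool policy_overlaps, bool cpuset_overlaps)
	{
		if (c == CONSTRAINT_MEMORY_POLICY)
			return policy_overlaps;
		return cpuset_overlaps;
	}

	int main(void)
	{
		bool panic_on_oom = true;
		enum constraint c = CONSTRAINT_MEMORY_POLICY;

		/* After this patch, panic_on_oom only applies to CONSTRAINT_NONE. */
		if (panic_on_oom && c == CONSTRAINT_NONE) {
			fprintf(stderr, "Out of memory: panic_on_oom is enabled\n");
			return 1;
		}

		/* Otherwise fall through to victim selection. */
		printf("task eligible: %d\n", eligible(c, false, true)); /* prints 0: skipped */
		return 0;
	}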