author	KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>	2009-12-15 19:45:33 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-16 10:19:57 -0500
commit	4365a5676fa3aa1d5ae6c90c22a0044f09ba584e (patch)
tree	5b9914ccbdcf2aa695473421e71f6299fbe78cef /mm/oom_kill.c
parent	3b4798cbc13dd8d1150aa6377f97f0e11450a67d (diff)
oom-kill: fix NUMA constraint check with nodemask
Fix node-oriented allocation handling in oom_kill.c.  I consider this a
bugfix, not an enhancement.

Things have changed recently:
 - alloc_pages() now takes a nodemask as an argument, via __alloc_pages_nodemask().
 - mempolicy no longer maintains its own private zonelists.
   (And cpuset doesn't pass a nodemask to __alloc_pages_nodemask().)

So the oom killer's current constraint check is wrong.

This patch:
 - checks the nodemask: if a nodemask was passed and it doesn't cover all of
   node_states[N_HIGH_MEMORY], the constraint is CONSTRAINT_MEMORY_POLICY.
 - scans the whole zonelist under the nodemask: if it hits the cpuset's wall,
   the failure came from the cpuset (CONSTRAINT_CPUSET).

And it also:
 - modifies the caller of out_of_memory() not to invoke the oom killer for
   __GFP_THISNODE allocations.  This doesn't change current behavior: callers
   that use __GFP_THISNODE should handle the page allocation failure themselves.
 - handles the __GFP_NOFAIL + __GFP_THISNODE path.  This is something of a
   FIXME, but that gfp_mask combination is not used at present.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
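The matching caller-side change described in the log message lands in mm/page_alloc.c and is therefore outside the diffstat below.  As a rough, minimal sketch of that rule only (the helper name maybe_out_of_memory is made up for illustration and does not appear in the patch), it amounts to:

#include <linux/gfp.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/oom.h>

/*
 * Illustrative sketch of the caller-side rule, not the actual hunk from
 * mm/page_alloc.c: a __GFP_THISNODE caller restricted itself to a single
 * node and is expected to handle the allocation failure itself, so the
 * OOM killer is not invoked on its behalf.
 */
static void maybe_out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
				int order, nodemask_t *nodemask)
{
	if (gfp_mask & __GFP_THISNODE)
		return;

	out_of_memory(zonelist, gfp_mask, order, nodemask);
}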
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--	mm/oom_kill.c	46
1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6bb8a7a7ec9a..25c679e0288a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 /*
  * Determine the type of allocation constraint.
  */
-static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-						gfp_t gfp_mask)
-{
 #ifdef CONFIG_NUMA
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+				gfp_t gfp_mask, nodemask_t *nodemask)
+{
 	struct zone *zone;
 	struct zoneref *z;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		if (cpuset_zone_allowed_softwall(zone, gfp_mask))
-			node_clear(zone_to_nid(zone), nodes);
-		else
-			return CONSTRAINT_CPUSET;
+	/*
+	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
+	 * to kill current.We have to random task kill in this case.
+	 * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
+	 */
+	if (gfp_mask & __GFP_THISNODE)
+		return CONSTRAINT_NONE;
 
-	if (!nodes_empty(nodes))
+	/*
+	 * The nodemask here is a nodemask passed to alloc_pages(). Now,
+	 * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
+	 * feature. mempolicy is an only user of nodemask here.
+	 * check mempolicy's nodemask contains all N_HIGH_MEMORY
+	 */
+	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
 		return CONSTRAINT_MEMORY_POLICY;
-#endif
+
+	/* Check this allocation failure is caused by cpuset's wall function */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+			high_zoneidx, nodemask)
+		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
+			return CONSTRAINT_CPUSET;
 
 	return CONSTRAINT_NONE;
 }
+#else
+static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+				gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	return CONSTRAINT_NONE;
+}
+#endif
 
 /*
  * Simple selection loop. We chose the process with the highest
@@ -613,7 +632,8 @@ rest_and_return:
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask)
 {
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
@@ -632,7 +652,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask);
+	constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
 	read_lock(&tasklist_lock);
 
 	switch (constraint) {
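A note on the new constrained_alloc(): the nodes_subset() test reads "is node_states[N_HIGH_MEMORY] a subset of *nodemask?", so CONSTRAINT_MEMORY_POLICY is returned exactly when the passed-in policy mask excludes some node that has memory.  The following userspace analogue (plain unsigned long bitmasks in place of nodemask_t, a made-up two-node layout, and a simple allowed-mask in place of cpuset_zone_allowed_softwall(); none of this is kernel code) illustrates the same decision order:

#include <stdbool.h>
#include <stdio.h>

enum oom_constraint {
	CONSTRAINT_NONE,
	CONSTRAINT_CPUSET,
	CONSTRAINT_MEMORY_POLICY,
};

/* Stand-in for node_states[N_HIGH_MEMORY]: nodes 0 and 1 have memory. */
#define NODES_WITH_MEMORY 0x3UL

static enum oom_constraint classify(bool gfp_thisnode,
				    const unsigned long *policy_mask,
				    unsigned long cpuset_allowed)
{
	/* __GFP_THISNODE failures are left to the caller. */
	if (gfp_thisnode)
		return CONSTRAINT_NONE;

	/*
	 * nodes_subset(node_states[N_HIGH_MEMORY], *nodemask) asks whether
	 * the policy mask covers every node that has memory; if it does
	 * not, the mempolicy is what constrained the allocation.
	 */
	if (policy_mask && (NODES_WITH_MEMORY & ~*policy_mask))
		return CONSTRAINT_MEMORY_POLICY;

	/*
	 * Otherwise consider the nodes reachable under the policy mask
	 * (the kernel walks the zonelist) and blame the cpuset if it
	 * walls any of them off.
	 */
	unsigned long reachable = NODES_WITH_MEMORY &
				  (policy_mask ? *policy_mask : ~0UL);
	if (reachable & ~cpuset_allowed)
		return CONSTRAINT_CPUSET;

	return CONSTRAINT_NONE;
}

int main(void)
{
	unsigned long mbind_node0 = 0x1UL;	/* policy limited to node 0 */

	/* Policy excludes node 1 -> CONSTRAINT_MEMORY_POLICY (prints 2). */
	printf("%d\n", classify(false, &mbind_node0, 0x3UL));
	/* No policy, cpuset allows only node 0 -> CONSTRAINT_CPUSET (prints 1). */
	printf("%d\n", classify(false, NULL, 0x1UL));
	/* __GFP_THISNODE -> CONSTRAINT_NONE (prints 0). */
	printf("%d\n", classify(true, NULL, 0x0UL));
	return 0;
}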