diff options
author | KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com> | 2009-12-15 19:45:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-16 10:19:57 -0500 |
commit | 4365a5676fa3aa1d5ae6c90c22a0044f09ba584e (patch) | |
tree | 5b9914ccbdcf2aa695473421e71f6299fbe78cef /mm/oom_kill.c | |
parent | 3b4798cbc13dd8d1150aa6377f97f0e11450a67d (diff) |
oom-kill: fix NUMA constraint check with nodemask
Fix node-oriented allocation handling in oom-kill.c I myself think of this
as a bugfix not as an ehnancement.
In these days, things are changed as
- alloc_pages() eats nodemask as its arguments, __alloc_pages_nodemask().
- mempolicy don't maintain its own private zonelists.
(And cpuset doesn't use nodemask for __alloc_pages_nodemask())
So, current oom-killer's check function is wrong.
This patch does
- check nodemask, if nodemask && nodemask doesn't cover all
node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY.
- Scan all zonelist under nodemask, if it hits cpuset's wall
this faiulre is from cpuset.
And
- modifies the caller of out_of_memory not to call oom if __GFP_THISNODE.
This doesn't change "current" behavior. If callers use __GFP_THISNODE
it should handle "page allocation failure" by itself.
- handle __GFP_NOFAIL+__GFP_THISNODE path.
This is something like a FIXME but this gfpmask is not used now.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 46 |
1 files changed, 33 insertions, 13 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6bb8a7a7ec9a..25c679e0288a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) | |||
196 | /* | 196 | /* |
197 | * Determine the type of allocation constraint. | 197 | * Determine the type of allocation constraint. |
198 | */ | 198 | */ |
199 | static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist, | ||
200 | gfp_t gfp_mask) | ||
201 | { | ||
202 | #ifdef CONFIG_NUMA | 199 | #ifdef CONFIG_NUMA |
200 | static enum oom_constraint constrained_alloc(struct zonelist *zonelist, | ||
201 | gfp_t gfp_mask, nodemask_t *nodemask) | ||
202 | { | ||
203 | struct zone *zone; | 203 | struct zone *zone; |
204 | struct zoneref *z; | 204 | struct zoneref *z; |
205 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | 205 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); |
206 | nodemask_t nodes = node_states[N_HIGH_MEMORY]; | ||
207 | 206 | ||
208 | for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) | 207 | /* |
209 | if (cpuset_zone_allowed_softwall(zone, gfp_mask)) | 208 | * Reach here only when __GFP_NOFAIL is used. So, we should avoid |
210 | node_clear(zone_to_nid(zone), nodes); | 209 | * to kill current.We have to random task kill in this case. |
211 | else | 210 | * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. |
212 | return CONSTRAINT_CPUSET; | 211 | */ |
212 | if (gfp_mask & __GFP_THISNODE) | ||
213 | return CONSTRAINT_NONE; | ||
213 | 214 | ||
214 | if (!nodes_empty(nodes)) | 215 | /* |
216 | * The nodemask here is a nodemask passed to alloc_pages(). Now, | ||
217 | * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy | ||
218 | * feature. mempolicy is an only user of nodemask here. | ||
219 | * check mempolicy's nodemask contains all N_HIGH_MEMORY | ||
220 | */ | ||
221 | if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) | ||
215 | return CONSTRAINT_MEMORY_POLICY; | 222 | return CONSTRAINT_MEMORY_POLICY; |
216 | #endif | 223 | |
224 | /* Check this allocation failure is caused by cpuset's wall function */ | ||
225 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | ||
226 | high_zoneidx, nodemask) | ||
227 | if (!cpuset_zone_allowed_softwall(zone, gfp_mask)) | ||
228 | return CONSTRAINT_CPUSET; | ||
217 | 229 | ||
218 | return CONSTRAINT_NONE; | 230 | return CONSTRAINT_NONE; |
219 | } | 231 | } |
232 | #else | ||
233 | static enum oom_constraint constrained_alloc(struct zonelist *zonelist, | ||
234 | gfp_t gfp_mask, nodemask_t *nodemask) | ||
235 | { | ||
236 | return CONSTRAINT_NONE; | ||
237 | } | ||
238 | #endif | ||
220 | 239 | ||
221 | /* | 240 | /* |
222 | * Simple selection loop. We chose the process with the highest | 241 | * Simple selection loop. We chose the process with the highest |
@@ -613,7 +632,8 @@ rest_and_return: | |||
613 | * OR try to be smart about which process to kill. Note that we | 632 | * OR try to be smart about which process to kill. Note that we |
614 | * don't have to be perfect here, we just have to be good. | 633 | * don't have to be perfect here, we just have to be good. |
615 | */ | 634 | */ |
616 | void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | 635 | void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, |
636 | int order, nodemask_t *nodemask) | ||
617 | { | 637 | { |
618 | unsigned long freed = 0; | 638 | unsigned long freed = 0; |
619 | enum oom_constraint constraint; | 639 | enum oom_constraint constraint; |
@@ -632,7 +652,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) | |||
632 | * Check if there were limitations on the allocation (only relevant for | 652 | * Check if there were limitations on the allocation (only relevant for |
633 | * NUMA) that may require different handling. | 653 | * NUMA) that may require different handling. |
634 | */ | 654 | */ |
635 | constraint = constrained_alloc(zonelist, gfp_mask); | 655 | constraint = constrained_alloc(zonelist, gfp_mask, nodemask); |
636 | read_lock(&tasklist_lock); | 656 | read_lock(&tasklist_lock); |
637 | 657 | ||
638 | switch (constraint) { | 658 | switch (constraint) { |