author	David Rientjes <rientjes@google.com>	2007-10-17 02:25:55 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-17 11:42:45 -0400
commit	098d7f128a4e53cb64930628915ac767785e0e60 (patch)
tree	ed3cab1daecab7f2a64b27deed190df3ec218789
parent	e815af95f94914993bbad279c71cf5fef9f4eaac (diff)
oom: add per-zone locking
OOM killer synchronization should be done with zone granularity so that memory policy and cpuset allocations may have their corresponding zones locked and allow parallel kills for other OOM conditions that may exist elsewhere in the system.  DMA allocations can be targeted at the zone level, which would not be possible if locking were done per node or globally.

Synchronization is done with a variation of "trylocks": if the trylock fails, the current task is put to sleep and the failed allocation attempt is restarted later; otherwise, the OOM killer is invoked.

Each zone in the zonelist that __alloc_pages() was called with is checked for the newly-introduced ZONE_OOM_LOCKED flag.  If any zone has this flag set, the "trylock" to serialize the OOM killer fails and returns zero.  Otherwise, all the zones have ZONE_OOM_LOCKED set and try_set_zone_oom() returns non-zero.

Cc: Andrea Arcangeli <andrea@suse.de>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
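To make the intended call pattern concrete, here is a rough caller-side sketch of how the allocator slow path is expected to use the new interface.  This is illustrative only and not part of this patch: the actual hook-up in __alloc_pages() belongs to a separate change, and the "restart" label and the zonelist/gfp_mask/order locals are assumed from the usual allocator context.

	/* Serialize against other OOM kills covering any zone in our zonelist. */
	if (!try_set_zone_oom(zonelist)) {
		/*
		 * A parallel OOM kill already includes one of our zones:
		 * sleep briefly and retry the failed allocation instead of
		 * invoking the OOM killer again.
		 */
		schedule_timeout_uninterruptible(1);
		goto restart;
	}

	/* ZONE_OOM_LOCKED is held on every zone in the zonelist: kill. */
	out_of_memory(zonelist, gfp_mask, order);
	clear_zonelist_oom(zonelist);
	goto restart;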
-rw-r--r--	include/linux/mmzone.h	5
-rw-r--r--	include/linux/oom.h	3
-rw-r--r--	mm/oom_kill.c	52
3 files changed, 60 insertions(+), 0 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bad9486ee0cc..9011505e740d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -344,6 +344,7 @@ struct zone {
 typedef enum {
 	ZONE_ALL_UNRECLAIMABLE,		/* all pages pinned */
 	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
+	ZONE_OOM_LOCKED,		/* zone is in OOM killer zonelist */
 } zone_flags_t;
 
 static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -363,6 +364,10 @@ static inline int zone_is_reclaim_locked(const struct zone *zone)
 {
 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
 }
+static inline int zone_is_oom_locked(const struct zone *zone)
+{
+	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
+}
 
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
diff --git a/include/linux/oom.h b/include/linux/oom.h
index cf6ebf5b422c..e9081200cdb7 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -20,6 +20,9 @@ enum oom_constraint {
 	CONSTRAINT_MEMORY_POLICY,
 };
 
+extern int try_set_zone_oom(struct zonelist *zonelist);
+extern void clear_zonelist_oom(struct zonelist *zonelist);
+
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 1a7a4ef04e27..6e999c88c503 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -27,6 +27,7 @@
 #include <linux/notifier.h>
 
 int sysctl_panic_on_oom;
+static DEFINE_MUTEX(zone_scan_mutex);
 /* #define DEBUG */
 
 /**
@@ -374,6 +375,57 @@ int unregister_oom_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
+/*
+ * Try to acquire the OOM killer lock for the zones in zonelist.  Returns zero
+ * if a parallel OOM killing is already taking place that includes a zone in
+ * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
+ */
+int try_set_zone_oom(struct zonelist *zonelist)
+{
+	struct zone **z;
+	int ret = 1;
+
+	z = zonelist->zones;
+
+	mutex_lock(&zone_scan_mutex);
+	do {
+		if (zone_is_oom_locked(*z)) {
+			ret = 0;
+			goto out;
+		}
+	} while (*(++z) != NULL);
+
+	/*
+	 * Lock each zone in the zonelist under zone_scan_mutex so a parallel
+	 * invocation of try_set_zone_oom() doesn't succeed when it shouldn't.
+	 */
+	z = zonelist->zones;
+	do {
+		zone_set_flag(*z, ZONE_OOM_LOCKED);
+	} while (*(++z) != NULL);
+out:
+	mutex_unlock(&zone_scan_mutex);
+	return ret;
+}
+
+/*
+ * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed
+ * allocation attempts with zonelists containing them may now recall the OOM
+ * killer, if necessary.
+ */
+void clear_zonelist_oom(struct zonelist *zonelist)
+{
+	struct zone **z;
+
+	z = zonelist->zones;
+
+	mutex_lock(&zone_scan_mutex);
+	do {
+		zone_clear_flag(*z, ZONE_OOM_LOCKED);
+	} while (*(++z) != NULL);
+	mutex_unlock(&zone_scan_mutex);
+}
+
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  *