diff options
| author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2011-04-14 18:22:12 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-14 19:06:56 -0400 |
| commit | 929bea7c714220fc76ce3f75bef9056477c28e74 (patch) | |
| tree | d41b4592b658173e00c7b8bad2bce048f02e0ead | |
| parent | fe936dfc23fed3475b11067e8d9b70553eafcd9e (diff) | |
vmscan: all_unreclaimable() use zone->all_unreclaimable as a name
The all_unreclaimable check in direct reclaim was introduced in 2.6.19
by the following commit.
2006 Sep 25; commit 408d8544; oom: use unreclaimable info
And it went through a strange history. Firstly, the following commit broke
the logic unintentionally.
2008 Apr 29; commit a41f24ea; page allocator: smarter retry of
costly-order allocations
Two years later, I found an obviously meaningless code fragment and
restored the original intention with the following commit.
2010 Jun 04; commit bb21c7ce; vmscan: fix do_try_to_free_pages()
return value when priority==0
But the logic didn't work when a 32-bit highmem system goes into hibernation,
so Minchan slightly changed the algorithm and fixed it.
2010 Sep 22: commit d1908362: vmscan: check all_unreclaimable
in direct reclaim path
But recently Andrey Vagin found a new corner case. Look:
struct zone {
..
int all_unreclaimable;
..
unsigned long pages_scanned;
..
}
zone->all_unreclaimable and zone->pages_scanned are neither atomic
variables nor protected by a lock. Therefore a zone can end up in a state of
zone->pages_scanned=0 and zone->all_unreclaimable=1. In this case, the current
all_unreclaimable() returns false even though zone->all_unreclaimable=1.
This resulted in the kernel hanging up when executing a loop of the form
1. fork
2. mmap
3. touch memory
4. read memory
5. munmap
as described in
http://www.gossamer-threads.com/lists/linux/kernel/1348725#1348725
Is this an ignorable minor issue? No. Unfortunately, x86 has a very small DMA
zone and it easily becomes zone->all_unreclaimable=1. And once it becomes
all_unreclaimable=1, it never returns to all_unreclaimable=0. Why? If
all_unreclaimable=1, vmscan only tries DEF_PRIORITY reclaim, and
a-few-lru-pages>>DEF_PRIORITY always yields 0. That means no pages are scanned
at all!
Eventually, the oom-killer never works on such systems. That said, we can't
use zone->pages_scanned for this purpose. This patch restores
all_unreclaimable() to use zone->all_unreclaimable as before and, in addition,
adds an oom_killer_disabled check to avoid reintroducing the issue of commit
d1908362 ("vmscan: check all_unreclaimable in direct reclaim path").
Reported-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | mm/vmscan.c | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index c7f5a6d4b75b..f6b435c80079 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include <linux/memcontrol.h> | 41 | #include <linux/memcontrol.h> |
| 42 | #include <linux/delayacct.h> | 42 | #include <linux/delayacct.h> |
| 43 | #include <linux/sysctl.h> | 43 | #include <linux/sysctl.h> |
| 44 | #include <linux/oom.h> | ||
| 44 | 45 | ||
| 45 | #include <asm/tlbflush.h> | 46 | #include <asm/tlbflush.h> |
| 46 | #include <asm/div64.h> | 47 | #include <asm/div64.h> |
| @@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone) | |||
| 1988 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; | 1989 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; |
| 1989 | } | 1990 | } |
| 1990 | 1991 | ||
| 1991 | /* | 1992 | /* All zones in zonelist are unreclaimable? */ |
| 1992 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
| 1993 | * the zone into all_unreclaimable. It can't handle OOM during hibernation. | ||
| 1994 | * So let's check zone's unreclaimable in direct reclaim as well as kswapd. | ||
| 1995 | */ | ||
| 1996 | static bool all_unreclaimable(struct zonelist *zonelist, | 1993 | static bool all_unreclaimable(struct zonelist *zonelist, |
| 1997 | struct scan_control *sc) | 1994 | struct scan_control *sc) |
| 1998 | { | 1995 | { |
| 1999 | struct zoneref *z; | 1996 | struct zoneref *z; |
| 2000 | struct zone *zone; | 1997 | struct zone *zone; |
| 2001 | bool all_unreclaimable = true; | ||
| 2002 | 1998 | ||
| 2003 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1999 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
| 2004 | gfp_zone(sc->gfp_mask), sc->nodemask) { | 2000 | gfp_zone(sc->gfp_mask), sc->nodemask) { |
| @@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist, | |||
| 2006 | continue; | 2002 | continue; |
| 2007 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2003 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
| 2008 | continue; | 2004 | continue; |
| 2009 | if (zone_reclaimable(zone)) { | 2005 | if (!zone->all_unreclaimable) |
| 2010 | all_unreclaimable = false; | 2006 | return false; |
| 2011 | break; | ||
| 2012 | } | ||
| 2013 | } | 2007 | } |
| 2014 | 2008 | ||
| 2015 | return all_unreclaimable; | 2009 | return true; |
| 2016 | } | 2010 | } |
| 2017 | 2011 | ||
| 2018 | /* | 2012 | /* |
| @@ -2108,6 +2102,14 @@ out: | |||
| 2108 | if (sc->nr_reclaimed) | 2102 | if (sc->nr_reclaimed) |
| 2109 | return sc->nr_reclaimed; | 2103 | return sc->nr_reclaimed; |
| 2110 | 2104 | ||
| 2105 | /* | ||
| 2106 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
| 2107 | * the zone into all_unreclaimable. Thus bypassing all_unreclaimable | ||
| 2108 | * check. | ||
| 2109 | */ | ||
| 2110 | if (oom_killer_disabled) | ||
| 2111 | return 0; | ||
| 2112 | |||
| 2111 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 2113 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
| 2112 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) | 2114 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) |
| 2113 | return 1; | 2115 | return 1; |
