diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2011-04-14 18:22:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-04-14 19:06:56 -0400 |
commit | 929bea7c714220fc76ce3f75bef9056477c28e74 (patch) | |
tree | d41b4592b658173e00c7b8bad2bce048f02e0ead /mm/vmscan.c | |
parent | fe936dfc23fed3475b11067e8d9b70553eafcd9e (diff) |
vmscan: all_unreclaimable() use zone->all_unreclaimable as a name
all_unreclaimable check in direct reclaim has been introduced at 2.6.19
by following commit.
2006 Sep 25; commit 408d8544; oom: use unreclaimable info
And it went through strange history. firstly, following commit broke
the logic unintentionally.
2008 Apr 29; commit a41f24ea; page allocator: smarter retry of
costly-order allocations
Two years later, I've found obvious meaningless code fragment and
restored original intention by following commit.
2010 Jun 04; commit bb21c7ce; vmscan: fix do_try_to_free_pages()
return value when priority==0
But, the logic didn't works when 32bit highmem system goes hibernation
and Minchan slightly changed the algorithm and fixed it .
2010 Sep 22: commit d1908362: vmscan: check all_unreclaimable
in direct reclaim path
But, recently, Andrey Vagin found the new corner case. Look,
struct zone {
..
int all_unreclaimable;
..
unsigned long pages_scanned;
..
}
zone->all_unreclaimable and zone->pages_scanned are neigher atomic
variables nor protected by lock. Therefore zones can become a state of
zone->page_scanned=0 and zone->all_unreclaimable=1. In this case, current
all_unreclaimable() return false even though zone->all_unreclaimabe=1.
This resulted in the kernel hanging up when executing a loop of the form
1. fork
2. mmap
3. touch memory
4. read memory
5. munmmap
as described in
http://www.gossamer-threads.com/lists/linux/kernel/1348725#1348725
Is this ignorable minor issue? No. Unfortunately, x86 has very small dma
zone and it become zone->all_unreclamble=1 easily. and if it become
all_unreclaimable=1, it never restore all_unreclaimable=0. Why? if
all_unreclaimable=1, vmscan only try DEF_PRIORITY reclaim and
a-few-lru-pages>>DEF_PRIORITY always makes 0. that mean no page scan at
all!
Eventually, oom-killer never works on such systems. That said, we can't
use zone->pages_scanned for this purpose. This patch restore
all_unreclaimable() use zone->all_unreclaimable as old. and in addition,
to add oom_killer_disabled check to avoid reintroduce the issue of commit
d1908362 ("vmscan: check all_unreclaimable in direct reclaim path").
Reported-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index c7f5a6d4b75b..f6b435c80079 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/memcontrol.h> | 41 | #include <linux/memcontrol.h> |
42 | #include <linux/delayacct.h> | 42 | #include <linux/delayacct.h> |
43 | #include <linux/sysctl.h> | 43 | #include <linux/sysctl.h> |
44 | #include <linux/oom.h> | ||
44 | 45 | ||
45 | #include <asm/tlbflush.h> | 46 | #include <asm/tlbflush.h> |
46 | #include <asm/div64.h> | 47 | #include <asm/div64.h> |
@@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone) | |||
1988 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; | 1989 | return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; |
1989 | } | 1990 | } |
1990 | 1991 | ||
1991 | /* | 1992 | /* All zones in zonelist are unreclaimable? */ |
1992 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
1993 | * the zone into all_unreclaimable. It can't handle OOM during hibernation. | ||
1994 | * So let's check zone's unreclaimable in direct reclaim as well as kswapd. | ||
1995 | */ | ||
1996 | static bool all_unreclaimable(struct zonelist *zonelist, | 1993 | static bool all_unreclaimable(struct zonelist *zonelist, |
1997 | struct scan_control *sc) | 1994 | struct scan_control *sc) |
1998 | { | 1995 | { |
1999 | struct zoneref *z; | 1996 | struct zoneref *z; |
2000 | struct zone *zone; | 1997 | struct zone *zone; |
2001 | bool all_unreclaimable = true; | ||
2002 | 1998 | ||
2003 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1999 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
2004 | gfp_zone(sc->gfp_mask), sc->nodemask) { | 2000 | gfp_zone(sc->gfp_mask), sc->nodemask) { |
@@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist, | |||
2006 | continue; | 2002 | continue; |
2007 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2003 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2008 | continue; | 2004 | continue; |
2009 | if (zone_reclaimable(zone)) { | 2005 | if (!zone->all_unreclaimable) |
2010 | all_unreclaimable = false; | 2006 | return false; |
2011 | break; | ||
2012 | } | ||
2013 | } | 2007 | } |
2014 | 2008 | ||
2015 | return all_unreclaimable; | 2009 | return true; |
2016 | } | 2010 | } |
2017 | 2011 | ||
2018 | /* | 2012 | /* |
@@ -2108,6 +2102,14 @@ out: | |||
2108 | if (sc->nr_reclaimed) | 2102 | if (sc->nr_reclaimed) |
2109 | return sc->nr_reclaimed; | 2103 | return sc->nr_reclaimed; |
2110 | 2104 | ||
2105 | /* | ||
2106 | * As hibernation is going on, kswapd is freezed so that it can't mark | ||
2107 | * the zone into all_unreclaimable. Thus bypassing all_unreclaimable | ||
2108 | * check. | ||
2109 | */ | ||
2110 | if (oom_killer_disabled) | ||
2111 | return 0; | ||
2112 | |||
2111 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 2113 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
2112 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) | 2114 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) |
2113 | return 1; | 2115 | return 1; |