diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2010-06-04 17:15:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-06-04 18:21:45 -0400 |
commit | bb21c7ce18eff8e6e7877ca1d06c6db719376e3c (patch) | |
tree | 555edaded1e0a771df406ce2b6b63368df6de6cd | |
parent | 9e506f7adce8e6165a104d3d78fddd8ff0cdccf8 (diff) |
vmscan: fix do_try_to_free_pages() return value when priority==0 reclaim failure
Greg Thelen reported that Johannes's recent stack diet patch makes the kernel hang.
His test is as follows:
mount -t cgroup none /cgroups -o memory
mkdir /cgroups/cg1
echo $$ > /cgroups/cg1/tasks
dd bs=1024 count=1024 if=/dev/null of=/data/foo
echo $$ > /cgroups/tasks
echo 1 > /cgroups/cg1/memory.force_empty
Actually, this "try hard before declaring OOM" logic has been broken since the
following two-year-old patch:
commit a41f24ea9fd6169b147c53c2392e2887cc1d9247
Author: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Tue Apr 29 00:58:25 2008 -0700
page allocator: smarter retry of costly-order allocations
The original intention was "return success if the system has shrinkable zones
even though priority==0 reclaim failed". But the above patch changed it to
"return nr_reclaimed if .....". That overlooked the fact that nr_reclaimed may
be 0 when priority==0 reclaim fails.
And Johannes's patch 0aeb2339e54e ("vmscan: remove all_unreclaimable scan
control") made it even more broken. Originally, a priority==0 reclaim failure
on memcg returned 0, but that patch changed it to return 1. This totally
confused memcg.
This patch fixes it completely.
Reported-by: Greg Thelen <gthelen@google.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: Greg Thelen <gthelen@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/vmscan.c | 29 |
1 files changed, 16 insertions, 13 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 915dceb487c1..9c7e57cc63a3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1724,13 +1724,13 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1724 | * If a zone is deemed to be full of pinned pages then just give it a light | 1724 | * If a zone is deemed to be full of pinned pages then just give it a light |
1725 | * scan then give up on it. | 1725 | * scan then give up on it. |
1726 | */ | 1726 | */ |
1727 | static int shrink_zones(int priority, struct zonelist *zonelist, | 1727 | static bool shrink_zones(int priority, struct zonelist *zonelist, |
1728 | struct scan_control *sc) | 1728 | struct scan_control *sc) |
1729 | { | 1729 | { |
1730 | enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); | 1730 | enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); |
1731 | struct zoneref *z; | 1731 | struct zoneref *z; |
1732 | struct zone *zone; | 1732 | struct zone *zone; |
1733 | int progress = 0; | 1733 | bool all_unreclaimable = true; |
1734 | 1734 | ||
1735 | for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, | 1735 | for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, |
1736 | sc->nodemask) { | 1736 | sc->nodemask) { |
@@ -1757,9 +1757,9 @@ static int shrink_zones(int priority, struct zonelist *zonelist, | |||
1757 | } | 1757 | } |
1758 | 1758 | ||
1759 | shrink_zone(priority, zone, sc); | 1759 | shrink_zone(priority, zone, sc); |
1760 | progress = 1; | 1760 | all_unreclaimable = false; |
1761 | } | 1761 | } |
1762 | return progress; | 1762 | return all_unreclaimable; |
1763 | } | 1763 | } |
1764 | 1764 | ||
1765 | /* | 1765 | /* |
@@ -1782,7 +1782,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1782 | struct scan_control *sc) | 1782 | struct scan_control *sc) |
1783 | { | 1783 | { |
1784 | int priority; | 1784 | int priority; |
1785 | unsigned long ret = 0; | 1785 | bool all_unreclaimable; |
1786 | unsigned long total_scanned = 0; | 1786 | unsigned long total_scanned = 0; |
1787 | struct reclaim_state *reclaim_state = current->reclaim_state; | 1787 | struct reclaim_state *reclaim_state = current->reclaim_state; |
1788 | unsigned long lru_pages = 0; | 1788 | unsigned long lru_pages = 0; |
@@ -1813,7 +1813,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1813 | sc->nr_scanned = 0; | 1813 | sc->nr_scanned = 0; |
1814 | if (!priority) | 1814 | if (!priority) |
1815 | disable_swap_token(); | 1815 | disable_swap_token(); |
1816 | ret = shrink_zones(priority, zonelist, sc); | 1816 | all_unreclaimable = shrink_zones(priority, zonelist, sc); |
1817 | /* | 1817 | /* |
1818 | * Don't shrink slabs when reclaiming memory from | 1818 | * Don't shrink slabs when reclaiming memory from |
1819 | * over limit cgroups | 1819 | * over limit cgroups |
@@ -1826,10 +1826,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1826 | } | 1826 | } |
1827 | } | 1827 | } |
1828 | total_scanned += sc->nr_scanned; | 1828 | total_scanned += sc->nr_scanned; |
1829 | if (sc->nr_reclaimed >= sc->nr_to_reclaim) { | 1829 | if (sc->nr_reclaimed >= sc->nr_to_reclaim) |
1830 | ret = sc->nr_reclaimed; | ||
1831 | goto out; | 1830 | goto out; |
1832 | } | ||
1833 | 1831 | ||
1834 | /* | 1832 | /* |
1835 | * Try to write back as many pages as we just scanned. This | 1833 | * Try to write back as many pages as we just scanned. This |
@@ -1849,9 +1847,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1849 | priority < DEF_PRIORITY - 2) | 1847 | priority < DEF_PRIORITY - 2) |
1850 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 1848 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
1851 | } | 1849 | } |
1852 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 1850 | |
1853 | if (ret && scanning_global_lru(sc)) | ||
1854 | ret = sc->nr_reclaimed; | ||
1855 | out: | 1851 | out: |
1856 | /* | 1852 | /* |
1857 | * Now that we've scanned all the zones at this priority level, note | 1853 | * Now that we've scanned all the zones at this priority level, note |
@@ -1877,7 +1873,14 @@ out: | |||
1877 | delayacct_freepages_end(); | 1873 | delayacct_freepages_end(); |
1878 | put_mems_allowed(); | 1874 | put_mems_allowed(); |
1879 | 1875 | ||
1880 | return ret; | 1876 | if (sc->nr_reclaimed) |
1877 | return sc->nr_reclaimed; | ||
1878 | |||
1879 | /* top priority shrink_zones still had more to do? don't OOM, then */ | ||
1880 | if (scanning_global_lru(sc) && !all_unreclaimable) | ||
1881 | return 1; | ||
1882 | |||
1883 | return 0; | ||
1881 | } | 1884 | } |
1882 | 1885 | ||
1883 | unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 1886 | unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |