aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: David Rientjes <rientjes@google.com> 2011-01-25 18:07:20 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-25 19:50:00 -0500
commit: f33261d75b88f55a08e6a9648cef73509979bfba (patch)
tree: f3d8b4f41c860e9f6d054173870319a75c14c155 /mm
parent: 4f542e3dd90a96ee0f8fcb8173cb4104f5f753e6 (diff)
mm: fix deferred congestion timeout if preferred zone is not allowed
Before 0e093d99763e ("writeback: do not sleep on the congestion queue if there are no congested BDIs or if significant congestion is not being encountered in the current zone"), preferred_zone was only used for NUMA statistics, to determine the zoneidx from which to allocate given the type requested, and whether to utilize memory compaction.

wait_iff_congested(), though, uses preferred_zone to determine if the congestion wait should be deferred because its dirty pages are backed by a congested bdi. This incorrectly defers the timeout and busy loops in the page allocator with various cond_resched() calls if preferred_zone is not allowed in the current context, usually consuming 100% of a cpu.

This patch ensures preferred_zone is an allowed zone in the fastpath depending on whether current is constrained by its cpuset or nodes in its mempolicy (when the nodemask passed is non-NULL). This is correct since the fastpath allocation always passes ALLOC_CPUSET when trying to allocate memory. In the slowpath, this patch resets preferred_zone to the first zone of the allowed type when the allocation is not constrained by current's cpuset, i.e. it does not pass ALLOC_CPUSET.

This patch also ensures preferred_zone is from the set of allowed nodes when called from within direct reclaim since allocations are always constrained by cpusets in this context (it is blockable).

Both of these uses of cpuset_current_mems_allowed are protected by get_mems_allowed().

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/page_alloc.c | 12
-rw-r--r-- mm/vmscan.c | 3
2 files changed, 13 insertions, 2 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 90c1439549fd..f4967910c967 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2034,6 +2034,14 @@ restart:
2034 */ 2034 */
2035 alloc_flags = gfp_to_alloc_flags(gfp_mask); 2035 alloc_flags = gfp_to_alloc_flags(gfp_mask);
2036 2036
2037 /*
2038 * Find the true preferred zone if the allocation is unconstrained by
2039 * cpusets.
2040 */
2041 if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
2042 first_zones_zonelist(zonelist, high_zoneidx, NULL,
2043 &preferred_zone);
2044
2037 /* This is the last chance, in general, before the goto nopage. */ 2045 /* This is the last chance, in general, before the goto nopage. */
2038 page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, 2046 page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
2039 high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, 2047 high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2192,7 +2200,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
2192 2200
2193 get_mems_allowed(); 2201 get_mems_allowed();
2194 /* The preferred zone is used for statistics later */ 2202 /* The preferred zone is used for statistics later */
2195 first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); 2203 first_zones_zonelist(zonelist, high_zoneidx,
2204 nodemask ? : &cpuset_current_mems_allowed,
2205 &preferred_zone);
2196 if (!preferred_zone) { 2206 if (!preferred_zone) {
2197 put_mems_allowed(); 2207 put_mems_allowed();
2198 return NULL; 2208 return NULL;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f5d90dedebba..148c6e630df2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2083,7 +2083,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2083 struct zone *preferred_zone; 2083 struct zone *preferred_zone;
2084 2084
2085 first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), 2085 first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
2086 NULL, &preferred_zone); 2086 &cpuset_current_mems_allowed,
2087 &preferred_zone);
2087 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); 2088 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
2088 } 2089 }
2089 } 2090 }