path: root/mm/page_alloc.c
author    Miao Xie <miaox@cn.fujitsu.com>  2010-05-24 17:32:08 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2010-05-25 11:06:57 -0400
commit    c0ff7453bb5c7c98e0885fb94279f2571946f280 (patch)
tree      8bb2b169a5145f0496575dbd2f48bb4b1c83f819 /mm/page_alloc.c
parent    708c1bbc9d0c3e57f40501794d9b0eed29d10fce (diff)
cpuset,mm: fix no node to alloc memory when changing cpuset's mems
Before applying this patch, cpuset updates task->mems_allowed and mempolicy by setting all new bits in the nodemask first, and clearing all old disallowed bits later.  But in this way, the allocator may find that there is no node on which to alloc memory.  The reason is that when cpuset rebinds the task's mempolicy, it clears the nodes on which the allocator can alloc pages, for example:

(mpol: mempolicy)
	task1			task1's mpol	task2
	alloc page		1
	  alloc on node0? NO	1
				1		change mems from 1 to 0
				1		rebind task1's mpol
				0-1		  set new bits
				0		  clear disallowed bits
	  alloc on node1? NO	0
	  ...
	can't alloc page
	  goto oom

This patch fixes the problem by expanding the nodes range first (set newly allowed bits) and shrinking it lazily (clear newly disallowed bits).  We use a variable to tell the write-side task that a read-side task is reading the nodemask, and the write-side task clears newly disallowed nodes only after the read-side task ends the current memory allocation.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
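A minimal, self-contained userspace sketch of the idea may help.  The names get_mems_allowed()/put_mems_allowed() come from the diff below, but everything else is an illustrative assumption rather than kernel code: the single atomic reader counter, the one-word nodemask, the busy-waiting writer, and the helpers alloc_on_allowed_node()/change_mems_allowed() are simplifications (the actual patch uses a per-task marker, and the cpuset write side rechecks and retries instead of spinning).

/* Userspace sketch of "expand first, shrink lazily" with a read-side marker. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t mems_allowed = 1;	/* only node 0 allowed at first */
static _Atomic int mems_readers;		/* stands in for the per-task marker */

/* Read side: bracket an allocation so the writer knows not to shrink the
 * mask while we are scanning it. */
static void get_mems_allowed(void)
{
	atomic_fetch_add(&mems_readers, 1);
}

static void put_mems_allowed(void)
{
	atomic_fetch_sub(&mems_readers, 1);
}

static int alloc_on_allowed_node(void)
{
	uint64_t mask = atomic_load(&mems_allowed);
	int node;

	/* Because the writer only expands the mask while readers are active,
	 * the mask seen here is never empty. */
	for (node = 0; node < 64; node++)
		if (mask & (1ULL << node))
			return node;
	return -1;
}

/* Write side: set the newly allowed bits first, and clear the newly
 * disallowed bits only after in-flight readers have finished. */
static void change_mems_allowed(uint64_t newmems)
{
	atomic_fetch_or(&mems_allowed, newmems);	/* expand: old | new */

	while (atomic_load(&mems_readers))
		;	/* simplified wait; the kernel rechecks and retries */

	atomic_store(&mems_allowed, newmems);		/* shrink lazily */
}

int main(void)
{
	get_mems_allowed();
	printf("allocated on node %d\n", alloc_on_allowed_node());
	put_mems_allowed();

	change_mems_allowed(1ULL << 1);		/* move from node 0 to node 1 */

	get_mems_allowed();
	printf("allocated on node %d\n", alloc_on_allowed_node());
	put_mems_allowed();
	return 0;
}

With this ordering, a reader racing with the mask change can only ever observe the old mask, old|new, or the new mask, all of which are non-empty; that is exactly the guarantee the unpatched set-then-clear sequence failed to provide.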
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	6
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 596180fedd3..f7da2a2934b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1990,10 +1990,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
+	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
-	if (!preferred_zone)
+	if (!preferred_zone) {
+		put_mems_allowed();
 		return NULL;
+	}
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2003,6 +2006,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	page = __alloc_pages_slowpath(gfp_mask, order,
 			zonelist, high_zoneidx, nodemask,
 			preferred_zone, migratetype);
+	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 	return page;