aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2010-05-24 17:32:08 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-25 11:06:57 -0400
commitc0ff7453bb5c7c98e0885fb94279f2571946f280 (patch)
tree8bb2b169a5145f0496575dbd2f48bb4b1c83f819
parent708c1bbc9d0c3e57f40501794d9b0eed29d10fce (diff)
cpuset,mm: fix no node to alloc memory when changing cpuset's mems
Before applying this patch, cpuset updates task->mems_allowed and mempolicy by setting all new bits in the nodemask first, and clearing all old unallowed bits later. But this way, the allocator may find that there is no node to allocate memory from.

The reason is that when cpuset rebinds the task's mempolicy, it clears the nodes on which the allocator can allocate pages, for example:

(mpol: mempolicy)
    task1                   task1's mpol    task2
    alloc page              1
      alloc on node0? NO    1
                            1               change mems from 1 to 0
                            1               rebind task1's mpol
                            0-1               set new bits
                            0                 clear disallowed bits
      alloc on node1? NO    0
      ...
    can't alloc page
      goto oom

This patch fixes this problem by expanding the node range first (setting newly allowed bits) and shrinking it lazily (clearing newly disallowed bits). So we use a variable to tell the write-side task that the read-side task is reading the nodemask, and the write-side task clears newly disallowed nodes after the read-side task ends the current memory allocation.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/cpuset.h43
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/cpuset.c58
-rw-r--r--kernel/exit.c2
-rw-r--r--mm/filemap.c10
-rw-r--r--mm/hugetlb.c12
-rw-r--r--mm/mempolicy.c24
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/slab.c4
-rw-r--r--mm/slub.c6
-rw-r--r--mm/vmscan.c2
11 files changed, 148 insertions, 20 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index a73454aec333..20b51cab6593 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -86,9 +86,44 @@ extern void rebuild_sched_domains(void);
86 86
87extern void cpuset_print_task_mems_allowed(struct task_struct *p); 87extern void cpuset_print_task_mems_allowed(struct task_struct *p);
88 88
89/*
90 * reading current mems_allowed and mempolicy in the fastpath must protected
91 * by get_mems_allowed()
92 */
93static inline void get_mems_allowed(void)
94{
95 current->mems_allowed_change_disable++;
96
97 /*
98 * ensure that reading mems_allowed and mempolicy happens after the
99 * update of ->mems_allowed_change_disable.
100 *
101 * the write-side task finds ->mems_allowed_change_disable is not 0,
102 * and knows the read-side task is reading mems_allowed or mempolicy,
103 * so it will clear old bits lazily.
104 */
105 smp_mb();
106}
107
108static inline void put_mems_allowed(void)
109{
110 /*
111 * ensure that reading mems_allowed and mempolicy before reducing
112 * mems_allowed_change_disable.
113 *
114 * the write-side task will know that the read-side task is still
115 * reading mems_allowed or mempolicy, don't clears old bits in the
116 * nodemask.
117 */
118 smp_mb();
119 --ACCESS_ONCE(current->mems_allowed_change_disable);
120}
121
89static inline void set_mems_allowed(nodemask_t nodemask) 122static inline void set_mems_allowed(nodemask_t nodemask)
90{ 123{
124 task_lock(current);
91 current->mems_allowed = nodemask; 125 current->mems_allowed = nodemask;
126 task_unlock(current);
92} 127}
93 128
94#else /* !CONFIG_CPUSETS */ 129#else /* !CONFIG_CPUSETS */
@@ -187,6 +222,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
187{ 222{
188} 223}
189 224
225static inline void get_mems_allowed(void)
226{
227}
228
229static inline void put_mems_allowed(void)
230{
231}
232
190#endif /* !CONFIG_CPUSETS */ 233#endif /* !CONFIG_CPUSETS */
191 234
192#endif /* _LINUX_CPUSET_H */ 235#endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b55e988988b5..415b8f8a3f45 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1421,6 +1421,7 @@ struct task_struct {
1421#endif 1421#endif
1422#ifdef CONFIG_CPUSETS 1422#ifdef CONFIG_CPUSETS
1423 nodemask_t mems_allowed; /* Protected by alloc_lock */ 1423 nodemask_t mems_allowed; /* Protected by alloc_lock */
1424 int mems_allowed_change_disable;
1424 int cpuset_mem_spread_rotor; 1425 int cpuset_mem_spread_rotor;
1425#endif 1426#endif
1426#ifdef CONFIG_CGROUPS 1427#ifdef CONFIG_CGROUPS
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index db0990ac3fac..61d6af7fa676 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
946 * In order to avoid seeing no nodes if the old and new nodes are disjoint, 946 * In order to avoid seeing no nodes if the old and new nodes are disjoint,
947 * we structure updates as setting all new allowed nodes, then clearing newly 947 * we structure updates as setting all new allowed nodes, then clearing newly
948 * disallowed ones. 948 * disallowed ones.
949 *
950 * Called with task's alloc_lock held
951 */ 949 */
952static void cpuset_change_task_nodemask(struct task_struct *tsk, 950static void cpuset_change_task_nodemask(struct task_struct *tsk,
953 nodemask_t *newmems) 951 nodemask_t *newmems)
954{ 952{
953repeat:
954 /*
955 * Allow tasks that have access to memory reserves because they have
956 * been OOM killed to get memory anywhere.
957 */
958 if (unlikely(test_thread_flag(TIF_MEMDIE)))
959 return;
960 if (current->flags & PF_EXITING) /* Let dying task have memory */
961 return;
962
963 task_lock(tsk);
955 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); 964 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
956 mpol_rebind_task(tsk, &tsk->mems_allowed, MPOL_REBIND_ONCE); 965 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
957 mpol_rebind_task(tsk, newmems, MPOL_REBIND_ONCE); 966
967
968 /*
969 * ensure checking ->mems_allowed_change_disable after setting all new
970 * allowed nodes.
971 *
972 * the read-side task can see an nodemask with new allowed nodes and
973 * old allowed nodes. and if it allocates page when cpuset clears newly
974 * disallowed ones continuous, it can see the new allowed bits.
975 *
976 * And if setting all new allowed nodes is after the checking, setting
977 * all new allowed nodes and clearing newly disallowed ones will be done
978 * continuous, and the read-side task may find no node to alloc page.
979 */
980 smp_mb();
981
982 /*
983 * Allocation of memory is very fast, we needn't sleep when waiting
984 * for the read-side.
985 */
986 while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
987 task_unlock(tsk);
988 if (!task_curr(tsk))
989 yield();
990 goto repeat;
991 }
992
993 /*
994 * ensure checking ->mems_allowed_change_disable before clearing all new
995 * disallowed nodes.
996 *
997 * if clearing newly disallowed bits before the checking, the read-side
998 * task may find no node to alloc page.
999 */
1000 smp_mb();
1001
1002 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
958 tsk->mems_allowed = *newmems; 1003 tsk->mems_allowed = *newmems;
1004 task_unlock(tsk);
959} 1005}
960 1006
961/* 1007/*
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
978 cs = cgroup_cs(scan->cg); 1024 cs = cgroup_cs(scan->cg);
979 guarantee_online_mems(cs, newmems); 1025 guarantee_online_mems(cs, newmems);
980 1026
981 task_lock(p);
982 cpuset_change_task_nodemask(p, newmems); 1027 cpuset_change_task_nodemask(p, newmems);
983 task_unlock(p);
984 1028
985 NODEMASK_FREE(newmems); 1029 NODEMASK_FREE(newmems);
986 1030
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
1383 err = set_cpus_allowed_ptr(tsk, cpus_attach); 1427 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1384 WARN_ON_ONCE(err); 1428 WARN_ON_ONCE(err);
1385 1429
1386 task_lock(tsk);
1387 cpuset_change_task_nodemask(tsk, to); 1430 cpuset_change_task_nodemask(tsk, to);
1388 task_unlock(tsk);
1389 cpuset_update_task_spread_flag(cs, tsk); 1431 cpuset_update_task_spread_flag(cs, tsk);
1390 1432
1391} 1433}
diff --git a/kernel/exit.c b/kernel/exit.c
index eabca5a73a85..019a2843bf95 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code)
1002 1002
1003 exit_notify(tsk, group_dead); 1003 exit_notify(tsk, group_dead);
1004#ifdef CONFIG_NUMA 1004#ifdef CONFIG_NUMA
1005 task_lock(tsk);
1005 mpol_put(tsk->mempolicy); 1006 mpol_put(tsk->mempolicy);
1006 tsk->mempolicy = NULL; 1007 tsk->mempolicy = NULL;
1008 task_unlock(tsk);
1007#endif 1009#endif
1008#ifdef CONFIG_FUTEX 1010#ifdef CONFIG_FUTEX
1009 if (unlikely(current->pi_state_cache)) 1011 if (unlikely(current->pi_state_cache))
diff --git a/mm/filemap.c b/mm/filemap.c
index d6f4f073836e..88d719665a28 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -461,9 +461,15 @@ EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
461#ifdef CONFIG_NUMA 461#ifdef CONFIG_NUMA
462struct page *__page_cache_alloc(gfp_t gfp) 462struct page *__page_cache_alloc(gfp_t gfp)
463{ 463{
464 int n;
465 struct page *page;
466
464 if (cpuset_do_page_mem_spread()) { 467 if (cpuset_do_page_mem_spread()) {
465 int n = cpuset_mem_spread_node(); 468 get_mems_allowed();
466 return alloc_pages_exact_node(n, gfp, 0); 469 n = cpuset_mem_spread_node();
470 page = alloc_pages_exact_node(n, gfp, 0);
471 put_mems_allowed();
472 return page;
467 } 473 }
468 return alloc_pages(gfp, 0); 474 return alloc_pages(gfp, 0);
469} 475}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4c9e6bbf3772..54d42b009dbe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -465,11 +465,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
465 struct page *page = NULL; 465 struct page *page = NULL;
466 struct mempolicy *mpol; 466 struct mempolicy *mpol;
467 nodemask_t *nodemask; 467 nodemask_t *nodemask;
468 struct zonelist *zonelist = huge_zonelist(vma, address, 468 struct zonelist *zonelist;
469 htlb_alloc_mask, &mpol, &nodemask);
470 struct zone *zone; 469 struct zone *zone;
471 struct zoneref *z; 470 struct zoneref *z;
472 471
472 get_mems_allowed();
473 zonelist = huge_zonelist(vma, address,
474 htlb_alloc_mask, &mpol, &nodemask);
473 /* 475 /*
474 * A child process with MAP_PRIVATE mappings created by their parent 476 * A child process with MAP_PRIVATE mappings created by their parent
475 * have no page reserves. This check ensures that reservations are 477 * have no page reserves. This check ensures that reservations are
@@ -477,11 +479,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
477 */ 479 */
478 if (!vma_has_reserves(vma) && 480 if (!vma_has_reserves(vma) &&
479 h->free_huge_pages - h->resv_huge_pages == 0) 481 h->free_huge_pages - h->resv_huge_pages == 0)
480 return NULL; 482 goto err;
481 483
482 /* If reserves cannot be used, ensure enough pages are in the pool */ 484 /* If reserves cannot be used, ensure enough pages are in the pool */
483 if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) 485 if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
484 return NULL; 486 goto err;;
485 487
486 for_each_zone_zonelist_nodemask(zone, z, zonelist, 488 for_each_zone_zonelist_nodemask(zone, z, zonelist,
487 MAX_NR_ZONES - 1, nodemask) { 489 MAX_NR_ZONES - 1, nodemask) {
@@ -500,7 +502,9 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
500 break; 502 break;
501 } 503 }
502 } 504 }
505err:
503 mpol_cond_put(mpol); 506 mpol_cond_put(mpol);
507 put_mems_allowed();
504 return page; 508 return page;
505} 509}
506 510
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8a993db88029..721b2b338032 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1639,6 +1639,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
1639 * to the struct mempolicy for conditional unref after allocation. 1639 * to the struct mempolicy for conditional unref after allocation.
1640 * If the effective policy is 'BIND, returns a pointer to the mempolicy's 1640 * If the effective policy is 'BIND, returns a pointer to the mempolicy's
1641 * @nodemask for filtering the zonelist. 1641 * @nodemask for filtering the zonelist.
1642 *
1643 * Must be protected by get_mems_allowed()
1642 */ 1644 */
1643struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, 1645struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1644 gfp_t gfp_flags, struct mempolicy **mpol, 1646 gfp_t gfp_flags, struct mempolicy **mpol,
@@ -1684,6 +1686,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
1684 if (!(mask && current->mempolicy)) 1686 if (!(mask && current->mempolicy))
1685 return false; 1687 return false;
1686 1688
1689 task_lock(current);
1687 mempolicy = current->mempolicy; 1690 mempolicy = current->mempolicy;
1688 switch (mempolicy->mode) { 1691 switch (mempolicy->mode) {
1689 case MPOL_PREFERRED: 1692 case MPOL_PREFERRED:
@@ -1703,6 +1706,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
1703 default: 1706 default:
1704 BUG(); 1707 BUG();
1705 } 1708 }
1709 task_unlock(current);
1706 1710
1707 return true; 1711 return true;
1708} 1712}
@@ -1750,13 +1754,17 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1750{ 1754{
1751 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1755 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1752 struct zonelist *zl; 1756 struct zonelist *zl;
1757 struct page *page;
1753 1758
1759 get_mems_allowed();
1754 if (unlikely(pol->mode == MPOL_INTERLEAVE)) { 1760 if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
1755 unsigned nid; 1761 unsigned nid;
1756 1762
1757 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1763 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
1758 mpol_cond_put(pol); 1764 mpol_cond_put(pol);
1759 return alloc_page_interleave(gfp, 0, nid); 1765 page = alloc_page_interleave(gfp, 0, nid);
1766 put_mems_allowed();
1767 return page;
1760 } 1768 }
1761 zl = policy_zonelist(gfp, pol); 1769 zl = policy_zonelist(gfp, pol);
1762 if (unlikely(mpol_needs_cond_ref(pol))) { 1770 if (unlikely(mpol_needs_cond_ref(pol))) {
@@ -1766,12 +1774,15 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1766 struct page *page = __alloc_pages_nodemask(gfp, 0, 1774 struct page *page = __alloc_pages_nodemask(gfp, 0,
1767 zl, policy_nodemask(gfp, pol)); 1775 zl, policy_nodemask(gfp, pol));
1768 __mpol_put(pol); 1776 __mpol_put(pol);
1777 put_mems_allowed();
1769 return page; 1778 return page;
1770 } 1779 }
1771 /* 1780 /*
1772 * fast path: default or task policy 1781 * fast path: default or task policy
1773 */ 1782 */
1774 return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol)); 1783 page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
1784 put_mems_allowed();
1785 return page;
1775} 1786}
1776 1787
1777/** 1788/**
@@ -1796,18 +1807,23 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1796struct page *alloc_pages_current(gfp_t gfp, unsigned order) 1807struct page *alloc_pages_current(gfp_t gfp, unsigned order)
1797{ 1808{
1798 struct mempolicy *pol = current->mempolicy; 1809 struct mempolicy *pol = current->mempolicy;
1810 struct page *page;
1799 1811
1800 if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) 1812 if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
1801 pol = &default_policy; 1813 pol = &default_policy;
1802 1814
1815 get_mems_allowed();
1803 /* 1816 /*
1804 * No reference counting needed for current->mempolicy 1817 * No reference counting needed for current->mempolicy
1805 * nor system default_policy 1818 * nor system default_policy
1806 */ 1819 */
1807 if (pol->mode == MPOL_INTERLEAVE) 1820 if (pol->mode == MPOL_INTERLEAVE)
1808 return alloc_page_interleave(gfp, order, interleave_nodes(pol)); 1821 page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
1809 return __alloc_pages_nodemask(gfp, order, 1822 else
1823 page = __alloc_pages_nodemask(gfp, order,
1810 policy_zonelist(gfp, pol), policy_nodemask(gfp, pol)); 1824 policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
1825 put_mems_allowed();
1826 return page;
1811} 1827}
1812EXPORT_SYMBOL(alloc_pages_current); 1828EXPORT_SYMBOL(alloc_pages_current);
1813 1829
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 596180fedd3a..f7da2a2934b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1990,10 +1990,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
1990 if (unlikely(!zonelist->_zonerefs->zone)) 1990 if (unlikely(!zonelist->_zonerefs->zone))
1991 return NULL; 1991 return NULL;
1992 1992
1993 get_mems_allowed();
1993 /* The preferred zone is used for statistics later */ 1994 /* The preferred zone is used for statistics later */
1994 first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); 1995 first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
1995 if (!preferred_zone) 1996 if (!preferred_zone) {
1997 put_mems_allowed();
1996 return NULL; 1998 return NULL;
1999 }
1997 2000
1998 /* First allocation attempt */ 2001 /* First allocation attempt */
1999 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, 2002 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2003,6 +2006,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
2003 page = __alloc_pages_slowpath(gfp_mask, order, 2006 page = __alloc_pages_slowpath(gfp_mask, order,
2004 zonelist, high_zoneidx, nodemask, 2007 zonelist, high_zoneidx, nodemask,
2005 preferred_zone, migratetype); 2008 preferred_zone, migratetype);
2009 put_mems_allowed();
2006 2010
2007 trace_mm_page_alloc(page, order, gfp_mask, migratetype); 2011 trace_mm_page_alloc(page, order, gfp_mask, migratetype);
2008 return page; 2012 return page;
diff --git a/mm/slab.c b/mm/slab.c
index 50a73fca19c4..02786e1a32d2 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3217,10 +3217,12 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3217 if (in_interrupt() || (flags & __GFP_THISNODE)) 3217 if (in_interrupt() || (flags & __GFP_THISNODE))
3218 return NULL; 3218 return NULL;
3219 nid_alloc = nid_here = numa_node_id(); 3219 nid_alloc = nid_here = numa_node_id();
3220 get_mems_allowed();
3220 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) 3221 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3221 nid_alloc = cpuset_mem_spread_node(); 3222 nid_alloc = cpuset_mem_spread_node();
3222 else if (current->mempolicy) 3223 else if (current->mempolicy)
3223 nid_alloc = slab_node(current->mempolicy); 3224 nid_alloc = slab_node(current->mempolicy);
3225 put_mems_allowed();
3224 if (nid_alloc != nid_here) 3226 if (nid_alloc != nid_here)
3225 return ____cache_alloc_node(cachep, flags, nid_alloc); 3227 return ____cache_alloc_node(cachep, flags, nid_alloc);
3226 return NULL; 3228 return NULL;
@@ -3247,6 +3249,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3247 if (flags & __GFP_THISNODE) 3249 if (flags & __GFP_THISNODE)
3248 return NULL; 3250 return NULL;
3249 3251
3252 get_mems_allowed();
3250 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 3253 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
3251 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); 3254 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3252 3255
@@ -3302,6 +3305,7 @@ retry:
3302 } 3305 }
3303 } 3306 }
3304 } 3307 }
3308 put_mems_allowed();
3305 return obj; 3309 return obj;
3306} 3310}
3307 3311
diff --git a/mm/slub.c b/mm/slub.c
index e46e3129697d..26f0cb9cc584 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1360,6 +1360,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1360 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1360 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1361 return NULL; 1361 return NULL;
1362 1362
1363 get_mems_allowed();
1363 zonelist = node_zonelist(slab_node(current->mempolicy), flags); 1364 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1364 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { 1365 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1365 struct kmem_cache_node *n; 1366 struct kmem_cache_node *n;
@@ -1369,10 +1370,13 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1369 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1370 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1370 n->nr_partial > s->min_partial) { 1371 n->nr_partial > s->min_partial) {
1371 page = get_partial_node(n); 1372 page = get_partial_node(n);
1372 if (page) 1373 if (page) {
1374 put_mems_allowed();
1373 return page; 1375 return page;
1376 }
1374 } 1377 }
1375 } 1378 }
1379 put_mems_allowed();
1376#endif 1380#endif
1377 return NULL; 1381 return NULL;
1378} 1382}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3ff3311447f5..f2c367c9ec12 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1774,6 +1774,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1774 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); 1774 enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
1775 unsigned long writeback_threshold; 1775 unsigned long writeback_threshold;
1776 1776
1777 get_mems_allowed();
1777 delayacct_freepages_start(); 1778 delayacct_freepages_start();
1778 1779
1779 if (scanning_global_lru(sc)) 1780 if (scanning_global_lru(sc))
@@ -1857,6 +1858,7 @@ out:
1857 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority); 1858 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
1858 1859
1859 delayacct_freepages_end(); 1860 delayacct_freepages_end();
1861 put_mems_allowed();
1860 1862
1861 return ret; 1863 return ret;
1862} 1864}