 include/linux/cpuset.h    | 47
 include/linux/init_task.h |  8
 include/linux/sched.h     |  2
 kernel/cpuset.c           | 43
 kernel/fork.c             |  1
 mm/filemap.c              | 11
 mm/hugetlb.c              | 15
 mm/mempolicy.c            | 28
 mm/page_alloc.c           | 33
 mm/slab.c                 | 13
 mm/slub.c                 | 40
 mm/vmscan.c               |  2
 12 files changed, 133 insertions(+), 110 deletions(-)
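
[Overview: this change replaces the mems_allowed_change_disable counter, with its paired smp_mb() barriers and yield()-based writer retry loop, by a seqcount (mems_allowed_seq). Allocators now sample the seqcount via a get_mems_allowed()/put_mems_allowed() cookie and simply retry an allocation that failed while a cpuset nodemask update was in flight, instead of making the update side wait for readers.]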
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index e9eaec522655..7a7e5fd2a277 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -89,42 +89,33 @@ extern void rebuild_sched_domains(void);
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
 /*
- * reading current mems_allowed and mempolicy in the fastpath must protected
- * by get_mems_allowed()
+ * get_mems_allowed is required when making decisions involving mems_allowed
+ * such as during page allocation. mems_allowed can be updated in parallel
+ * and depending on the new value an operation can fail potentially causing
+ * process failure. A retry loop with get_mems_allowed and put_mems_allowed
+ * prevents these artificial failures.
  */
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
-	current->mems_allowed_change_disable++;
-
-	/*
-	 * ensure that reading mems_allowed and mempolicy happens after the
-	 * update of ->mems_allowed_change_disable.
-	 *
-	 * the write-side task finds ->mems_allowed_change_disable is not 0,
-	 * and knows the read-side task is reading mems_allowed or mempolicy,
-	 * so it will clear old bits lazily.
-	 */
-	smp_mb();
+	return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
-static inline void put_mems_allowed(void)
+/*
+ * If this returns false, the operation that took place after get_mems_allowed
+ * may have failed. It is up to the caller to retry the operation if
+ * appropriate.
+ */
+static inline bool put_mems_allowed(unsigned int seq)
 {
-	/*
-	 * ensure that reading mems_allowed and mempolicy before reducing
-	 * mems_allowed_change_disable.
-	 *
-	 * the write-side task will know that the read-side task is still
-	 * reading mems_allowed or mempolicy, don't clears old bits in the
-	 * nodemask.
-	 */
-	smp_mb();
-	--ACCESS_ONCE(current->mems_allowed_change_disable);
+	return !read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
 static inline void set_mems_allowed(nodemask_t nodemask)
 {
 	task_lock(current);
+	write_seqcount_begin(&current->mems_allowed_seq);
 	current->mems_allowed = nodemask;
+	write_seqcount_end(&current->mems_allowed_seq);
 	task_unlock(current);
 }
 
@@ -234,12 +225,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 {
 }
 
-static inline void get_mems_allowed(void)
+static inline unsigned int get_mems_allowed(void)
 {
+	return 0;
 }
 
-static inline void put_mems_allowed(void)
+static inline bool put_mems_allowed(unsigned int seq)
 {
+	return true;
 }
 
 #endif /* !CONFIG_CPUSETS */
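
[Usage sketch of the new reader side, not part of the patch; try_alloc() is a placeholder for whatever allocation the caller performs (the mm/filemap.c hunk below shows the real pattern):

	unsigned int cpuset_mems_cookie;
	struct page *page;

	do {
		cpuset_mems_cookie = get_mems_allowed();
		page = try_alloc();	/* placeholder allocation step */
	} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
	/*
	 * Retry only when the allocation failed AND the cookie is stale,
	 * i.e. mems_allowed changed underneath us; a failure against a
	 * stable nodemask is a real failure.
	 */
]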
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f994d51f70f2..e4baff5f7ff4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -29,6 +29,13 @@ extern struct fs_struct init_fs;
 #define INIT_GROUP_RWSEM(sig)
 #endif
 
+#ifdef CONFIG_CPUSETS
+#define INIT_CPUSET_SEQ						\
+	.mems_allowed_seq = SEQCNT_ZERO,
+#else
+#define INIT_CPUSET_SEQ
+#endif
+
 #define INIT_SIGNALS(sig) {						\
 	.nr_threads	= 1,						\
 	.wait_chldexit	= __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
@@ -192,6 +199,7 @@ extern struct cred init_cred;
 	INIT_FTRACE_GRAPH						\
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
+	INIT_CPUSET_SEQ							\
 }
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e074e1e54f85..0c147a4260a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1514,7 +1514,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_CPUSETS
 	nodemask_t mems_allowed;	/* Protected by alloc_lock */
-	int mems_allowed_change_disable;
+	seqcount_t mems_allowed_seq;	/* Sequence no to catch updates */
 	int cpuset_mem_spread_rotor;
 	int cpuset_slab_spread_rotor;
 #endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 5d575836dba6..1010cc61931f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 {
 	bool need_loop;
 
-repeat:
 	/*
 	 * Allow tasks that have access to memory reserves because they have
 	 * been OOM killed to get memory anywhere.
@@ -983,45 +982,19 @@ repeat:
 	 */
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
-	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
-	/*
-	 * ensure checking ->mems_allowed_change_disable after setting all new
-	 * allowed nodes.
-	 *
-	 * the read-side task can see an nodemask with new allowed nodes and
-	 * old allowed nodes. and if it allocates page when cpuset clears newly
-	 * disallowed ones continuous, it can see the new allowed bits.
-	 *
-	 * And if setting all new allowed nodes is after the checking, setting
-	 * all new allowed nodes and clearing newly disallowed ones will be done
-	 * continuous, and the read-side task may find no node to alloc page.
-	 */
-	smp_mb();
+	if (need_loop)
+		write_seqcount_begin(&tsk->mems_allowed_seq);
 
-	/*
-	 * Allocation of memory is very fast, we needn't sleep when waiting
-	 * for the read-side.
-	 */
-	while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
-		task_unlock(tsk);
-		if (!task_curr(tsk))
-			yield();
-		goto repeat;
-	}
-
-	/*
-	 * ensure checking ->mems_allowed_change_disable before clearing all new
-	 * disallowed nodes.
-	 *
-	 * if clearing newly disallowed bits before the checking, the read-side
-	 * task may find no node to alloc page.
-	 */
-	smp_mb();
+	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
+	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
+
+	if (need_loop)
+		write_seqcount_end(&tsk->mems_allowed_seq);
+
 	task_unlock(tsk);
 }
 
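
[Note: the write-side seqcount section is entered only when need_loop is true, i.e. when the task has a mempolicy or the new nodemask shares no node with the old one. When the masks intersect, both rebind steps leave at least one previously allowed node set, so a concurrent reader can never observe an empty nodemask and no retry is needed.]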
diff --git a/kernel/fork.c b/kernel/fork.c
index a9e99f3c18e0..9cc227d54102 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1237,6 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_CPUSETS
 	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+	seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	p->irq_events = 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index f3230604006c..843042045dc9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -499,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp)
 	struct page *page;
 
 	if (cpuset_do_page_mem_spread()) {
-		get_mems_allowed();
-		n = cpuset_mem_spread_node();
-		page = alloc_pages_exact_node(n, gfp, 0);
-		put_mems_allowed();
+		unsigned int cpuset_mems_cookie;
+		do {
+			cpuset_mems_cookie = get_mems_allowed();
+			n = cpuset_mem_spread_node();
+			page = alloc_pages_exact_node(n, gfp, 0);
+		} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+
 		return page;
 	}
 	return alloc_pages(gfp, 0);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 62f9fada4d6d..b1c314877334 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -454,14 +454,16 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	struct page *page = NULL;
+	struct page *page;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
+	unsigned int cpuset_mems_cookie;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
 	zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask, &mpol, &nodemask);
 	/*
@@ -488,10 +490,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 			}
 		}
 	}
-err:
+
 	mpol_cond_put(mpol);
-	put_mems_allowed();
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
 	return page;
+
+err:
+	mpol_cond_put(mpol);
+	return NULL;
 }
 
 static void update_and_free_page(struct hstate *h, struct page *page)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71e1a523e209..cfb6c8678754 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1850,18 +1850,24 @@ struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr, int node)
 {
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+	struct mempolicy *pol;
 	struct zonelist *zl;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
+
+retry_cpuset:
+	pol = get_vma_policy(current, vma, addr);
+	cpuset_mems_cookie = get_mems_allowed();
 
-	get_mems_allowed();
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		put_mems_allowed();
+		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+			goto retry_cpuset;
+
 		return page;
 	}
 	zl = policy_zonelist(gfp, pol, node);
@@ -1872,7 +1878,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		struct page *page = __alloc_pages_nodemask(gfp, order,
 						zl, policy_nodemask(gfp, pol));
 		__mpol_put(pol);
-		put_mems_allowed();
+		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+			goto retry_cpuset;
 		return page;
 	}
 	/*
@@ -1880,7 +1887,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	 */
 	page = __alloc_pages_nodemask(gfp, order, zl,
 				      policy_nodemask(gfp, pol));
-	put_mems_allowed();
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
 	return page;
 }
 
@@ -1907,11 +1915,14 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 	struct page *page;
+	unsigned int cpuset_mems_cookie;
 
 	if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
 		pol = &default_policy;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/*
 	 * No reference counting needed for current->mempolicy
 	 * nor system default_policy
@@ -1922,7 +1933,10 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 		page = __alloc_pages_nodemask(gfp, order,
 				policy_zonelist(gfp, pol, numa_node_id()),
 				policy_nodemask(gfp, pol));
-	put_mems_allowed();
+
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(alloc_pages_current);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 673596ad9c80..40de6854b980 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2380,8 +2380,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2400,15 +2401,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2418,9 +2419,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2634,13 +2645,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
diff --git a/mm/slab.c b/mm/slab.c
index f0bd7857ab3b..29c8716eb7a9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3284,12 +3284,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
 	nid_alloc = nid_here = numa_mem_id();
-	get_mems_allowed();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
 		nid_alloc = cpuset_slab_spread_node();
 	else if (current->mempolicy)
 		nid_alloc = slab_node(current->mempolicy);
-	put_mems_allowed();
 	if (nid_alloc != nid_here)
 		return ____cache_alloc_node(cachep, flags, nid_alloc);
 	return NULL;
@@ -3312,14 +3310,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *obj = NULL;
 	int nid;
+	unsigned int cpuset_mems_cookie;
 
 	if (flags & __GFP_THISNODE)
 		return NULL;
 
-	get_mems_allowed();
-	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+
 retry:
 	/*
 	 * Look through allowed nodes for objects available
@@ -3372,7 +3373,9 @@ retry:
 			}
 		}
 	}
-	put_mems_allowed();
+
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
+		goto retry_cpuset;
 	return obj;
 }
 
diff --git a/mm/slub.c b/mm/slub.c
index 4907563ef7ff..f4a6229848fd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1581,6 +1581,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(flags);
 	void *object;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * The defrag ratio allows a configuration of the tradeoffs between
@@ -1604,23 +1605,32 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
 			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;
 
-	get_mems_allowed();
-	zonelist = node_zonelist(slab_node(current->mempolicy), flags);
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		struct kmem_cache_node *n;
-
-		n = get_node(s, zone_to_nid(zone));
-
-		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
-				n->nr_partial > s->min_partial) {
-			object = get_partial_node(s, n, c);
-			if (object) {
-				put_mems_allowed();
-				return object;
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+			struct kmem_cache_node *n;
+
+			n = get_node(s, zone_to_nid(zone));
+
+			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
+					n->nr_partial > s->min_partial) {
+				object = get_partial_node(s, n, c);
+				if (object) {
+					/*
+					 * Return the object even if
+					 * put_mems_allowed indicated that
+					 * the cpuset mems_allowed was
+					 * updated in parallel. It's a
+					 * harmless race between the alloc
+					 * and the cpuset update.
+					 */
+					put_mems_allowed(cpuset_mems_cookie);
+					return object;
+				}
 			}
 		}
-	}
-	put_mems_allowed();
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 #endif
 	return NULL;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 440af1d899b9..55d86c9506f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2343,7 +2343,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	unsigned long writeback_threshold;
 	bool aborted_reclaim;
 
-	get_mems_allowed();
 	delayacct_freepages_start();
 
 	if (global_reclaim(sc))
@@ -2407,7 +2406,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 out:
 	delayacct_freepages_end();
-	put_mems_allowed();
 
 	if (sc->nr_reclaimed)
 		return sc->nr_reclaimed;
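
[Note: reclaim drops get_mems_allowed()/put_mems_allowed() entirely. With the seqcount scheme, readers no longer block nodemask updates, and an allocation that fails during an update is retried by the page allocator itself, so do_try_to_free_pages() has nothing left to pin here.]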