author    Vladimir Davydov <vdavydov@parallels.com>  2014-10-20 07:50:30 -0400
committer Tejun Heo <tj@kernel.org>                  2014-10-27 11:15:27 -0400
commit    344736f29b359790facd0b7a521e367f1715c11c (patch)
tree      c32487de22e7640a828f28819b5707790ede5105
parent    8447a0fee974433f7e0035fd30e1edecf00e014f (diff)
cpuset: simplify cpuset_node_allowed API
The current cpuset API for checking whether a zone/node is allowed to allocate from looks rather awkward. We have hardwall and softwall versions of cpuset_node_allowed, and the softwall version does literally the same as the hardwall version if __GFP_HARDWALL is passed in the gfp flags. If it is not, the softwall version may check the given node against the enclosing hardwall cpuset, for which it needs to take the callback lock.

This distinction was introduced by commit 02a0e53d8227 ("cpuset: rework cpuset_zone_allowed api"). Before that, we had only one version, with the __GFP_HARDWALL flag determining its behavior. The purpose of that commit was to avoid sleep-in-atomic bugs when someone mistakenly called the function without __GFP_HARDWALL for an atomic allocation; the suffixes were meant to make callers think before using the function.

However, since the callback lock was converted from a mutex to a spinlock by the previous patch, the softwall check function can no longer sleep, and these precautions are no longer necessary.

So let's simplify the API back to a single check.

Suggested-by: David Rientjes <rientjes@google.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
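For illustration only (not part of the patch): a minimal sketch of how a call site is converted under the unified API. The helper names below (was_hardwall_check, was_softwall_check) are hypothetical; only cpuset_zone_allowed() and __GFP_HARDWALL come from the patch. A caller that previously used the _hardwall variant now ORs __GFP_HARDWALL into its gfp mask, while a former _softwall caller passes its mask unchanged.

	/* Hypothetical conversion sketch -- not taken from the patch itself. */
	static int was_hardwall_check(struct zone *zone, gfp_t flags)
	{
		/* old: cpuset_zone_allowed_hardwall(zone, flags) */
		return cpuset_zone_allowed(zone, flags | __GFP_HARDWALL);
	}

	static int was_softwall_check(struct zone *zone, gfp_t flags)
	{
		/* old: cpuset_zone_allowed_softwall(zone, flags) */
		return cpuset_zone_allowed(zone, flags);
	}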
-rw-r--r--  include/linux/cpuset.h  37
-rw-r--r--  kernel/cpuset.c         55
-rw-r--r--  mm/hugetlb.c             2
-rw-r--r--  mm/oom_kill.c            2
-rw-r--r--  mm/page_alloc.c          6
-rw-r--r--  mm/slab.c                2
-rw-r--r--  mm/slub.c                3
-rw-r--r--  mm/vmscan.c              5
8 files changed, 20 insertions, 92 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 2f073db7392e..1b357997cac5 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -48,29 +48,16 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 void cpuset_init_current_mems_allowed(void);
 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
-extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask);
-extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
+extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
 
-static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
-	return nr_cpusets() <= 1 ||
-		__cpuset_node_allowed_softwall(node, gfp_mask);
-}
-
-static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	return nr_cpusets() <= 1 ||
-		__cpuset_node_allowed_hardwall(node, gfp_mask);
+	return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
 }
 
-static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
-	return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask);
-}
-
-static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
-{
-	return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
+	return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
 }
 
 extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -179,22 +166,12 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 	return 1;
 }
 
-static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
-{
-	return 1;
-}
-
-static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	return 1;
-}
-
-static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	return 1;
 }
 
-static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
 	return 1;
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f21ba868f0d1..38f7433c1cd2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2453,7 +2453,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 }
 
 /**
- * cpuset_node_allowed_softwall - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
  * @node: is this an allowed node?
  * @gfp_mask: memory allocation flags
  *
@@ -2465,13 +2465,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * flag, yes.
  * Otherwise, no.
  *
- * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
- * cpuset_node_allowed_hardwall().  Otherwise, cpuset_node_allowed_softwall()
- * might sleep, and might allow a node from an enclosing cpuset.
- *
- * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
- * cpusets, and never sleeps.
- *
  * The __GFP_THISNODE placement logic is really handled elsewhere,
  * by forcibly using a zonelist starting at a specified node, and by
  * (in get_page_from_freelist()) refusing to consider the zones for
@@ -2506,13 +2499,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  *	TIF_MEMDIE   - any node ok
  *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
- *
- * Rule:
- *    Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
- *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
- *    the code that might scan up ancestor cpusets and sleep.
  */
-int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+int __cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
@@ -2520,7 +2508,6 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
 	/*
@@ -2547,44 +2534,6 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 	return allowed;
 }
 
-/*
- * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
- * @node: is this an allowed node?
- * @gfp_mask: memory allocation flags
- *
- * If we're in interrupt, yes, we can always allocate.  If __GFP_THISNODE is
- * set, yes, we can always allocate.  If node is in our task's mems_allowed,
- * yes.  If the task has been OOM killed and has access to memory reserves as
- * specified by the TIF_MEMDIE flag, yes.
- * Otherwise, no.
- *
- * The __GFP_THISNODE placement logic is really handled elsewhere,
- * by forcibly using a zonelist starting at a specified node, and by
- * (in get_page_from_freelist()) refusing to consider the zones for
- * any node on the zonelist except the first.  By the time any such
- * calls get to this routine, we should just shut up and say 'yes'.
- *
- * Unlike the cpuset_node_allowed_softwall() variant, above,
- * this variant requires that the node be in the current task's
- * mems_allowed or that we're in interrupt.  It does not scan up the
- * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
- * It never sleeps.
- */
-int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
-		return 1;
-	if (node_isset(node, current->mems_allowed))
-		return 1;
-	/*
-	 * Allow tasks that have access to memory reserves because they have
-	 * been OOM killed to get memory anywhere.
-	 */
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		return 1;
-	return 0;
-}
-
 /**
  * cpuset_mem_spread_node() - On which node to begin search for a file page
  * cpuset_slab_spread_node() - On which node to begin search for a slab page
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9fd722769927..82da930fa3f8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -582,7 +582,7 @@ retry_cpuset:
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
+		if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
 			page = dequeue_huge_page_node(h, zone_to_nid(zone));
 			if (page) {
 				if (avoid_reserve)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5340f6b91312..3348280eef89 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -233,7 +233,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
 	/* Check this allocation failure is caused by cpuset's wall function */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 			high_zoneidx, nodemask)
-		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
+		if (!cpuset_zone_allowed(zone, gfp_mask))
 			cpuset_limited = true;
 
 	if (cpuset_limited) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9cd36b822444..ab07b496672f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1962,7 +1962,7 @@ zonelist_scan:
 
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
-	 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
+	 * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
 	 */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						high_zoneidx, nodemask) {
@@ -1973,7 +1973,7 @@ zonelist_scan:
 			continue;
 		if (cpusets_enabled() &&
 			(alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed_softwall(zone, gfp_mask))
+			!cpuset_zone_allowed(zone, gfp_mask))
 				continue;
 		/*
 		 * Distribute pages in proportion to the individual
@@ -2514,7 +2514,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 		alloc_flags |= ALLOC_HARDER;
 		/*
 		 * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
-		 * comment for __cpuset_node_allowed_softwall().
+		 * comment for __cpuset_node_allowed().
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
diff --git a/mm/slab.c b/mm/slab.c
index eb2b2ea30130..063a91bc8826 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3012,7 +3012,7 @@ retry:
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		nid = zone_to_nid(zone);
 
-		if (cpuset_zone_allowed_hardwall(zone, flags) &&
+		if (cpuset_zone_allowed(zone, flags | __GFP_HARDWALL) &&
 			get_node(cache, nid) &&
 			get_node(cache, nid)->free_objects) {
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/slub.c b/mm/slub.c
index ae7b9f1ad394..7d12f51d9bac 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1662,7 +1662,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
 
 			n = get_node(s, zone_to_nid(zone));
 
-			if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
+			if (n && cpuset_zone_allowed(zone,
+						     flags | __GFP_HARDWALL) &&
 					n->nr_partial > s->min_partial) {
 				object = get_partial_node(s, n, c, flags);
 				if (object) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dcb47074ae03..38878b2ab1d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2405,7 +2405,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			 * to global LRU.
 			 */
 			if (global_reclaim(sc)) {
-				if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+				if (!cpuset_zone_allowed(zone,
+							 GFP_KERNEL | __GFP_HARDWALL))
 					continue;
 
 				lru_pages += zone_reclaimable_pages(zone);
@@ -3388,7 +3389,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	if (!populated_zone(zone))
 		return;
 
-	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
 		return;
 	pgdat = zone->zone_pgdat;
 	if (pgdat->kswapd_max_order < order) {