aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Vlastimil Babka <vbabka@suse.cz> 2016-05-19 20:14:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-19 22:12:14 -0400
commit 002f290627c27068087f6204baec7a334e5a3b48 (patch)
tree ed5a013173e28c0bf864de3a05669967cfb74f57
parent 0b423ca22f95a867f789aab1fe57ee4e378df43b (diff)
cpuset: use static key better and convert to new API
An important function for cpusets is cpuset_node_allowed(), which optimizes on the fact that if there is only a single root cpuset, the node must be trivially allowed. But the check "nr_cpusets() <= 1" doesn't use the cpusets_enabled_key static key the right way, where static keys eliminate branching overhead with jump labels.

This patch converts it so that the static key is used properly. It's also switched to the new static key API, and the checking functions are converted to return bool instead of int. We also provide a new variant __cpuset_zone_allowed() which expects that the static key check was already done and the key was enabled. This is needed for get_page_from_freelist(), where we also want to avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags.

The impact on the page allocator microbenchmark is less than expected, but the cleanup in itself is worthwhile.

                                 4.6.0-rc2            4.6.0-rc2
                           multcheck-v1r20         cpuset-v1r20
Min      alloc-odr0-1        348.00 (  0.00%)     348.00 (  0.00%)
Min      alloc-odr0-2        254.00 (  0.00%)     254.00 (  0.00%)
Min      alloc-odr0-4        213.00 (  0.00%)     213.00 (  0.00%)
Min      alloc-odr0-8        186.00 (  0.00%)     183.00 (  1.61%)
Min      alloc-odr0-16       173.00 (  0.00%)     171.00 (  1.16%)
Min      alloc-odr0-32       166.00 (  0.00%)     163.00 (  1.81%)
Min      alloc-odr0-64       162.00 (  0.00%)     159.00 (  1.85%)
Min      alloc-odr0-128      160.00 (  0.00%)     157.00 (  1.88%)
Min      alloc-odr0-256      169.00 (  0.00%)     166.00 (  1.78%)
Min      alloc-odr0-512      180.00 (  0.00%)     180.00 (  0.00%)
Min      alloc-odr0-1024     188.00 (  0.00%)     187.00 (  0.53%)
Min      alloc-odr0-2048     194.00 (  0.00%)     193.00 (  0.52%)
Min      alloc-odr0-4096     199.00 (  0.00%)     198.00 (  0.50%)
Min      alloc-odr0-8192     202.00 (  0.00%)     201.00 (  0.50%)
Min      alloc-odr0-16384    203.00 (  0.00%)     202.00 (  0.49%)

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Zefan Li <lizefan@huawei.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/cpuset.h 42
-rw-r--r-- kernel/cpuset.c 14
-rw-r--r-- mm/page_alloc.c 2
3 files changed, 36 insertions, 22 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 85a868ccb493..bfc204e70338 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -16,26 +16,26 @@
16 16
17#ifdef CONFIG_CPUSETS 17#ifdef CONFIG_CPUSETS
18 18
19extern struct static_key cpusets_enabled_key; 19extern struct static_key_false cpusets_enabled_key;
20static inline bool cpusets_enabled(void) 20static inline bool cpusets_enabled(void)
21{ 21{
22 return static_key_false(&cpusets_enabled_key); 22 return static_branch_unlikely(&cpusets_enabled_key);
23} 23}
24 24
25static inline int nr_cpusets(void) 25static inline int nr_cpusets(void)
26{ 26{
27 /* jump label reference count + the top-level cpuset */ 27 /* jump label reference count + the top-level cpuset */
28 return static_key_count(&cpusets_enabled_key) + 1; 28 return static_key_count(&cpusets_enabled_key.key) + 1;
29} 29}
30 30
31static inline void cpuset_inc(void) 31static inline void cpuset_inc(void)
32{ 32{
33 static_key_slow_inc(&cpusets_enabled_key); 33 static_branch_inc(&cpusets_enabled_key);
34} 34}
35 35
36static inline void cpuset_dec(void) 36static inline void cpuset_dec(void)
37{ 37{
38 static_key_slow_dec(&cpusets_enabled_key); 38 static_branch_dec(&cpusets_enabled_key);
39} 39}
40 40
41extern int cpuset_init(void); 41extern int cpuset_init(void);
@@ -48,16 +48,25 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
48void cpuset_init_current_mems_allowed(void); 48void cpuset_init_current_mems_allowed(void);
49int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); 49int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
50 50
51extern int __cpuset_node_allowed(int node, gfp_t gfp_mask); 51extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask);
52 52
53static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) 53static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
54{ 54{
55 return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask); 55 if (cpusets_enabled())
56 return __cpuset_node_allowed(node, gfp_mask);
57 return true;
56} 58}
57 59
58static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) 60static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
59{ 61{
60 return cpuset_node_allowed(zone_to_nid(z), gfp_mask); 62 return __cpuset_node_allowed(zone_to_nid(z), gfp_mask);
63}
64
65static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
66{
67 if (cpusets_enabled())
68 return __cpuset_zone_allowed(z, gfp_mask);
69 return true;
61} 70}
62 71
63extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, 72extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -172,14 +181,19 @@ static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
172 return 1; 181 return 1;
173} 182}
174 183
175static inline int cpuset_node_allowed(int node, gfp_t gfp_mask) 184static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
176{ 185{
177 return 1; 186 return true;
178} 187}
179 188
180static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) 189static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
181{ 190{
182 return 1; 191 return true;
192}
193
194static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
195{
196 return true;
183} 197}
184 198
185static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, 199static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 611cc69af8f0..73e93e53884d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,7 +61,7 @@
61#include <linux/cgroup.h> 61#include <linux/cgroup.h>
62#include <linux/wait.h> 62#include <linux/wait.h>
63 63
64struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; 64DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
65 65
66/* See "Frequency meter" comments, below. */ 66/* See "Frequency meter" comments, below. */
67 67
@@ -2528,27 +2528,27 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
2528 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok 2528 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
2529 * GFP_USER - only nodes in current tasks mems allowed ok. 2529 * GFP_USER - only nodes in current tasks mems allowed ok.
2530 */ 2530 */
2531int __cpuset_node_allowed(int node, gfp_t gfp_mask) 2531bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
2532{ 2532{
2533 struct cpuset *cs; /* current cpuset ancestors */ 2533 struct cpuset *cs; /* current cpuset ancestors */
2534 int allowed; /* is allocation in zone z allowed? */ 2534 int allowed; /* is allocation in zone z allowed? */
2535 unsigned long flags; 2535 unsigned long flags;
2536 2536
2537 if (in_interrupt()) 2537 if (in_interrupt())
2538 return 1; 2538 return true;
2539 if (node_isset(node, current->mems_allowed)) 2539 if (node_isset(node, current->mems_allowed))
2540 return 1; 2540 return true;
2541 /* 2541 /*
2542 * Allow tasks that have access to memory reserves because they have 2542 * Allow tasks that have access to memory reserves because they have
2543 * been OOM killed to get memory anywhere. 2543 * been OOM killed to get memory anywhere.
2544 */ 2544 */
2545 if (unlikely(test_thread_flag(TIF_MEMDIE))) 2545 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2546 return 1; 2546 return true;
2547 if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ 2547 if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
2548 return 0; 2548 return false;
2549 2549
2550 if (current->flags & PF_EXITING) /* Let dying task have memory */ 2550 if (current->flags & PF_EXITING) /* Let dying task have memory */
2551 return 1; 2551 return true;
2552 2552
2553 /* Not hardwall and node outside mems_allowed: scan up cpusets */ 2553 /* Not hardwall and node outside mems_allowed: scan up cpusets */
2554 spin_lock_irqsave(&callback_lock, flags); 2554 spin_lock_irqsave(&callback_lock, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdf7a13311b5..39c441bb8d61 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2859,7 +2859,7 @@ zonelist_scan:
2859 2859
2860 if (cpusets_enabled() && 2860 if (cpusets_enabled() &&
2861 (alloc_flags & ALLOC_CPUSET) && 2861 (alloc_flags & ALLOC_CPUSET) &&
2862 !cpuset_zone_allowed(zone, gfp_mask)) 2862 !__cpuset_zone_allowed(zone, gfp_mask))
2863 continue; 2863 continue;
2864 /* 2864 /*
2865 * Distribute pages in proportion to the individual 2865 * Distribute pages in proportion to the individual