diff options
author | Alexey Dobriyan <adobriyan@gmail.com> | 2017-09-08 19:17:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-08 21:26:51 -0400 |
commit | f22ef333c32cc683922d7e3361a83ebc31b2ac6d (patch) | |
tree | 57173374d87f88a280e80a6b7b8ee3eb237d2c0b /lib/cpumask.c | |
parent | 0ce2c2029312ed78e37b56b08fa0f59ba97ef50b (diff) |
cpumask: make cpumask_next() out-of-line
Every for_each_XXX_cpu() invocation calls cpumask_next() which is an
inline function:
static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
}
However!
find_next_bit() is a regular out-of-line function, which means the "nr_cpu_ids"
load and the increment happen at the caller, resulting in a lot of bloat:
x86_64 defconfig:
add/remove: 3/0 grow/shrink: 8/373 up/down: 155/-5668 (-5513)
x86_64 allyesconfig-ish:
add/remove: 3/1 grow/shrink: 57/634 up/down: 3515/-28177 (-24662) !!!
Some archs redefine find_next_bit() but it is OK:
m68k inline but SMP is not supported
arm out-of-line
unicore32 out-of-line
A function call will happen anyway, so move the load and increment into the callee.
Link: http://lkml.kernel.org/r/20170824230010.GA1593@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/cpumask.c')
-rw-r--r-- | lib/cpumask.c | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/lib/cpumask.c b/lib/cpumask.c index 4731a0895760..8b1a1bd77539 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c | |||
@@ -6,6 +6,22 @@ | |||
6 | #include <linux/bootmem.h> | 6 | #include <linux/bootmem.h> |
7 | 7 | ||
8 | /** | 8 | /** |
9 | * cpumask_next - get the next cpu in a cpumask | ||
10 | * @n: the cpu prior to the place to search (ie. return will be > @n) | ||
11 | * @srcp: the cpumask pointer | ||
12 | * | ||
13 | * Returns >= nr_cpu_ids if no further cpus set. | ||
14 | */ | ||
15 | unsigned int cpumask_next(int n, const struct cpumask *srcp) | ||
16 | { | ||
17 | /* -1 is a legal arg here. */ | ||
18 | if (n != -1) | ||
19 | cpumask_check(n); | ||
20 | return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); | ||
21 | } | ||
22 | EXPORT_SYMBOL(cpumask_next); | ||
23 | |||
24 | /** | ||
9 | * cpumask_next_and - get the next cpu in *src1p & *src2p | 25 | * cpumask_next_and - get the next cpu in *src1p & *src2p |
10 | * @n: the cpu prior to the place to search (ie. return will be > @n) | 26 | * @n: the cpu prior to the place to search (ie. return will be > @n) |
11 | * @src1p: the first cpumask pointer | 27 | * @src1p: the first cpumask pointer |