diff options
author | Mike Travis <travis@sgi.com> | 2008-05-12 15:21:13 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2008-05-23 12:23:38 -0400 |
commit | 41df0d61c266998b8049df7fec119cd518a43aa1 (patch) | |
tree | 6791fa6e3fdaee51ae348f9fe63972d648beaf5a | |
parent | 143aa5c53bd3895d42d7c08753fe58293988a69d (diff) |
x86: Add performance variants of cpumask operators
* Increase performance for systems with large count NR_CPUS by limiting
the range of the cpumask operators that loop over the bits in a cpumask_t
variable. This removes a large amount of wasted cpu cycles.
* Add performance variants of the cpumask operators:
int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids
int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
* Modify following to use performance variants:
#define num_online_cpus() cpus_weight_nr(cpu_online_map)
#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
#define num_present_cpus() cpus_weight_nr(cpu_present_map)
#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
* Comment added to include/linux/cpumask.h:
Note: The alternate operations with the suffix "_nr" are used
to limit the range of the loop to nr_cpu_ids instead of
NR_CPUS when NR_CPUS > 64 for performance reasons.
If NR_CPUS is <= 64 then most assembler bitmask
operators execute faster with a constant range, so
the operator will continue to use NR_CPUS.
Another consideration is that nr_cpu_ids is initialized
to NR_CPUS and isn't lowered until the possible cpus are
discovered (including any disabled cpus). So early uses
will span the entire range of NR_CPUS.
(The net effect is that for systems with 64 or less CPU's there are no
functional changes.)
For inclusion into sched-devel/latest tree.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+ sched-devel/latest .../mingo/linux-2.6-sched-devel.git
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/cpumask.h | 92 | ||||
-rw-r--r-- | lib/cpumask.c | 9 |
2 files changed, 71 insertions, 30 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5df3db58fcc6..b49472d1af84 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
@@ -17,6 +17,20 @@ | |||
17 | * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. | 17 | * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. |
18 | * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. | 18 | * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. |
19 | * | 19 | * |
20 | * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||
21 | * Note: The alternate operations with the suffix "_nr" are used | ||
22 | * to limit the range of the loop to nr_cpu_ids instead of | ||
23 | * NR_CPUS when NR_CPUS > 64 for performance reasons. | ||
24 | * If NR_CPUS is <= 64 then most assembler bitmask | ||
25 | * operators execute faster with a constant range, so | ||
26 | * the operator will continue to use NR_CPUS. | ||
27 | * | ||
28 | * Another consideration is that nr_cpu_ids is initialized | ||
29 | * to NR_CPUS and isn't lowered until the possible cpus are | ||
30 | * discovered (including any disabled cpus). So early uses | ||
31 | * will span the entire range of NR_CPUS. | ||
32 | * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||
33 | * | ||
20 | * The available cpumask operations are: | 34 | * The available cpumask operations are: |
21 | * | 35 | * |
22 | * void cpu_set(cpu, mask) turn on bit 'cpu' in mask | 36 | * void cpu_set(cpu, mask) turn on bit 'cpu' in mask |
@@ -38,12 +52,14 @@ | |||
38 | * int cpus_empty(mask) Is mask empty (no bits sets)? | 52 | * int cpus_empty(mask) Is mask empty (no bits sets)? |
39 | * int cpus_full(mask) Is mask full (all bits sets)? | 53 | * int cpus_full(mask) Is mask full (all bits sets)? |
40 | * int cpus_weight(mask) Hamming weigh - number of set bits | 54 | * int cpus_weight(mask) Hamming weigh - number of set bits |
55 | * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS | ||
41 | * | 56 | * |
42 | * void cpus_shift_right(dst, src, n) Shift right | 57 | * void cpus_shift_right(dst, src, n) Shift right |
43 | * void cpus_shift_left(dst, src, n) Shift left | 58 | * void cpus_shift_left(dst, src, n) Shift left |
44 | * | 59 | * |
45 | * int first_cpu(mask) Number lowest set bit, or NR_CPUS | 60 | * int first_cpu(mask) Number lowest set bit, or NR_CPUS |
46 | * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS | 61 | * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS |
62 | * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids | ||
47 | * | 63 | * |
48 | * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set | 64 | * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set |
49 | * CPU_MASK_ALL Initializer - all bits set | 65 | * CPU_MASK_ALL Initializer - all bits set |
@@ -59,7 +75,8 @@ | |||
59 | * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap | 75 | * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap |
60 | * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz | 76 | * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz |
61 | * | 77 | * |
62 | * for_each_cpu_mask(cpu, mask) for-loop cpu over mask | 78 | * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS |
79 | * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids | ||
63 | * | 80 | * |
64 | * int num_online_cpus() Number of online CPUs | 81 | * int num_online_cpus() Number of online CPUs |
65 | * int num_possible_cpus() Number of all possible CPUs | 82 | * int num_possible_cpus() Number of all possible CPUs |
@@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp, | |||
216 | bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); | 233 | bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); |
217 | } | 234 | } |
218 | 235 | ||
219 | #ifdef CONFIG_SMP | ||
220 | int __first_cpu(const cpumask_t *srcp); | ||
221 | #define first_cpu(src) __first_cpu(&(src)) | ||
222 | int __next_cpu(int n, const cpumask_t *srcp); | ||
223 | #define next_cpu(n, src) __next_cpu((n), &(src)) | ||
224 | #else | ||
225 | #define first_cpu(src) ({ (void)(src); 0; }) | ||
226 | #define next_cpu(n, src) ({ (void)(src); 1; }) | ||
227 | #endif | ||
228 | 236 | ||
229 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | 237 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP |
230 | extern cpumask_t *cpumask_of_cpu_map; | 238 | extern cpumask_t *cpumask_of_cpu_map; |
@@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, | |||
343 | bitmap_fold(dstp->bits, origp->bits, sz, nbits); | 351 | bitmap_fold(dstp->bits, origp->bits, sz, nbits); |
344 | } | 352 | } |
345 | 353 | ||
346 | #if NR_CPUS > 1 | 354 | #if NR_CPUS == 1 |
355 | |||
356 | #define nr_cpu_ids 1 | ||
357 | #define first_cpu(src) ({ (void)(src); 0; }) | ||
358 | #define next_cpu(n, src) ({ (void)(src); 1; }) | ||
359 | #define any_online_cpu(mask) 0 | ||
360 | #define for_each_cpu_mask(cpu, mask) \ | ||
361 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | ||
362 | |||
363 | #else /* NR_CPUS > 1 */ | ||
364 | |||
365 | extern int nr_cpu_ids; | ||
366 | int __first_cpu(const cpumask_t *srcp); | ||
367 | int __next_cpu(int n, const cpumask_t *srcp); | ||
368 | int __any_online_cpu(const cpumask_t *mask); | ||
369 | |||
370 | #define first_cpu(src) __first_cpu(&(src)) | ||
371 | #define next_cpu(n, src) __next_cpu((n), &(src)) | ||
372 | #define any_online_cpu(mask) __any_online_cpu(&(mask)) | ||
347 | #define for_each_cpu_mask(cpu, mask) \ | 373 | #define for_each_cpu_mask(cpu, mask) \ |
348 | for ((cpu) = first_cpu(mask); \ | 374 | for ((cpu) = first_cpu(mask); \ |
349 | (cpu) < NR_CPUS; \ | 375 | (cpu) < NR_CPUS; \ |
350 | (cpu) = next_cpu((cpu), (mask))) | 376 | (cpu) = next_cpu((cpu), (mask))) |
351 | #else /* NR_CPUS == 1 */ | 377 | #endif |
352 | #define for_each_cpu_mask(cpu, mask) \ | 378 | |
353 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | 379 | #if NR_CPUS <= 64 |
354 | #endif /* NR_CPUS */ | 380 | |
381 | #define next_cpu_nr(n, src) next_cpu(n, src) | ||
382 | #define cpus_weight_nr(cpumask) cpus_weight(cpumask) | ||
383 | #define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) | ||
384 | |||
385 | #else /* NR_CPUS > 64 */ | ||
386 | |||
387 | int __next_cpu_nr(int n, const cpumask_t *srcp); | ||
388 | #define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) | ||
389 | #define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) | ||
390 | #define for_each_cpu_mask_nr(cpu, mask) \ | ||
391 | for ((cpu) = first_cpu(mask); \ | ||
392 | (cpu) < nr_cpu_ids; \ | ||
393 | (cpu) = next_cpu_nr((cpu), (mask))) | ||
394 | |||
395 | #endif /* NR_CPUS > 64 */ | ||
355 | 396 | ||
356 | /* | 397 | /* |
357 | * The following particular system cpumasks and operations manage | 398 | * The following particular system cpumasks and operations manage |
@@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map; | |||
414 | extern cpumask_t cpu_present_map; | 455 | extern cpumask_t cpu_present_map; |
415 | 456 | ||
416 | #if NR_CPUS > 1 | 457 | #if NR_CPUS > 1 |
417 | #define num_online_cpus() cpus_weight(cpu_online_map) | 458 | #define num_online_cpus() cpus_weight_nr(cpu_online_map) |
418 | #define num_possible_cpus() cpus_weight(cpu_possible_map) | 459 | #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) |
419 | #define num_present_cpus() cpus_weight(cpu_present_map) | 460 | #define num_present_cpus() cpus_weight_nr(cpu_present_map) |
420 | #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) | 461 | #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) |
421 | #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) | 462 | #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) |
422 | #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) | 463 | #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) |
@@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map; | |||
431 | 472 | ||
432 | #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) | 473 | #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) |
433 | 474 | ||
434 | #ifdef CONFIG_SMP | 475 | #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) |
435 | extern int nr_cpu_ids; | 476 | #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) |
436 | #define any_online_cpu(mask) __any_online_cpu(&(mask)) | 477 | #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) |
437 | int __any_online_cpu(const cpumask_t *mask); | ||
438 | #else | ||
439 | #define nr_cpu_ids 1 | ||
440 | #define any_online_cpu(mask) 0 | ||
441 | #endif | ||
442 | |||
443 | #define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) | ||
444 | #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) | ||
445 | #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) | ||
446 | 478 | ||
447 | #endif /* __LINUX_CPUMASK_H */ | 479 | #endif /* __LINUX_CPUMASK_H */ |
diff --git a/lib/cpumask.c b/lib/cpumask.c index bb4f76d3c3e7..5f97dc25ef9c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c | |||
@@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp) | |||
15 | } | 15 | } |
16 | EXPORT_SYMBOL(__next_cpu); | 16 | EXPORT_SYMBOL(__next_cpu); |
17 | 17 | ||
18 | #if NR_CPUS > 64 | ||
19 | int __next_cpu_nr(int n, const cpumask_t *srcp) | ||
20 | { | ||
21 | return min_t(int, nr_cpu_ids, | ||
22 | find_next_bit(srcp->bits, nr_cpu_ids, n+1)); | ||
23 | } | ||
24 | EXPORT_SYMBOL(__next_cpu_nr); | ||
25 | #endif | ||
26 | |||
18 | int __any_online_cpu(const cpumask_t *mask) | 27 | int __any_online_cpu(const cpumask_t *mask) |
19 | { | 28 | { |
20 | int cpu; | 29 | int cpu; |