diff options
| author | Mike Travis <travis@sgi.com> | 2008-05-12 15:21:13 -0400 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2008-05-23 12:23:38 -0400 |
| commit | 41df0d61c266998b8049df7fec119cd518a43aa1 (patch) | |
| tree | 6791fa6e3fdaee51ae348f9fe63972d648beaf5a /include/linux | |
| parent | 143aa5c53bd3895d42d7c08753fe58293988a69d (diff) | |
x86: Add performance variants of cpumask operators
* Increase performance for systems with a large NR_CPUS count by limiting
the range of the cpumask operators that loop over the bits in a cpumask_t
variable. This removes a large amount of wasted cpu cycles.
* Add performance variants of the cpumask operators:
int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
int first_cpu_nr(mask) Number lowest set bit, or nr_cpu_ids
int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
* Modify following to use performance variants:
#define num_online_cpus() cpus_weight_nr(cpu_online_map)
#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
#define num_present_cpus() cpus_weight_nr(cpu_present_map)
#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
* Comment added to include/linux/cpumask.h:
Note: The alternate operations with the suffix "_nr" are used
to limit the range of the loop to nr_cpu_ids instead of
NR_CPUS when NR_CPUS > 64 for performance reasons.
If NR_CPUS is <= 64 then most assembler bitmask
operators execute faster with a constant range, so
the operator will continue to use NR_CPUS.
Another consideration is that nr_cpu_ids is initialized
to NR_CPUS and isn't lowered until the possible cpus are
discovered (including any disabled cpus). So early uses
will span the entire range of NR_CPUS.
(The net effect is that for systems with 64 or fewer CPUs there are no
functional changes.)
For inclusion into sched-devel/latest tree.
Based on:
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+ sched-devel/latest .../mingo/linux-2.6-sched-devel.git
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/cpumask.h | 92 |
1 files changed, 62 insertions, 30 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5df3db58fcc..b49472d1af8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h | |||
| @@ -17,6 +17,20 @@ | |||
| 17 | * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. | 17 | * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. |
| 18 | * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. | 18 | * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. |
| 19 | * | 19 | * |
| 20 | * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||
| 21 | * Note: The alternate operations with the suffix "_nr" are used | ||
| 22 | * to limit the range of the loop to nr_cpu_ids instead of | ||
| 23 | * NR_CPUS when NR_CPUS > 64 for performance reasons. | ||
| 24 | * If NR_CPUS is <= 64 then most assembler bitmask | ||
| 25 | * operators execute faster with a constant range, so | ||
| 26 | * the operator will continue to use NR_CPUS. | ||
| 27 | * | ||
| 28 | * Another consideration is that nr_cpu_ids is initialized | ||
| 29 | * to NR_CPUS and isn't lowered until the possible cpus are | ||
| 30 | * discovered (including any disabled cpus). So early uses | ||
| 31 | * will span the entire range of NR_CPUS. | ||
| 32 | * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||
| 33 | * | ||
| 20 | * The available cpumask operations are: | 34 | * The available cpumask operations are: |
| 21 | * | 35 | * |
| 22 | * void cpu_set(cpu, mask) turn on bit 'cpu' in mask | 36 | * void cpu_set(cpu, mask) turn on bit 'cpu' in mask |
| @@ -38,12 +52,14 @@ | |||
| 38 | * int cpus_empty(mask) Is mask empty (no bits sets)? | 52 | * int cpus_empty(mask) Is mask empty (no bits sets)? |
| 39 | * int cpus_full(mask) Is mask full (all bits sets)? | 53 | * int cpus_full(mask) Is mask full (all bits sets)? |
| 40 | * int cpus_weight(mask) Hamming weight - number of set bits | 54 | * int cpus_weight(mask) Hamming weight - number of set bits |
| 55 | * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS | ||
| 41 | * | 56 | * |
| 42 | * void cpus_shift_right(dst, src, n) Shift right | 57 | * void cpus_shift_right(dst, src, n) Shift right |
| 43 | * void cpus_shift_left(dst, src, n) Shift left | 58 | * void cpus_shift_left(dst, src, n) Shift left |
| 44 | * | 59 | * |
| 45 | * int first_cpu(mask) Number lowest set bit, or NR_CPUS | 60 | * int first_cpu(mask) Number lowest set bit, or NR_CPUS |
| 46 | * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS | 61 | * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS |
| 62 | * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids | ||
| 47 | * | 63 | * |
| 48 | * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set | 64 | * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set |
| 49 | * CPU_MASK_ALL Initializer - all bits set | 65 | * CPU_MASK_ALL Initializer - all bits set |
| @@ -59,7 +75,8 @@ | |||
| 59 | * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap | 75 | * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap |
| 60 | * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz | 76 | * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz |
| 61 | * | 77 | * |
| 62 | * for_each_cpu_mask(cpu, mask) for-loop cpu over mask | 78 | * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS |
| 79 | * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids | ||
| 63 | * | 80 | * |
| 64 | * int num_online_cpus() Number of online CPUs | 81 | * int num_online_cpus() Number of online CPUs |
| 65 | * int num_possible_cpus() Number of all possible CPUs | 82 | * int num_possible_cpus() Number of all possible CPUs |
| @@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp, | |||
| 216 | bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); | 233 | bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); |
| 217 | } | 234 | } |
| 218 | 235 | ||
| 219 | #ifdef CONFIG_SMP | ||
| 220 | int __first_cpu(const cpumask_t *srcp); | ||
| 221 | #define first_cpu(src) __first_cpu(&(src)) | ||
| 222 | int __next_cpu(int n, const cpumask_t *srcp); | ||
| 223 | #define next_cpu(n, src) __next_cpu((n), &(src)) | ||
| 224 | #else | ||
| 225 | #define first_cpu(src) ({ (void)(src); 0; }) | ||
| 226 | #define next_cpu(n, src) ({ (void)(src); 1; }) | ||
| 227 | #endif | ||
| 228 | 236 | ||
| 229 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | 237 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP |
| 230 | extern cpumask_t *cpumask_of_cpu_map; | 238 | extern cpumask_t *cpumask_of_cpu_map; |
| @@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, | |||
| 343 | bitmap_fold(dstp->bits, origp->bits, sz, nbits); | 351 | bitmap_fold(dstp->bits, origp->bits, sz, nbits); |
| 344 | } | 352 | } |
| 345 | 353 | ||
| 346 | #if NR_CPUS > 1 | 354 | #if NR_CPUS == 1 |
| 355 | |||
| 356 | #define nr_cpu_ids 1 | ||
| 357 | #define first_cpu(src) ({ (void)(src); 0; }) | ||
| 358 | #define next_cpu(n, src) ({ (void)(src); 1; }) | ||
| 359 | #define any_online_cpu(mask) 0 | ||
| 360 | #define for_each_cpu_mask(cpu, mask) \ | ||
| 361 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | ||
| 362 | |||
| 363 | #else /* NR_CPUS > 1 */ | ||
| 364 | |||
| 365 | extern int nr_cpu_ids; | ||
| 366 | int __first_cpu(const cpumask_t *srcp); | ||
| 367 | int __next_cpu(int n, const cpumask_t *srcp); | ||
| 368 | int __any_online_cpu(const cpumask_t *mask); | ||
| 369 | |||
| 370 | #define first_cpu(src) __first_cpu(&(src)) | ||
| 371 | #define next_cpu(n, src) __next_cpu((n), &(src)) | ||
| 372 | #define any_online_cpu(mask) __any_online_cpu(&(mask)) | ||
| 347 | #define for_each_cpu_mask(cpu, mask) \ | 373 | #define for_each_cpu_mask(cpu, mask) \ |
| 348 | for ((cpu) = first_cpu(mask); \ | 374 | for ((cpu) = first_cpu(mask); \ |
| 349 | (cpu) < NR_CPUS; \ | 375 | (cpu) < NR_CPUS; \ |
| 350 | (cpu) = next_cpu((cpu), (mask))) | 376 | (cpu) = next_cpu((cpu), (mask))) |
| 351 | #else /* NR_CPUS == 1 */ | 377 | #endif |
| 352 | #define for_each_cpu_mask(cpu, mask) \ | 378 | |
| 353 | for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) | 379 | #if NR_CPUS <= 64 |
| 354 | #endif /* NR_CPUS */ | 380 | |
| 381 | #define next_cpu_nr(n, src) next_cpu(n, src) | ||
| 382 | #define cpus_weight_nr(cpumask) cpus_weight(cpumask) | ||
| 383 | #define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) | ||
| 384 | |||
| 385 | #else /* NR_CPUS > 64 */ | ||
| 386 | |||
| 387 | int __next_cpu_nr(int n, const cpumask_t *srcp); | ||
| 388 | #define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) | ||
| 389 | #define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) | ||
| 390 | #define for_each_cpu_mask_nr(cpu, mask) \ | ||
| 391 | for ((cpu) = first_cpu(mask); \ | ||
| 392 | (cpu) < nr_cpu_ids; \ | ||
| 393 | (cpu) = next_cpu_nr((cpu), (mask))) | ||
| 394 | |||
| 395 | #endif /* NR_CPUS > 64 */ | ||
| 355 | 396 | ||
| 356 | /* | 397 | /* |
| 357 | * The following particular system cpumasks and operations manage | 398 | * The following particular system cpumasks and operations manage |
| @@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map; | |||
| 414 | extern cpumask_t cpu_present_map; | 455 | extern cpumask_t cpu_present_map; |
| 415 | 456 | ||
| 416 | #if NR_CPUS > 1 | 457 | #if NR_CPUS > 1 |
| 417 | #define num_online_cpus() cpus_weight(cpu_online_map) | 458 | #define num_online_cpus() cpus_weight_nr(cpu_online_map) |
| 418 | #define num_possible_cpus() cpus_weight(cpu_possible_map) | 459 | #define num_possible_cpus() cpus_weight_nr(cpu_possible_map) |
| 419 | #define num_present_cpus() cpus_weight(cpu_present_map) | 460 | #define num_present_cpus() cpus_weight_nr(cpu_present_map) |
| 420 | #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) | 461 | #define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) |
| 421 | #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) | 462 | #define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) |
| 422 | #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) | 463 | #define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) |
| @@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map; | |||
| 431 | 472 | ||
| 432 | #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) | 473 | #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) |
| 433 | 474 | ||
| 434 | #ifdef CONFIG_SMP | 475 | #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map) |
| 435 | extern int nr_cpu_ids; | 476 | #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) |
| 436 | #define any_online_cpu(mask) __any_online_cpu(&(mask)) | 477 | #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) |
| 437 | int __any_online_cpu(const cpumask_t *mask); | ||
| 438 | #else | ||
| 439 | #define nr_cpu_ids 1 | ||
| 440 | #define any_online_cpu(mask) 0 | ||
| 441 | #endif | ||
| 442 | |||
| 443 | #define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) | ||
| 444 | #define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) | ||
| 445 | #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) | ||
| 446 | 478 | ||
| 447 | #endif /* __LINUX_CPUMASK_H */ | 479 | #endif /* __LINUX_CPUMASK_H */ |
