aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mike Travis <travis@sgi.com> 2008-05-12 15:21:13 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2008-05-23 12:23:38 -0400
commit 41df0d61c266998b8049df7fec119cd518a43aa1 (patch)
tree 6791fa6e3fdaee51ae348f9fe63972d648beaf5a
parent 143aa5c53bd3895d42d7c08753fe58293988a69d (diff)
x86: Add performance variants of cpumask operators
  * Increase performance for systems with large count NR_CPUS by limiting
    the range of the cpumask operators that loop over the bits in a
    cpumask_t variable. This removes a large amount of wasted cpu cycles.

  * Add performance variants of the cpumask operators:

    int cpus_weight_nr(mask)          Same using nr_cpu_ids instead of NR_CPUS
    int first_cpu_nr(mask)            Number lowest set bit, or nr_cpu_ids
    int next_cpu_nr(cpu, mask)        Next cpu past 'cpu', or nr_cpu_ids
    for_each_cpu_mask_nr(cpu, mask)   for-loop cpu over mask using nr_cpu_ids

  * Modify following to use performance variants:

    #define num_online_cpus()          cpus_weight_nr(cpu_online_map)
    #define num_possible_cpus()        cpus_weight_nr(cpu_possible_map)
    #define num_present_cpus()         cpus_weight_nr(cpu_present_map)

    #define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), ...)
    #define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), ...)
    #define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), ...)

  * Comment added to include/linux/cpumask.h:

    Note: The alternate operations with the suffix "_nr" are used to
    limit the range of the loop to nr_cpu_ids instead of NR_CPUS when
    NR_CPUS > 64 for performance reasons. If NR_CPUS is <= 64 then most
    assembler bitmask operators execute faster with a constant range, so
    the operator will continue to use NR_CPUS.

    Another consideration is that nr_cpu_ids is initialized to NR_CPUS
    and isn't lowered until the possible cpus are discovered (including
    any disabled cpus). So early uses will span the entire range of
    NR_CPUS.

    (The net effect is that for systems with 64 or fewer CPUs there are
    no functional changes.)

For inclusion into sched-devel/latest tree.
Based on:
    git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    + sched-devel/latest .../mingo/linux-2.6-sched-devel.git

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Reviewed-by: Paul Jackson <pj@sgi.com>
Reviewed-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/linux/cpumask.h 92
-rw-r--r-- lib/cpumask.c 9
2 files changed, 71 insertions, 30 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 5df3db58fcc6..b49472d1af84 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -17,6 +17,20 @@
17 * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. 17 * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
18 * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. 18 * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
19 * 19 *
20 * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
21 * Note: The alternate operations with the suffix "_nr" are used
22 * to limit the range of the loop to nr_cpu_ids instead of
23 * NR_CPUS when NR_CPUS > 64 for performance reasons.
24 * If NR_CPUS is <= 64 then most assembler bitmask
25 * operators execute faster with a constant range, so
26 * the operator will continue to use NR_CPUS.
27 *
28 * Another consideration is that nr_cpu_ids is initialized
29 * to NR_CPUS and isn't lowered until the possible cpus are
30 * discovered (including any disabled cpus). So early uses
31 * will span the entire range of NR_CPUS.
32 * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
33 *
20 * The available cpumask operations are: 34 * The available cpumask operations are:
21 * 35 *
22 * void cpu_set(cpu, mask) turn on bit 'cpu' in mask 36 * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
@@ -38,12 +52,14 @@
38 * int cpus_empty(mask) Is mask empty (no bits sets)? 52 * int cpus_empty(mask) Is mask empty (no bits sets)?
39 * int cpus_full(mask) Is mask full (all bits sets)? 53 * int cpus_full(mask) Is mask full (all bits sets)?
40 * int cpus_weight(mask) Hamming weigh - number of set bits 54 * int cpus_weight(mask) Hamming weigh - number of set bits
55 * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS
41 * 56 *
42 * void cpus_shift_right(dst, src, n) Shift right 57 * void cpus_shift_right(dst, src, n) Shift right
43 * void cpus_shift_left(dst, src, n) Shift left 58 * void cpus_shift_left(dst, src, n) Shift left
44 * 59 *
45 * int first_cpu(mask) Number lowest set bit, or NR_CPUS 60 * int first_cpu(mask) Number lowest set bit, or NR_CPUS
46 * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS 61 * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
62 * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids
47 * 63 *
48 * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set 64 * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
49 * CPU_MASK_ALL Initializer - all bits set 65 * CPU_MASK_ALL Initializer - all bits set
@@ -59,7 +75,8 @@
59 * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap 75 * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap
60 * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz 76 * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz
61 * 77 *
62 * for_each_cpu_mask(cpu, mask) for-loop cpu over mask 78 * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS
79 * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids
63 * 80 *
64 * int num_online_cpus() Number of online CPUs 81 * int num_online_cpus() Number of online CPUs
65 * int num_possible_cpus() Number of all possible CPUs 82 * int num_possible_cpus() Number of all possible CPUs
@@ -216,15 +233,6 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
216 bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); 233 bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
217} 234}
218 235
219#ifdef CONFIG_SMP
220int __first_cpu(const cpumask_t *srcp);
221#define first_cpu(src) __first_cpu(&(src))
222int __next_cpu(int n, const cpumask_t *srcp);
223#define next_cpu(n, src) __next_cpu((n), &(src))
224#else
225#define first_cpu(src) ({ (void)(src); 0; })
226#define next_cpu(n, src) ({ (void)(src); 1; })
227#endif
228 236
229#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP 237#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
230extern cpumask_t *cpumask_of_cpu_map; 238extern cpumask_t *cpumask_of_cpu_map;
@@ -343,15 +351,48 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
343 bitmap_fold(dstp->bits, origp->bits, sz, nbits); 351 bitmap_fold(dstp->bits, origp->bits, sz, nbits);
344} 352}
345 353
346#if NR_CPUS > 1 354#if NR_CPUS == 1
355
356#define nr_cpu_ids 1
357#define first_cpu(src) ({ (void)(src); 0; })
358#define next_cpu(n, src) ({ (void)(src); 1; })
359#define any_online_cpu(mask) 0
360#define for_each_cpu_mask(cpu, mask) \
361 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
362
363#else /* NR_CPUS > 1 */
364
365extern int nr_cpu_ids;
366int __first_cpu(const cpumask_t *srcp);
367int __next_cpu(int n, const cpumask_t *srcp);
368int __any_online_cpu(const cpumask_t *mask);
369
370#define first_cpu(src) __first_cpu(&(src))
371#define next_cpu(n, src) __next_cpu((n), &(src))
372#define any_online_cpu(mask) __any_online_cpu(&(mask))
347#define for_each_cpu_mask(cpu, mask) \ 373#define for_each_cpu_mask(cpu, mask) \
348 for ((cpu) = first_cpu(mask); \ 374 for ((cpu) = first_cpu(mask); \
349 (cpu) < NR_CPUS; \ 375 (cpu) < NR_CPUS; \
350 (cpu) = next_cpu((cpu), (mask))) 376 (cpu) = next_cpu((cpu), (mask)))
351#else /* NR_CPUS == 1 */ 377#endif
352#define for_each_cpu_mask(cpu, mask) \ 378
353 for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) 379#if NR_CPUS <= 64
354#endif /* NR_CPUS */ 380
381#define next_cpu_nr(n, src) next_cpu(n, src)
382#define cpus_weight_nr(cpumask) cpus_weight(cpumask)
383#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask)
384
385#else /* NR_CPUS > 64 */
386
387int __next_cpu_nr(int n, const cpumask_t *srcp);
388#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src))
389#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids)
390#define for_each_cpu_mask_nr(cpu, mask) \
391 for ((cpu) = first_cpu(mask); \
392 (cpu) < nr_cpu_ids; \
393 (cpu) = next_cpu_nr((cpu), (mask)))
394
395#endif /* NR_CPUS > 64 */
355 396
356/* 397/*
357 * The following particular system cpumasks and operations manage 398 * The following particular system cpumasks and operations manage
@@ -414,9 +455,9 @@ extern cpumask_t cpu_online_map;
414extern cpumask_t cpu_present_map; 455extern cpumask_t cpu_present_map;
415 456
416#if NR_CPUS > 1 457#if NR_CPUS > 1
417#define num_online_cpus() cpus_weight(cpu_online_map) 458#define num_online_cpus() cpus_weight_nr(cpu_online_map)
418#define num_possible_cpus() cpus_weight(cpu_possible_map) 459#define num_possible_cpus() cpus_weight_nr(cpu_possible_map)
419#define num_present_cpus() cpus_weight(cpu_present_map) 460#define num_present_cpus() cpus_weight_nr(cpu_present_map)
420#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) 461#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
421#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) 462#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
422#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) 463#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
@@ -431,17 +472,8 @@ extern cpumask_t cpu_present_map;
431 472
432#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) 473#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
433 474
434#ifdef CONFIG_SMP 475#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
435extern int nr_cpu_ids; 476#define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map)
436#define any_online_cpu(mask) __any_online_cpu(&(mask)) 477#define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map)
437int __any_online_cpu(const cpumask_t *mask);
438#else
439#define nr_cpu_ids 1
440#define any_online_cpu(mask) 0
441#endif
442
443#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
444#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map)
445#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
446 478
447#endif /* __LINUX_CPUMASK_H */ 479#endif /* __LINUX_CPUMASK_H */
diff --git a/lib/cpumask.c b/lib/cpumask.c
index bb4f76d3c3e7..5f97dc25ef9c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -15,6 +15,15 @@ int __next_cpu(int n, const cpumask_t *srcp)
15} 15}
16EXPORT_SYMBOL(__next_cpu); 16EXPORT_SYMBOL(__next_cpu);
17 17
18#if NR_CPUS > 64
19int __next_cpu_nr(int n, const cpumask_t *srcp)
20{
21 return min_t(int, nr_cpu_ids,
22 find_next_bit(srcp->bits, nr_cpu_ids, n+1));
23}
24EXPORT_SYMBOL(__next_cpu_nr);
25#endif
26
18int __any_online_cpu(const cpumask_t *mask) 27int __any_online_cpu(const cpumask_t *mask)
19{ 28{
20 int cpu; 29 int cpu;