aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-28 14:32:33 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-28 16:20:41 -0400
commite56b3bc7942982ac2589c942fb345e38bc7a341a (patch)
tree8130492904f5bb9cff061f62ebb1c5d6eed3308b /kernel
parent414f746d232d41ed6ae8632c4495ae795373c44b (diff)
cpu masks: optimize and clean up cpumask_of_cpu()
Clean up and optimize cpumask_of_cpu(), by sharing all the zero words. Instead of stupidly generating all possible i=0...NR_CPUS 2^i patterns creating a huge array of constant bitmasks, realize that the zero words can be shared. In other words, on a 64-bit architecture, we only ever need 64 of these arrays - with a different bit set in one single world (with enough zero words around it so that we can create any bitmask by just offsetting in that big array). And then we just put enough zeroes around it that we can point every single cpumask to be one of those things. So when we have 4k CPU's, instead of having 4k arrays (of 4k bits each, with one bit set in each array - 2MB memory total), we have exactly 64 arrays instead, each 8k bits in size (64kB total). And then we just point cpumask(n) to the right position (which we can calculate dynamically). Once we have the right arrays, getting "cpumask(n)" ends up being: static inline const cpumask_t *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; return (const cpumask_t *)p; } This brings other advantages and simplifications as well: - we are not wasting memory that is just filled with a single bit in various different places - we don't need all those games to re-create the arrays in some dense format, because they're already going to be dense enough. if we compile a kernel for up to 4k CPU's, "wasting" that 64kB of memory is a non-issue (especially since by doing this "overlapping" trick we probably get better cache behaviour anyway). [ mingo@elte.hu: Converted Linus's mails into a commit. See: http://lkml.org/lkml/2008/7/27/156 http://lkml.org/lkml/2008/7/28/320 Also applied a family filter - which also has the side-effect of leaving out the bits where Linus calls me an idio... Oh, never mind ;-) ] Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Mike Travis <travis@sgi.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c128
1 files changed, 20 insertions, 108 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a35d8995dc8c..06a8358bb418 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -462,115 +462,27 @@ out:
462 462
463#endif /* CONFIG_SMP */ 463#endif /* CONFIG_SMP */
464 464
465/* 64 bits of zeros, for initializers. */ 465/*
466#if BITS_PER_LONG == 32 466 * cpu_bit_bitmap[] is a special, "compressed" data structure that
467#define Z64 0, 0 467 * represents all NR_CPUS bits binary values of 1<<nr.
468#else 468 *
469#define Z64 0 469 * It is used by cpumask_of_cpu() to get a constant address to a CPU
470#endif 470 * mask value that has a single bit set only.
471 */
471 472
472/* Initializer macros. */ 473/* cpu_bit_bitmap[0] is empty - so we can back into it */
473#define CMI0(n) { .bits = { 1UL << (n) } } 474#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
474#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } } 475#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
475 476#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
476#define CMI8(n, ...) \ 477#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
477 CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__), \
478 CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__), \
479 CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__), \
480 CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)
481
482#if BITS_PER_LONG == 32
483#define CMI64(n, ...) \
484 CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \
485 CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \
486 CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \
487 CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
488#else
489#define CMI64(n, ...) \
490 CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__), \
491 CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__), \
492 CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__), \
493 CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
494#endif
495 478
496#define CMI256(n, ...) \ 479const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
497 CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \ 480
498 CMI64((n)+128, Z64, Z64, __VA_ARGS__), \ 481 MASK_DECLARE_8(0), MASK_DECLARE_8(8),
499 CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__) 482 MASK_DECLARE_8(16), MASK_DECLARE_8(24),
500#define Z256 Z64, Z64, Z64, Z64 483#if BITS_PER_LONG > 32
501 484 MASK_DECLARE_8(32), MASK_DECLARE_8(40),
502#define CMI1024(n, ...) \ 485 MASK_DECLARE_8(48), MASK_DECLARE_8(56),
503 CMI256((n), __VA_ARGS__), \
504 CMI256((n)+256, Z256, __VA_ARGS__), \
505 CMI256((n)+512, Z256, Z256, __VA_ARGS__), \
506 CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
507#define Z1024 Z256, Z256, Z256, Z256
508
509/* We want this statically initialized, just to be safe. We try not
510 * to waste too much space, either. */
511static const cpumask_t cpumask_map[]
512#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
513__initdata
514#endif
515= {
516 CMI0(0), CMI0(1), CMI0(2), CMI0(3),
517#if NR_CPUS > 4
518 CMI0(4), CMI0(5), CMI0(6), CMI0(7),
519#endif
520#if NR_CPUS > 8
521 CMI0(8), CMI0(9), CMI0(10), CMI0(11),
522 CMI0(12), CMI0(13), CMI0(14), CMI0(15),
523#endif
524#if NR_CPUS > 16
525 CMI0(16), CMI0(17), CMI0(18), CMI0(19),
526 CMI0(20), CMI0(21), CMI0(22), CMI0(23),
527 CMI0(24), CMI0(25), CMI0(26), CMI0(27),
528 CMI0(28), CMI0(29), CMI0(30), CMI0(31),
529#endif
530#if NR_CPUS > 32
531#if BITS_PER_LONG == 32
532 CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
533 CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
534 CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
535 CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
536 CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
537 CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
538 CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
539 CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
540#else
541 CMI0(32), CMI0(33), CMI0(34), CMI0(35),
542 CMI0(36), CMI0(37), CMI0(38), CMI0(39),
543 CMI0(40), CMI0(41), CMI0(42), CMI0(43),
544 CMI0(44), CMI0(45), CMI0(46), CMI0(47),
545 CMI0(48), CMI0(49), CMI0(50), CMI0(51),
546 CMI0(52), CMI0(53), CMI0(54), CMI0(55),
547 CMI0(56), CMI0(57), CMI0(58), CMI0(59),
548 CMI0(60), CMI0(61), CMI0(62), CMI0(63),
549#endif /* BITS_PER_LONG == 64 */
550#endif
551#if NR_CPUS > 64
552 CMI64(64, Z64),
553#endif
554#if NR_CPUS > 128
555 CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
556#endif
557#if NR_CPUS > 256
558 CMI256(256, Z256),
559#endif
560#if NR_CPUS > 512
561 CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
562#endif
563#if NR_CPUS > 1024
564 CMI1024(1024, Z1024),
565#endif
566#if NR_CPUS > 2048
567 CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
568#endif
569#if NR_CPUS > 4096
570#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
571#endif 486#endif
572}; 487};
573 488EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
574const cpumask_t *cpumask_of_cpu_map = cpumask_map;
575
576EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);