diff options
author | Tejun Heo <tj@kernel.org> | 2011-01-25 08:26:50 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-01-25 08:26:50 -0500 |
commit | 19df0c2fef010e94e90df514aaf4e73f6b80145c (patch) | |
tree | 4b0b9c10622aead0d8b658cca6c49090149a91a8 /include/asm-generic | |
parent | c723fdab8aa728dc2bf0da6a0de8bb9c3f588d84 (diff) |
percpu: align percpu readmostly subsection to cacheline
Currently, the percpu readmostly subsection may share cachelines with
other percpu subsections, which may result in unnecessary cacheline
bouncing and performance degradation.
This patch adds a @cacheline parameter to the PERCPU() and PERCPU_VADDR()
linker macros, makes each arch's linker script specify its cacheline
size, and uses it to align percpu subsections.
This is based on Shaohua's x86 only patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shaohua.li@intel.com>
Diffstat (limited to 'include/asm-generic')
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 35 |
1 files changed, 22 insertions, 13 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6ebb81030d2d..439df587c12c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h | |||
@@ -15,7 +15,7 @@ | |||
15 | * HEAD_TEXT_SECTION | 15 | * HEAD_TEXT_SECTION |
16 | * INIT_TEXT_SECTION(PAGE_SIZE) | 16 | * INIT_TEXT_SECTION(PAGE_SIZE) |
17 | * INIT_DATA_SECTION(...) | 17 | * INIT_DATA_SECTION(...) |
18 | * PERCPU(PAGE_SIZE) | 18 | * PERCPU(CACHELINE_SIZE, PAGE_SIZE) |
19 | * __init_end = .; | 19 | * __init_end = .; |
20 | * | 20 | * |
21 | * _stext = .; | 21 | * _stext = .; |
@@ -683,13 +683,18 @@ | |||
683 | 683 | ||
684 | /** | 684 | /** |
685 | * PERCPU_VADDR - define output section for percpu area | 685 | * PERCPU_VADDR - define output section for percpu area |
686 | * @cacheline: cacheline size | ||
686 | * @vaddr: explicit base address (optional) | 687 | * @vaddr: explicit base address (optional) |
687 | * @phdr: destination PHDR (optional) | 688 | * @phdr: destination PHDR (optional) |
688 | * | 689 | * |
689 | * Macro which expands to output section for percpu area. If @vaddr | 690 | * Macro which expands to output section for percpu area. |
690 | * is not blank, it specifies explicit base address and all percpu | 691 | * |
691 | * symbols will be offset from the given address. If blank, @vaddr | 692 | * @cacheline is used to align subsections to avoid false cacheline |
692 | * always equals @laddr + LOAD_OFFSET. | 693 | * sharing between subsections for different purposes. |
694 | * | ||
695 | * If @vaddr is not blank, it specifies explicit base address and all | ||
696 | * percpu symbols will be offset from the given address. If blank, | ||
697 | * @vaddr always equals @laddr + LOAD_OFFSET. | ||
693 | * | 698 | * |
694 | * @phdr defines the output PHDR to use if not blank. Be warned that | 699 | * @phdr defines the output PHDR to use if not blank. Be warned that |
695 | * output PHDR is sticky. If @phdr is specified, the next output | 700 | * output PHDR is sticky. If @phdr is specified, the next output |
@@ -700,7 +705,7 @@ | |||
700 | * If there is no need to put the percpu section at a predetermined | 705 | * If there is no need to put the percpu section at a predetermined |
701 | * address, use PERCPU(). | 706 | * address, use PERCPU(). |
702 | */ | 707 | */ |
703 | #define PERCPU_VADDR(vaddr, phdr) \ | 708 | #define PERCPU_VADDR(cacheline, vaddr, phdr) \ |
704 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ | 709 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
705 | .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ | 710 | .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ |
706 | - LOAD_OFFSET) { \ | 711 | - LOAD_OFFSET) { \ |
@@ -708,7 +713,9 @@ | |||
708 | *(.data..percpu..first) \ | 713 | *(.data..percpu..first) \ |
709 | . = ALIGN(PAGE_SIZE); \ | 714 | . = ALIGN(PAGE_SIZE); \ |
710 | *(.data..percpu..page_aligned) \ | 715 | *(.data..percpu..page_aligned) \ |
716 | . = ALIGN(cacheline); \ | ||
711 | *(.data..percpu..readmostly) \ | 717 | *(.data..percpu..readmostly) \ |
718 | . = ALIGN(cacheline); \ | ||
712 | *(.data..percpu) \ | 719 | *(.data..percpu) \ |
713 | *(.data..percpu..shared_aligned) \ | 720 | *(.data..percpu..shared_aligned) \ |
714 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 721 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |
@@ -717,18 +724,18 @@ | |||
717 | 724 | ||
718 | /** | 725 | /** |
719 | * PERCPU - define output section for percpu area, simple version | 726 | * PERCPU - define output section for percpu area, simple version |
727 | * @cacheline: cacheline size | ||
720 | * @align: required alignment | 728 | * @align: required alignment |
721 | * | 729 | * |
722 | * Align to @align and outputs output section for percpu area. This | 730 | * Align to @align and outputs output section for percpu area. This macro |
723 | * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and | 731 | * doesn't manipulate @vaddr or @phdr and __per_cpu_load and |
724 | * __per_cpu_start will be identical. | 732 | * __per_cpu_start will be identical. |
725 | * | 733 | * |
726 | * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except | 734 | * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,) |
727 | * that __per_cpu_load is defined as a relative symbol against | 735 | * except that __per_cpu_load is defined as a relative symbol against |
728 | * .data..percpu which is required for relocatable x86_32 | 736 | * .data..percpu which is required for relocatable x86_32 configuration. |
729 | * configuration. | ||
730 | */ | 737 | */ |
731 | #define PERCPU(align) \ | 738 | #define PERCPU(cacheline, align) \ |
732 | . = ALIGN(align); \ | 739 | . = ALIGN(align); \ |
733 | .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ | 740 | .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ |
734 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ | 741 | VMLINUX_SYMBOL(__per_cpu_load) = .; \ |
@@ -736,7 +743,9 @@ | |||
736 | *(.data..percpu..first) \ | 743 | *(.data..percpu..first) \ |
737 | . = ALIGN(PAGE_SIZE); \ | 744 | . = ALIGN(PAGE_SIZE); \ |
738 | *(.data..percpu..page_aligned) \ | 745 | *(.data..percpu..page_aligned) \ |
746 | . = ALIGN(cacheline); \ | ||
739 | *(.data..percpu..readmostly) \ | 747 | *(.data..percpu..readmostly) \ |
748 | . = ALIGN(cacheline); \ | ||
740 | *(.data..percpu) \ | 749 | *(.data..percpu) \ |
741 | *(.data..percpu..shared_aligned) \ | 750 | *(.data..percpu..shared_aligned) \ |
742 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ | 751 | VMLINUX_SYMBOL(__per_cpu_end) = .; \ |