diff options
author | Jiri Olsa <jolsa@redhat.com> | 2011-03-07 13:10:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-03-08 11:22:11 -0500 |
commit | ea7145477a461e09d8d194cac4b996dc4f449107 (patch) | |
tree | aa45594b6303b854f5fa85f82ba5dca01e0aa8ac /arch/x86 | |
parent | 86cb2ec7b22a0a89b8660110dc03321fadbef45f (diff) |
x86: Separate out entry text section
Put x86 entry code into a separate link section: .entry.text.
Separating the entry text section seems to have performance
benefits, due to more efficient instruction cache usage.
Running hackbench with perf stat --repeat showed that the change
compresses the icache footprint. The icache load miss rate went
down by about 15%:
before patch:
19417627 L1-icache-load-misses ( +- 0.147% )
after patch:
16490788 L1-icache-load-misses ( +- 0.180% )
The motivation of the patch was to fix a particular kprobes
bug that relates to the entry text section; the performance
advantage was discovered accidentally.
The full perf output follows:
- results for current tip tree:
Performance counter stats for './hackbench/hackbench 10' (500 runs):
19417627 L1-icache-load-misses ( +- 0.147% )
2676914223 instructions # 0.497 IPC ( +- 0.079% )
5389516026 cycles ( +- 0.144% )
0.206267711 seconds time elapsed ( +- 0.138% )
- results for current tip tree with the patch applied:
Performance counter stats for './hackbench/hackbench 10' (500 runs):
16490788 L1-icache-load-misses ( +- 0.180% )
2717734941 instructions # 0.502 IPC ( +- 0.079% )
5414756975 cycles ( +- 0.148% )
0.206747566 seconds time elapsed ( +- 0.137% )
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: masami.hiramatsu.pt@hitachi.com
Cc: ananth@in.ibm.com
Cc: davem@davemloft.net
Cc: 2nddept-manager@sdl.hitachi.co.jp
LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 1 |
4 files changed, 11 insertions, 4 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 518bb99c3394..f729b2e9679c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -25,6 +25,8 @@ | |||
25 | #define sysretl_audit ia32_ret_from_sys_call | 25 | #define sysretl_audit ia32_ret_from_sys_call |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | .section .entry.text, "ax" | ||
29 | |||
28 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 30 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
29 | 31 | ||
30 | .macro IA32_ARG_FIXUP noebp=0 | 32 | .macro IA32_ARG_FIXUP noebp=0 |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efad7ebb..f5accf8eaa78 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -65,6 +65,8 @@ | |||
65 | #define sysexit_audit syscall_exit_work | 65 | #define sysexit_audit syscall_exit_work |
66 | #endif | 66 | #endif |
67 | 67 | ||
68 | .section .entry.text, "ax" | ||
69 | |||
68 | /* | 70 | /* |
69 | * We use macros for low-level operations which need to be overridden | 71 | * We use macros for low-level operations which need to be overridden |
70 | * for paravirtualization. The following will never clobber any registers: | 72 | * for paravirtualization. The following will never clobber any registers: |
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) | |||
788 | */ | 790 | */ |
789 | .section .init.rodata,"a" | 791 | .section .init.rodata,"a" |
790 | ENTRY(interrupt) | 792 | ENTRY(interrupt) |
791 | .text | 793 | .section .entry.text, "ax" |
792 | .p2align 5 | 794 | .p2align 5 |
793 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 795 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
794 | ENTRY(irq_entries_start) | 796 | ENTRY(irq_entries_start) |
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
807 | .endif | 809 | .endif |
808 | .previous | 810 | .previous |
809 | .long 1b | 811 | .long 1b |
810 | .text | 812 | .section .entry.text, "ax" |
811 | vector=vector+1 | 813 | vector=vector+1 |
812 | .endif | 814 | .endif |
813 | .endr | 815 | .endr |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index aed1ffbeb0c9..0a0ed794edb2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -61,6 +61,8 @@ | |||
61 | #define __AUDIT_ARCH_LE 0x40000000 | 61 | #define __AUDIT_ARCH_LE 0x40000000 |
62 | 62 | ||
63 | .code64 | 63 | .code64 |
64 | .section .entry.text, "ax" | ||
65 | |||
64 | #ifdef CONFIG_FUNCTION_TRACER | 66 | #ifdef CONFIG_FUNCTION_TRACER |
65 | #ifdef CONFIG_DYNAMIC_FTRACE | 67 | #ifdef CONFIG_DYNAMIC_FTRACE |
66 | ENTRY(mcount) | 68 | ENTRY(mcount) |
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn) | |||
744 | */ | 746 | */ |
745 | .section .init.rodata,"a" | 747 | .section .init.rodata,"a" |
746 | ENTRY(interrupt) | 748 | ENTRY(interrupt) |
747 | .text | 749 | .section .entry.text |
748 | .p2align 5 | 750 | .p2align 5 |
749 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 751 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
750 | ENTRY(irq_entries_start) | 752 | ENTRY(irq_entries_start) |
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
763 | .endif | 765 | .endif |
764 | .previous | 766 | .previous |
765 | .quad 1b | 767 | .quad 1b |
766 | .text | 768 | .section .entry.text |
767 | vector=vector+1 | 769 | vector=vector+1 |
768 | .endif | 770 | .endif |
769 | .endr | 771 | .endr |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bf4700755184..6d4341d5c52a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -105,6 +105,7 @@ SECTIONS | |||
105 | SCHED_TEXT | 105 | SCHED_TEXT |
106 | LOCK_TEXT | 106 | LOCK_TEXT |
107 | KPROBES_TEXT | 107 | KPROBES_TEXT |
108 | ENTRY_TEXT | ||
108 | IRQENTRY_TEXT | 109 | IRQENTRY_TEXT |
109 | *(.fixup) | 110 | *(.fixup) |
110 | *(.gnu.warning) | 111 | *(.gnu.warning) |