diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-11-08 10:19:55 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-11-08 10:48:19 -0500 |
commit | 0d12cdd5f883f508d33b85c1bae98fa28987c8c7 (patch) | |
tree | e07bb1f9ef49062fbd9817fe41cab66964bedf03 | |
parent | 52c642f33b14bfa1b00ef2b68296effb34a573f3 (diff) |
sched: improve sched_clock() performance
in scheduler-intense workloads native_read_tsc() overhead accounts for
20% of the system overhead:
659567 system_call 41222.9375
686796 schedule 435.7843
718382 __switch_to 665.1685
823875 switch_mm 4526.7857
1883122 native_read_tsc 55385.9412
9761990 total 2.8468
this is large part due to the rdtsc_barrier() that is done before
and after reading the TSC.
But sched_clock() is not a precise clock in the GTOD sense, using such
barriers is completely pointless. So remove the barriers and only use
them in vget_cycles().
This improves lat_ctx performance by about 5%.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/include/asm/msr.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/tsc.h | 8 |
2 files changed, 7 insertions, 3 deletions
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 46be2fa7ac26..c2a812ebde89 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void) | |||
108 | { | 108 | { |
109 | DECLARE_ARGS(val, low, high); | 109 | DECLARE_ARGS(val, low, high); |
110 | 110 | ||
111 | rdtsc_barrier(); | ||
112 | asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); | 111 | asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); |
113 | rdtsc_barrier(); | ||
114 | 112 | ||
115 | return EAX_EDX_VAL(val, low, high); | 113 | return EAX_EDX_VAL(val, low, high); |
116 | } | 114 | } |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163cc91b..9cd83a8e40d5 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void) | |||
34 | 34 | ||
35 | static __always_inline cycles_t vget_cycles(void) | 35 | static __always_inline cycles_t vget_cycles(void) |
36 | { | 36 | { |
37 | cycles_t cycles; | ||
38 | |||
37 | /* | 39 | /* |
38 | * We only do VDSOs on TSC capable CPUs, so this shouldnt | 40 | * We only do VDSOs on TSC capable CPUs, so this shouldnt |
39 | * access boot_cpu_data (which is not VDSO-safe): | 41 | * access boot_cpu_data (which is not VDSO-safe): |
@@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void) | |||
42 | if (!cpu_has_tsc) | 44 | if (!cpu_has_tsc) |
43 | return 0; | 45 | return 0; |
44 | #endif | 46 | #endif |
45 | return (cycles_t)__native_read_tsc(); | 47 | rdtsc_barrier(); |
48 | cycles = (cycles_t)__native_read_tsc(); | ||
49 | rdtsc_barrier(); | ||
50 | |||
51 | return cycles; | ||
46 | } | 52 | } |
47 | 53 | ||
48 | extern void tsc_init(void); | 54 | extern void tsc_init(void); |