diff options
| author | Borislav Petkov <bp@suse.de> | 2018-04-12 07:11:36 -0400 |
|---|---|---|
| committer | Borislav Petkov <bp@suse.de> | 2019-01-16 06:43:08 -0500 |
| commit | 093ae8f9a86a974c920b613860f1f7fd5bbd70ab (patch) | |
| tree | 645fe92d87a76409fa3ae942d816e80341c26c3c | |
| parent | 71a93c26930471e976dd184ef91931b2a5393afc (diff) | |
x86/TSC: Use RDTSCP
Currently, the kernel uses
[LM]FENCE; RDTSC
in the timekeeping code to guarantee monotonicity of time, where the
fence instruction (LFENCE or MFENCE) is selected based on the CPU vendor.
Replace that sequence with RDTSCP, which is faster or on-par and gives
the same guarantees, on CPUs that support it. The fenced RDTSC variants
remain as fallbacks for CPUs without RDTSCP.
A microbenchmark on Intel shows that the change is on-par.
On AMD, RDTSCP is either on-par with the current LFENCE-prefixed RDTSC
or slightly better.
The comparison is done with the LFENCE-prefixed RDTSC (and not with the
MFENCE-prefixed one, as one would normally expect) because all modern
AMD families make LFENCE serializing and thus avoid the heavy MFENCE by
effectively enabling X86_FEATURE_LFENCE_RDTSC.
Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: x86@kernel.org
Link: https://lkml.kernel.org/r/20181119184556.11479-1-bp@alien8.de
| -rw-r--r-- | arch/x86/include/asm/msr.h | 16 |
1 file changed, 14 insertions, 2 deletions
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 91e4cf189914..5cc3930cb465 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
| @@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void) | |||
| 217 | */ | 217 | */ |
| 218 | static __always_inline unsigned long long rdtsc_ordered(void) | 218 | static __always_inline unsigned long long rdtsc_ordered(void) |
| 219 | { | 219 | { |
| 220 | DECLARE_ARGS(val, low, high); | ||
| 221 | |||
| 220 | /* | 222 | /* |
| 221 | * The RDTSC instruction is not ordered relative to memory | 223 | * The RDTSC instruction is not ordered relative to memory |
| 222 | * access. The Intel SDM and the AMD APM are both vague on this | 224 | * access. The Intel SDM and the AMD APM are both vague on this |
| @@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void) | |||
| 227 | * ordering guarantees as reading from a global memory location | 229 | * ordering guarantees as reading from a global memory location |
| 228 | * that some other imaginary CPU is updating continuously with a | 230 | * that some other imaginary CPU is updating continuously with a |
| 229 | * time stamp. | 231 | * time stamp. |
| 232 | * | ||
| 233 | * Thus, use the preferred barrier on the respective CPU, aiming for | ||
| 234 | * RDTSCP as the default. | ||
| 230 | */ | 235 | */ |
| 231 | barrier_nospec(); | 236 | asm volatile(ALTERNATIVE_3("rdtsc", |
| 232 | return rdtsc(); | 237 | "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC, |
| 238 | "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, | ||
| 239 | "rdtscp", X86_FEATURE_RDTSCP) | ||
| 240 | : EAX_EDX_RET(val, low, high) | ||
| 241 | /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ | ||
| 242 | :: "ecx"); | ||
| 243 | |||
| 244 | return EAX_EDX_VAL(val, low, high); | ||
| 233 | } | 245 | } |
| 234 | 246 | ||
| 235 | static inline unsigned long long native_read_pmc(int counter) | 247 | static inline unsigned long long native_read_pmc(int counter) |
