diff options
author | Kan Liang <kan.liang@intel.com> | 2014-07-14 15:25:56 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2014-07-16 07:18:43 -0400 |
commit | 338b522ca43cfd32d11a370f4203bcd089c6c877 (patch) | |
tree | b42d503642417da49decd501a5d372536cb7db80 /arch | |
parent | 4a1c0f262f88e2676fda80a6bf80e7dbccae1dcb (diff) |
perf/x86/intel: Protect LBR and extra_regs against KVM lying
With -cpu host, KVM reports LBR and extra_regs support, if the host has
support.
When the guest perf driver tries to access LBR or extra_regs MSR,
it #GPs all MSR accesses,since KVM doesn't handle LBR and extra_regs support.
So check the related MSRs access right once at initialization time to avoid
the error access at runtime.
For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y
(for host kernel).
And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel).
Start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in guest to trigger LBR
Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to
trigger offcore_rsp #GP
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maria Dimakopoulou <maria.n.dimakopoulou@gmail.com>
Cc: Mark Davies <junk@eslaf.co.uk>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yan, Zheng <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 66 |
3 files changed, 75 insertions, 6 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bdfbff8a4f6..2879ecdaac43 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | |||
118 | continue; | 118 | continue; |
119 | if (event->attr.config1 & ~er->valid_mask) | 119 | if (event->attr.config1 & ~er->valid_mask) |
120 | return -EINVAL; | 120 | return -EINVAL; |
121 | /* Check if the extra msrs can be safely accessed*/ | ||
122 | if (!er->extra_msr_access) | ||
123 | return -ENXIO; | ||
121 | 124 | ||
122 | reg->idx = er->idx; | 125 | reg->idx = er->idx; |
123 | reg->config = event->attr.config1; | 126 | reg->config = event->attr.config1; |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bdd974b..8ade93111e03 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -295,14 +295,16 @@ struct extra_reg { | |||
295 | u64 config_mask; | 295 | u64 config_mask; |
296 | u64 valid_mask; | 296 | u64 valid_mask; |
297 | int idx; /* per_xxx->regs[] reg index */ | 297 | int idx; /* per_xxx->regs[] reg index */ |
298 | bool extra_msr_access; | ||
298 | }; | 299 | }; |
299 | 300 | ||
300 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ | 301 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ |
301 | .event = (e), \ | 302 | .event = (e), \ |
302 | .msr = (ms), \ | 303 | .msr = (ms), \ |
303 | .config_mask = (m), \ | 304 | .config_mask = (m), \ |
304 | .valid_mask = (vm), \ | 305 | .valid_mask = (vm), \ |
305 | .idx = EXTRA_REG_##i, \ | 306 | .idx = EXTRA_REG_##i, \ |
307 | .extra_msr_access = true, \ | ||
306 | } | 308 | } |
307 | 309 | ||
308 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ | 310 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c206815b9556..2502d0d9d246 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(void) | |||
2182 | } | 2182 | } |
2183 | } | 2183 | } |
2184 | 2184 | ||
2185 | /* | ||
2186 | * Under certain circumstances, access certain MSR may cause #GP. | ||
2187 | * The function tests if the input MSR can be safely accessed. | ||
2188 | */ | ||
2189 | static bool check_msr(unsigned long msr, u64 mask) | ||
2190 | { | ||
2191 | u64 val_old, val_new, val_tmp; | ||
2192 | |||
2193 | /* | ||
2194 | * Read the current value, change it and read it back to see if it | ||
2195 | * matches, this is needed to detect certain hardware emulators | ||
2196 | * (qemu/kvm) that don't trap on the MSR access and always return 0s. | ||
2197 | */ | ||
2198 | if (rdmsrl_safe(msr, &val_old)) | ||
2199 | return false; | ||
2200 | |||
2201 | /* | ||
2202 | * Only change the bits which can be updated by wrmsrl. | ||
2203 | */ | ||
2204 | val_tmp = val_old ^ mask; | ||
2205 | if (wrmsrl_safe(msr, val_tmp) || | ||
2206 | rdmsrl_safe(msr, &val_new)) | ||
2207 | return false; | ||
2208 | |||
2209 | if (val_new != val_tmp) | ||
2210 | return false; | ||
2211 | |||
2212 | /* Here it's sure that the MSR can be safely accessed. | ||
2213 | * Restore the old value and return. | ||
2214 | */ | ||
2215 | wrmsrl(msr, val_old); | ||
2216 | |||
2217 | return true; | ||
2218 | } | ||
2219 | |||
2185 | static __init void intel_sandybridge_quirk(void) | 2220 | static __init void intel_sandybridge_quirk(void) |
2186 | { | 2221 | { |
2187 | x86_pmu.check_microcode = intel_snb_check_microcode; | 2222 | x86_pmu.check_microcode = intel_snb_check_microcode; |
@@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void) | |||
2271 | union cpuid10_ebx ebx; | 2306 | union cpuid10_ebx ebx; |
2272 | struct event_constraint *c; | 2307 | struct event_constraint *c; |
2273 | unsigned int unused; | 2308 | unsigned int unused; |
2274 | int version; | 2309 | struct extra_reg *er; |
2310 | int version, i; | ||
2275 | 2311 | ||
2276 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 2312 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
2277 | switch (boot_cpu_data.x86) { | 2313 | switch (boot_cpu_data.x86) { |
@@ -2577,6 +2613,34 @@ __init int intel_pmu_init(void) | |||
2577 | } | 2613 | } |
2578 | } | 2614 | } |
2579 | 2615 | ||
2616 | /* | ||
2617 | * Access LBR MSR may cause #GP under certain circumstances. | ||
2618 | * E.g. KVM doesn't support LBR MSR | ||
2619 | * Check all LBT MSR here. | ||
2620 | * Disable LBR access if any LBR MSRs can not be accessed. | ||
2621 | */ | ||
2622 | if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) | ||
2623 | x86_pmu.lbr_nr = 0; | ||
2624 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
2625 | if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && | ||
2626 | check_msr(x86_pmu.lbr_to + i, 0xffffUL))) | ||
2627 | x86_pmu.lbr_nr = 0; | ||
2628 | } | ||
2629 | |||
2630 | /* | ||
2631 | * Access extra MSR may cause #GP under certain circumstances. | ||
2632 | * E.g. KVM doesn't support offcore event | ||
2633 | * Check all extra_regs here. | ||
2634 | */ | ||
2635 | if (x86_pmu.extra_regs) { | ||
2636 | for (er = x86_pmu.extra_regs; er->msr; er++) { | ||
2637 | er->extra_msr_access = check_msr(er->msr, 0x1ffUL); | ||
2638 | /* Disable LBR select mapping */ | ||
2639 | if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) | ||
2640 | x86_pmu.lbr_sel_map = NULL; | ||
2641 | } | ||
2642 | } | ||
2643 | |||
2580 | /* Support full width counters using alternative MSR range */ | 2644 | /* Support full width counters using alternative MSR range */ |
2581 | if (x86_pmu.intel_cap.full_width_write) { | 2645 | if (x86_pmu.intel_cap.full_width_write) { |
2582 | x86_pmu.max_period = x86_pmu.cntval_mask; | 2646 | x86_pmu.max_period = x86_pmu.cntval_mask; |