aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author George Dunlap <george.dunlap@eu.citrix.com> 2013-04-03 10:46:28 -0400
committer Ingo Molnar <mingo@kernel.org> 2013-04-21 05:16:29 -0400
commit a5ebe0ba3dff658c5286e8d5f20e4328f719d5a3 (patch)
tree d4c6cd248ee16978284d85626992af7a0011a18e
parent c43ca5091a374c1f6778bd7e4a39a5a10735a917 (diff)
perf/x86: Check all MSRs before passing hw check
check_hw_exists() has a number of checks which go to two exit paths: msr_fail and bios_fail. Checks classified as msr_fail will cause check_hw_exists() to return false, causing the PMU not to be used; bios_fail checks will only cause a warning to be printed, but will return true. The problem is that if there are both msr failures and bios failures, and the routine hits a bios_fail check first, it will exit early and return true, not finishing the rest of the msr checks. If those msrs are in fact broken, it will cause them to be used erroneously. In the case of a Xen PV VM, the guest OS has read access to all the MSRs, but write access is white-listed to supported features. Writes to unsupported MSRs have no effect. The PMU MSRs are not (typically) supported, because they are expensive to save and restore on a VM context switch. One of the "msr_fail" checks is supposed to detect this circumstance (either for Xen or KVM) and disable the hardware PMU. However, on one of my AMD boxen, there is (apparently) a broken BIOS which triggers one of the bios_fail checks. In particular, MSR_K7_EVNTSEL0 has the ARCH_PERFMON_EVENTSEL_ENABLE bit set. The guest kernel detects this because it has read access to all MSRs, and causes it to skip the rest of the checks and try to use the non-existent hardware PMU. This minimally causes a lot of useless instruction emulation and Xen console spam; it may cause other issues with the watchdog as well. This changeset causes check_hw_exists() to go through all of the msr checks, failing and returning false if any of them fail. This makes sure that a guest running under Xen without a virtual PMU will detect that there is no functioning PMU and not attempt to use it. This problem affects kernels as far back as 3.2, and should thus be considered for backport.
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> Cc: Konrad Wilk <konrad.wilk@oracle.com> Cc: Ian Campbell <ian.campbell@citrix.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Andrew Cooper <andrew.cooper3@citrix.com> Link: http://lkml.kernel.org/r/1365000388-32448-1-git-send-email-george.dunlap@eu.citrix.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/cpu/perf_event.c28
1 files changed, 17 insertions, 11 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5ed7a4c5baf7..1025f3c99d20 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -180,8 +180,9 @@ static void release_pmc_hardware(void) {}
180 180
181static bool check_hw_exists(void) 181static bool check_hw_exists(void)
182{ 182{
183 u64 val, val_new = ~0; 183 u64 val, val_fail, val_new= ~0;
184 int i, reg, ret = 0; 184 int i, reg, reg_fail, ret = 0;
185 int bios_fail = 0;
185 186
186 /* 187 /*
187 * Check to see if the BIOS enabled any of the counters, if so 188 * Check to see if the BIOS enabled any of the counters, if so
@@ -192,8 +193,11 @@ static bool check_hw_exists(void)
192 ret = rdmsrl_safe(reg, &val); 193 ret = rdmsrl_safe(reg, &val);
193 if (ret) 194 if (ret)
194 goto msr_fail; 195 goto msr_fail;
195 if (val & ARCH_PERFMON_EVENTSEL_ENABLE) 196 if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
196 goto bios_fail; 197 bios_fail = 1;
198 val_fail = val;
199 reg_fail = reg;
200 }
197 } 201 }
198 202
199 if (x86_pmu.num_counters_fixed) { 203 if (x86_pmu.num_counters_fixed) {
@@ -202,8 +206,11 @@ static bool check_hw_exists(void)
202 if (ret) 206 if (ret)
203 goto msr_fail; 207 goto msr_fail;
204 for (i = 0; i < x86_pmu.num_counters_fixed; i++) { 208 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
205 if (val & (0x03 << i*4)) 209 if (val & (0x03 << i*4)) {
206 goto bios_fail; 210 bios_fail = 1;
211 val_fail = val;
212 reg_fail = reg;
213 }
207 } 214 }
208 } 215 }
209 216
@@ -221,14 +228,13 @@ static bool check_hw_exists(void)
221 if (ret || val != val_new) 228 if (ret || val != val_new)
222 goto msr_fail; 229 goto msr_fail;
223 230
224 return true;
225
226bios_fail:
227 /* 231 /*
228 * We still allow the PMU driver to operate: 232 * We still allow the PMU driver to operate:
229 */ 233 */
230 printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); 234 if (bios_fail) {
231 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); 235 printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
236 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
237 }
232 238
233 return true; 239 return true;
234 240