Diffstat (limited to 'arch/x86/kernel/cpu')
23 files changed, 2821 insertions(+), 784 deletions(-)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c202b62f367..3a785da34b6 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
-obj-y			+= vmware.o hypervisor.o sched.o
+obj-y			+= vmware.o hypervisor.o sched.o mshyperv.o
 
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 97ad79cdf68..10fa5684a66 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -30,12 +30,14 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	const struct cpuid_bit *cb;
 
 	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
 		{ X86_FEATURE_IDA,		CR_EAX, 1, 0x00000006 },
 		{ X86_FEATURE_ARAT,		CR_EAX, 2, 0x00000006 },
+		{ X86_FEATURE_APERFMPERF,	CR_ECX, 0, 0x00000006 },
+		{ X86_FEATURE_CPB,		CR_EDX, 9, 0x80000007 },
 		{ X86_FEATURE_NPT,		CR_EDX, 0, 0x8000000a },
 		{ X86_FEATURE_LBRV,		CR_EDX, 1, 0x8000000a },
 		{ X86_FEATURE_SVML,		CR_EDX, 2, 0x8000000a },
 		{ X86_FEATURE_NRIPS,		CR_EDX, 3, 0x8000000a },
 		{ 0, 0, 0, 0 }
 	};
 
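The two new rows wire X86_FEATURE_APERFMPERF and X86_FEATURE_CPB into the generic scattered-CPUID scanner instead of vendor-specific setup code. As a stand-alone illustration (a hedged user-space sketch, not kernel code), the APERFMPERF row amounts to testing bit 0 of ECX in CPUID leaf 0x6; the open-coded version of the same test is deleted from init_intel() later in this diff.

	/* User-space sketch of the probe encoded by the table row
	 * { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }:
	 * read CPUID leaf 0x6 and test bit 0 of ECX. */
	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(6, &eax, &ebx, &ecx, &edx))
			return 1;	/* leaf 0x6 not available */
		printf("APERF/MPERF %ssupported\n", (ecx & 1) ? "" : "not ");
		return 0;
	}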
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 01a26521239..c39576cb301 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -86,7 +86,7 @@ static void __init check_fpu(void)
 
 static void __init check_hlt(void)
 {
-	if (paravirt_enabled())
+	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
 		return;
 
 	printk(KERN_INFO "Checking 'hlt' instruction... ");
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4868e4a951e..c1c00d0b169 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1243,10 +1243,7 @@ void __cpuinit cpu_init(void)
 	/*
 	 * Force FPU initialization:
 	 */
-	if (cpu_has_xsave)
-		current_thread_info()->status = TS_XSAVE;
-	else
-		current_thread_info()->status = 0;
+	current_thread_info()->status = 0;
 	clear_used_math();
 	mxcsr_feature_mask_init();
 
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 1840c0a5170..bd54bf67e6f 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -2,8 +2,8 @@
 # K8 systems. ACPI is preferred to all other hardware-specific drivers.
 # speedstep-* is preferred over p4-clockmod.
 
-obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
-obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o mperf.o
 obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
 obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 459168083b7..1d3cddaa40e 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -46,6 +46,7 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
+#include "mperf.h"
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
 		"acpi-cpufreq", msg)
@@ -71,8 +72,6 @@ struct acpi_cpufreq_data {
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
 
-static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
-
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
 
@@ -240,45 +239,6 @@ static u32 get_cur_val(const struct cpumask *mask)
 	return cmd.val;
 }
 
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
-	struct aperfmperf *am = _cur;
-
-	get_aperfmperf(am);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-static unsigned int get_measured_perf(struct cpufreq_policy *policy,
-				      unsigned int cpu)
-{
-	struct aperfmperf perf;
-	unsigned long ratio;
-	unsigned int retval;
-
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
-		return 0;
-
-	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
-	per_cpu(acfreq_old_perf, cpu) = perf;
-
-	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
-
-	return retval;
-}
-
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
 	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
@@ -702,7 +662,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	/* Check for APERF/MPERF support in hardware */
 	if (cpu_has(c, X86_FEATURE_APERFMPERF))
-		acpi_cpufreq_driver.getavg = get_measured_perf;
+		acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
 
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c
new file mode 100644
index 00000000000..911e193018a
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.c
@@ -0,0 +1,51 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpufreq.h>
+#include <linux/slab.h>
+
+#include "mperf.h"
+
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
+
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
+{
+	struct aperfmperf *am = _cur;
+
+	get_aperfmperf(am);
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+					unsigned int cpu)
+{
+	struct aperfmperf perf;
+	unsigned long ratio;
+	unsigned int retval;
+
+	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
+		return 0;
+
+	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+	per_cpu(acfreq_old_perf, cpu) = perf;
+
+	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
+MODULE_LICENSE("GPL");
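The arithmetic this shared helper builds on can be shown in isolation. This is a simplified sketch of the averaging idea only; get_aperfmperf(), calc_aperfmperf_ratio() and struct aperfmperf live in arch/x86 headers not shown in this diff, so the fixed-point details below are an assumption.

	/* Stand-alone sketch: the delta of APERF (counts at actual
	 * frequency) over the delta of MPERF (counts at maximum
	 * frequency) since the last sample, as a fixed-point fraction
	 * that scales policy->cpuinfo.max_freq. */
	#include <stdint.h>

	#define APERFMPERF_SHIFT 10		/* assumed fixed-point shift */

	struct sample { uint64_t aperf, mperf; };

	static unsigned long ratio_since(struct sample *old, const struct sample *cur)
	{
		uint64_t da = cur->aperf - old->aperf;
		uint64_t dm = cur->mperf - old->mperf;

		*old = *cur;		/* remember the new reference point */
		return dm ? (unsigned long)((da << APERFMPERF_SHIFT) / dm) : 0;
	}

	/* average kHz since last call: (max_freq * ratio) >> APERFMPERF_SHIFT */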
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h
new file mode 100644
index 00000000000..5dbf2950dc2
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.h
@@ -0,0 +1,9 @@
+/*
+ *  (c) 2010 Advanced Micro Devices, Inc.
+ *  Your use of this code is subject to the terms and conditions of the
+ *  GNU general public license version 2. See "COPYING" or
+ *  http://www.gnu.org/licenses/gpl.html
+ */
+
+unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
+					unsigned int cpu);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index b6215b9798e..6f3dc8fbbfd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,6 +1,5 @@
-
 /*
- *   (c) 2003-2006 Advanced Micro Devices, Inc.
+ *   (c) 2003-2010 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -46,6 +45,7 @@
 #define PFX "powernow-k8: "
 #define VERSION "version 2.20.00"
 #include "powernow-k8.h"
+#include "mperf.h"
 
 /* serialize freq changes */
 static DEFINE_MUTEX(fidvid_mutex);
@@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
 
 static int cpu_family = CPU_OPTERON;
 
+/* core performance boost */
+static bool cpb_capable, cpb_enabled;
+static struct msr __percpu *msrs;
+
+static struct cpufreq_driver cpufreq_amd64_driver;
+
 #ifndef CONFIG_SMP
 static inline const struct cpumask *cpu_core_mask(int cpu)
 {
@@ -1249,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	struct powernow_k8_data *data;
 	struct init_on_cpu init_on_cpu;
 	int rc;
+	struct cpuinfo_x86 *c = &cpu_data(pol->cpu);
 
 	if (!cpu_online(pol->cpu))
 		return -ENODEV;
@@ -1323,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 		return -EINVAL;
 	}
 
+	/* Check for APERF/MPERF support in hardware */
+	if (cpu_has(c, X86_FEATURE_APERFMPERF))
+		cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf;
+
 	cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
 
 	if (cpu_family == CPU_HW_PSTATE)
@@ -1394,8 +1405,77 @@ out:
 	return khz;
 }
 
+static void _cpb_toggle_msrs(bool t)
+{
+	int cpu;
+
+	get_online_cpus();
+
+	rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+	for_each_cpu(cpu, cpu_online_mask) {
+		struct msr *reg = per_cpu_ptr(msrs, cpu);
+		if (t)
+			reg->l &= ~BIT(25);
+		else
+			reg->l |= BIT(25);
+	}
+	wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+	put_online_cpus();
+}
+
+/*
+ * Switch on/off core performance boosting.
+ *
+ * 0=disable
+ * 1=enable.
+ */
+static void cpb_toggle(bool t)
+{
+	if (!cpb_capable)
+		return;
+
+	if (t && !cpb_enabled) {
+		cpb_enabled = true;
+		_cpb_toggle_msrs(t);
+		printk(KERN_INFO PFX "Core Boosting enabled.\n");
+	} else if (!t && cpb_enabled) {
+		cpb_enabled = false;
+		_cpb_toggle_msrs(t);
+		printk(KERN_INFO PFX "Core Boosting disabled.\n");
+	}
+}
+
+static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf,
+				 size_t count)
+{
+	int ret = -EINVAL;
+	unsigned long val = 0;
+
+	ret = strict_strtoul(buf, 10, &val);
+	if (!ret && (val == 0 || val == 1) && cpb_capable)
+		cpb_toggle(val);
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf)
+{
+	return sprintf(buf, "%u\n", cpb_enabled);
+}
+
+#define define_one_rw(_name) \
+static struct freq_attr _name = \
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+define_one_rw(cpb);
+
 static struct freq_attr *powernow_k8_attr[] = {
 	&cpufreq_freq_attr_scaling_available_freqs,
+	&cpb,
 	NULL,
 };
 
@@ -1411,10 +1491,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
 	.attr		= powernow_k8_attr,
 };
 
+/*
+ * Clear the boost-disable flag on the CPU_DOWN path so that this cpu
+ * cannot block the remaining ones from boosting. On the CPU_UP path we
+ * simply keep the boost-disable flag in sync with the current global
+ * state.
+ */
+static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
+				void *hcpu)
+{
+	unsigned cpu = (long)hcpu;
+	u32 lo, hi;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+
+		if (!cpb_enabled) {
+			rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+			lo |= BIT(25);
+			wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+		}
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);
+		lo &= ~BIT(25);
+		wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata cpb_nb = {
+	.notifier_call		= cpb_notify,
+};
+
 /* driver entry point for init */
 static int __cpuinit powernowk8_init(void)
 {
-	unsigned int i, supported_cpus = 0;
+	unsigned int i, supported_cpus = 0, cpu;
 
 	for_each_online_cpu(i) {
 		int rc;
@@ -1423,15 +1544,36 @@ static int __cpuinit powernowk8_init(void)
 			supported_cpus++;
 	}
 
-	if (supported_cpus == num_online_cpus()) {
-		printk(KERN_INFO PFX "Found %d %s "
-			"processors (%d cpu cores) (" VERSION ")\n",
-			num_online_nodes(),
-			boot_cpu_data.x86_model_id, supported_cpus);
-		return cpufreq_register_driver(&cpufreq_amd64_driver);
+	if (supported_cpus != num_online_cpus())
+		return -ENODEV;
+
+	printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n",
+		num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus);
+
+	if (boot_cpu_has(X86_FEATURE_CPB)) {
+
+		cpb_capable = true;
+
+		register_cpu_notifier(&cpb_nb);
+
+		msrs = msrs_alloc();
+		if (!msrs) {
+			printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
+			return -ENOMEM;
+		}
+
+		rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
+
+		for_each_cpu(cpu, cpu_online_mask) {
+			struct msr *reg = per_cpu_ptr(msrs, cpu);
+			cpb_enabled |= !(!!(reg->l & BIT(25)));
+		}
+
+		printk(KERN_INFO PFX "Core Performance Boosting: %s.\n",
+			(cpb_enabled ? "on" : "off"));
 	}
 
-	return -ENODEV;
+	return cpufreq_register_driver(&cpufreq_amd64_driver);
 }
 
 /* driver entry point for term */
@@ -1439,6 +1581,13 @@ static void __exit powernowk8_exit(void)
 {
 	dprintk("exit\n");
 
+	if (boot_cpu_has(X86_FEATURE_CPB)) {
+		msrs_free(msrs);
+		msrs = NULL;
+
+		unregister_cpu_notifier(&cpb_nb);
+	}
+
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
 }
 
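The boost state that store_cpb() and cpb_notify() manage lives in bit 25 of MSR_K7_HWCR (the boost-disable bit on family 0x10 parts). A hypothetical way to observe it from user space, via the msr driver rather than this cpufreq sysfs interface; the address 0xc0010015 is the conventional value of MSR_K7_HWCR, and this assumes CONFIG_X86_MSR and root privileges.

	/* Read MSR_K7_HWCR on cpu0 through /dev/cpu/0/msr and report
	 * the boost-disable bit the driver above toggles. */
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t hwcr;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		if (fd < 0)
			return 1;
		if (pread(fd, &hwcr, sizeof(hwcr), 0xc0010015) != sizeof(hwcr))
			return 1;
		printf("core boost %s\n",
		       (hwcr & (1ULL << 25)) ? "disabled" : "enabled");
		close(fd);
		return 0;
	}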
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 02ce824073c..df3529b1c02 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,7 +5,6 @@
  * http://www.gnu.org/licenses/gpl.html
  */
 
-
 enum pstate {
 	HW_PSTATE_INVALID = 0xff,
 	HW_PSTATE_0 = 0,
@@ -55,7 +54,6 @@ struct powernow_k8_data {
 	struct cpumask *available_cores;
 };
 
-
 /* processor's cpuid instruction support */
 #define CPUID_PROCESSOR_SIGNATURE	1	/* function 1 */
 #define CPUID_XFAM			0x0ff00000	/* extended family */
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 08be922de33..dd531cc56a8 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -21,37 +21,55 @@
  *
  */
 
+#include <linux/module.h>
 #include <asm/processor.h>
-#include <asm/vmware.h>
 #include <asm/hypervisor.h>
 
-static inline void __cpuinit
-detect_hypervisor_vendor(struct cpuinfo_x86 *c)
+/*
+ * Hypervisor detect order.  This is specified explicitly here because
+ * some hypervisors might implement compatibility modes for other
+ * hypervisors and therefore need to be detected in specific sequence.
+ */
+static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-	if (vmware_platform())
-		c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
-	else
-		c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
-}
+	&x86_hyper_vmware,
+	&x86_hyper_ms_hyperv,
+};
 
-static inline void __cpuinit
-hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
+const struct hypervisor_x86 *x86_hyper;
+EXPORT_SYMBOL(x86_hyper);
+
+static inline void __init
+detect_hypervisor_vendor(void)
 {
-	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
-		vmware_set_feature_bits(c);
-		return;
+	const struct hypervisor_x86 *h, * const *p;
+
+	for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
+		h = *p;
+		if (h->detect()) {
+			x86_hyper = h;
+			printk(KERN_INFO "Hypervisor detected: %s\n", h->name);
+			break;
+		}
 	}
 }
 
 void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
 {
-	detect_hypervisor_vendor(c);
-	hypervisor_set_feature_bits(c);
+	if (x86_hyper && x86_hyper->set_cpu_features)
+		x86_hyper->set_cpu_features(c);
 }
 
 void __init init_hypervisor_platform(void)
 {
+
+	detect_hypervisor_vendor();
+
+	if (!x86_hyper)
+		return;
+
 	init_hypervisor(&boot_cpu_data);
-	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
-		vmware_platform_setup();
+
+	if (x86_hyper->init_platform)
+		x86_hyper->init_platform();
 }
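For orientation, the descriptor that the detection loop walks has roughly this shape, paraphrased from the asm/hypervisor.h changes accompanying this series; treat the exact field list as an assumption, though the NULL checks above confirm that the last two callbacks are optional.

	struct hypervisor_x86 {
		const char	*name;			/* printed at detection time */
		bool		(*detect)(void);	/* CPUID-based platform probe */
		void		(*set_cpu_features)(struct cpuinfo_x86 *);	/* optional */
		void		(*init_platform)(void);				/* optional */
	};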
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1366c7cfd48..85f69cdeae1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -12,7 +12,6 @@
 #include <asm/processor.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
-#include <asm/ds.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 
@@ -373,12 +372,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	if (c->cpuid_level > 6) {
-		unsigned ecx = cpuid_ecx(6);
-		if (ecx & 0x01)
-			set_cpu_cap(c, X86_FEATURE_APERFMPERF);
-	}
-
 	if (cpu_has_xmm2)
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 	if (cpu_has_ds) {
@@ -388,7 +381,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
 			set_cpu_cap(c, X86_FEATURE_PEBS);
-		ds_init_intel(c);
 	}
 
 	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b3eeb66c0a5..33eae2062cf 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx {
 	u32 full;
 };
 
+struct amd_l3_cache {
+	struct pci_dev	*dev;
+	bool		can_disable;
+	unsigned	indices;
+	u8		subcaches[4];
+};
+
 struct _cpuid4_info {
 	union _cpuid4_leaf_eax eax;
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	bool can_disable;
-	unsigned int l3_indices;
+	struct amd_l3_cache *l3;
 	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
@@ -164,8 +170,7 @@ struct _cpuid4_info_regs {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	bool can_disable;
-	unsigned int l3_indices;
+	struct amd_l3_cache *l3;
 };
 
 unsigned short			num_cache_leaves;
@@ -302,87 +307,163 @@ struct _cache_attr {
 };
 
 #ifdef CONFIG_CPU_SUP_AMD
-static unsigned int __cpuinit amd_calc_l3_indices(void)
+
+/*
+ * L3 cache descriptors
+ */
+static struct amd_l3_cache **__cpuinitdata l3_caches;
+
+static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 {
-	/*
-	 * We're called over smp_call_function_single() and therefore
-	 * are on the correct cpu.
-	 */
-	int cpu = smp_processor_id();
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
 	unsigned int sc0, sc1, sc2, sc3;
 	u32 val = 0;
 
-	pci_read_config_dword(dev, 0x1C4, &val);
+	pci_read_config_dword(l3->dev, 0x1C4, &val);
 
 	/* calculate subcache sizes */
-	sc0 = !(val & BIT(0));
-	sc1 = !(val & BIT(4));
-	sc2 = !(val & BIT(8)) + !(val & BIT(9));
-	sc3 = !(val & BIT(12)) + !(val & BIT(13));
-
-	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+	l3->subcaches[0] = sc0 = !(val & BIT(0));
+	l3->subcaches[1] = sc1 = !(val & BIT(4));
+	l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
+	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+	l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
+static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
+{
+	struct amd_l3_cache *l3;
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+
+	l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
+	if (!l3) {
+		printk(KERN_WARNING "Error allocating L3 struct\n");
+		return NULL;
+	}
+
+	l3->dev = dev;
+
+	amd_calc_l3_indices(l3);
+
+	return l3;
 }
 
 static void __cpuinit
 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
-	if (index < 3)
+	int node;
+
+	if (boot_cpu_data.x86 != 0x10)
 		return;
 
-	if (boot_cpu_data.x86 == 0x11)
+	if (index < 3)
 		return;
 
 	/* see errata #382 and #388 */
-	if ((boot_cpu_data.x86 == 0x10) &&
-	    ((boot_cpu_data.x86_model < 0x8) ||
-	     (boot_cpu_data.x86_mask < 0x1)))
+	if (boot_cpu_data.x86_model < 0x8)
+		return;
+
+	if ((boot_cpu_data.x86_model == 0x8 ||
+	     boot_cpu_data.x86_model == 0x9)
+	    &&
+	    boot_cpu_data.x86_mask < 0x1)
+		return;
+
+	/* not in virtualized environments */
+	if (num_k8_northbridges == 0)
 		return;
 
-	this_leaf->can_disable = true;
-	this_leaf->l3_indices = amd_calc_l3_indices();
+	/*
+	 * Strictly speaking, the amount in @size below is leaked since it is
+	 * never freed but this is done only on shutdown so it doesn't matter.
+	 */
+	if (!l3_caches) {
+		int size = num_k8_northbridges * sizeof(struct amd_l3_cache *);
+
+		l3_caches = kzalloc(size, GFP_ATOMIC);
+		if (!l3_caches)
+			return;
+	}
+
+	node = amd_get_nb_id(smp_processor_id());
+
+	if (!l3_caches[node]) {
+		l3_caches[node] = amd_init_l3_cache(node);
+		l3_caches[node]->can_disable = true;
+	}
+
+	WARN_ON(!l3_caches[node]);
+
+	this_leaf->l3 = l3_caches[node];
 }
 
 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
-				  unsigned int index)
+				  unsigned int slot)
 {
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = amd_get_nb_id(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	struct pci_dev *dev = this_leaf->l3->dev;
 	unsigned int reg = 0;
 
-	if (!this_leaf->can_disable)
+	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
 		return -EINVAL;
 
 	if (!dev)
 		return -EINVAL;
 
-	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	pci_read_config_dword(dev, 0x1BC + slot * 4, &reg);
 	return sprintf(buf, "0x%08x\n", reg);
 }
 
-#define SHOW_CACHE_DISABLE(index)					\
+#define SHOW_CACHE_DISABLE(slot)					\
 static ssize_t								\
-show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf)	\
 {									\
-	return show_cache_disable(this_leaf, buf, index);		\
+	return show_cache_disable(this_leaf, buf, slot);		\
 }
 SHOW_CACHE_DISABLE(0)
 SHOW_CACHE_DISABLE(1)
 
+static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
+				 unsigned slot, unsigned long idx)
+{
+	int i;
+
+	idx |= BIT(30);
+
+	/*
+	 * disable index in all 4 subcaches
+	 */
+	for (i = 0; i < 4; i++) {
+		u32 reg = idx | (i << 20);
+
+		if (!l3->subcaches[i])
+			continue;
+
+		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+
+		/*
+		 * We need to WBINVD on a core on the node containing the L3
+		 * cache which indices we disable therefore a simple wbinvd()
+		 * is not sufficient.
+		 */
+		wbinvd_on_cpu(cpu);
+
+		reg |= BIT(31);
+		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+	}
+}
+
+
 static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-	const char *buf, size_t count, unsigned int index)
+				   const char *buf, size_t count,
+				   unsigned int slot)
 {
+	struct pci_dev *dev = this_leaf->l3->dev;
 	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = amd_get_nb_id(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
 	unsigned long val = 0;
 
 #define SUBCACHE_MASK	(3UL << 20)
 #define SUBCACHE_INDEX	0xfff
 
-	if (!this_leaf->can_disable)
+	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
 		return -EINVAL;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -396,26 +477,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 
 	/* do not allow writes outside of allowed bits */
 	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
-	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
+	    ((val & SUBCACHE_INDEX) > this_leaf->l3->indices))
 		return -EINVAL;
 
-	val |= BIT(30);
-	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-	/*
-	 * We need to WBINVD on a core on the node containing the L3 cache which
-	 * indices we disable therefore a simple wbinvd() is not sufficient.
-	 */
-	wbinvd_on_cpu(cpu);
-	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
+	amd_l3_disable_index(this_leaf->l3, cpu, slot, val);
+
 	return count;
 }
 
-#define STORE_CACHE_DISABLE(index)					\
+#define STORE_CACHE_DISABLE(slot)					\
 static ssize_t								\
-store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
+store_cache_disable_##slot(struct _cpuid4_info *this_leaf,		\
 			    const char *buf, size_t count)		\
 {									\
-	return store_cache_disable(this_leaf, buf, count, index);	\
+	return store_cache_disable(this_leaf, buf, count, slot);	\
 }
 STORE_CACHE_DISABLE(0)
 STORE_CACHE_DISABLE(1)
@@ -443,8 +518,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 		amd_cpuid4(index, &eax, &ebx, &ecx);
-		if (boot_cpu_data.x86 >= 0x10)
-			amd_check_l3_disable(index, this_leaf);
+		amd_check_l3_disable(index, this_leaf);
 	} else {
 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 	}
@@ -701,6 +775,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	for (i = 0; i < num_cache_leaves; i++)
 		cache_remove_shared_cpu_map(cpu, i);
 
+	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
 	kfree(per_cpu(ici_cpuid4_info, cpu));
 	per_cpu(ici_cpuid4_info, cpu) = NULL;
 }
@@ -985,7 +1060,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
 		this_leaf = CPUID4_INFO_IDX(cpu, i);
 
-		if (this_leaf->can_disable)
+		if (this_leaf->l3 && this_leaf->l3->can_disable)
 			ktype_cache.default_attrs = default_l3_attrs;
 		else
 			ktype_cache.default_attrs = default_attrs;
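The value accepted by store_cache_disable() packs three fields, per the SUBCACHE_MASK/SUBCACHE_INDEX definitions and amd_l3_disable_index() above. A hedged sketch of that layout; the helper below is illustrative only, not part of the kernel:

	#include <stdint.h>

	/* Build the word written to the NB misc-device register at
	 * 0x1BC + 4*slot: bits 11:0 select the cache index to kill,
	 * bits 21:20 the subcache, and bit 30 arms the disable; the
	 * kernel then runs wbinvd on a core of that node and latches
	 * the disable by setting bit 31. */
	static uint32_t l3_disable_word(unsigned int subcache, unsigned int index)
	{
		uint32_t val = index & 0xfff;		/* SUBCACHE_INDEX */

		val |= (subcache & 0x3) << 20;		/* SUBCACHE_MASK field */
		val |= 1u << 30;			/* arm the disable */
		return val;
	}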
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8a6f0afa767..7a355ddcc64 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -539,7 +539,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	struct mce m;
 	int i;
 
-	__get_cpu_var(mce_poll_count)++;
+	percpu_inc(mce_poll_count);
 
 	mce_setup(&m);
 
@@ -934,7 +934,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	atomic_inc(&mce_entry);
 
-	__get_cpu_var(mce_exception_count)++;
+	percpu_inc(mce_exception_count);
 
 	if (notify_die(DIE_NMI, "machine check", regs, error_code,
 			   18, SIGKILL) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
new file mode 100644
index 00000000000..16f41bbe46b
--- /dev/null
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -0,0 +1,55 @@
+/*
+ * HyperV  Detection code.
+ *
+ * Copyright (C) 2010, Novell, Inc.
+ * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/hypervisor.h>
+#include <asm/hyperv.h>
+#include <asm/mshyperv.h>
+
+struct ms_hyperv_info ms_hyperv;
+
+static bool __init ms_hyperv_platform(void)
+{
+	u32 eax;
+	u32 hyp_signature[3];
+
+	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+		return false;
+
+	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
+	      &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
+
+	return eax >= HYPERV_CPUID_MIN &&
+	       eax <= HYPERV_CPUID_MAX &&
+	       !memcmp("Microsoft Hv", hyp_signature, 12);
+}
+
+static void __init ms_hyperv_init_platform(void)
+{
+	/*
+	 * Extract the features and hints
+	 */
+	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
+	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
+
+	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
+	       ms_hyperv.features, ms_hyperv.hints);
+}
+
+const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+	.name			= "Microsoft HyperV",
+	.detect			= ms_hyperv_platform,
+	.init_platform		= ms_hyperv_init_platform,
+};
+EXPORT_SYMBOL(x86_hyper_ms_hyperv);
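The signature test above follows the generic hypervisor CPUID convention: leaf 0x40000000 returns the maximum hypervisor leaf in EAX and a 12-byte vendor string in EBX/ECX/EDX, guarded by the hypervisor-present bit (ECX bit 31 of leaf 1). A user-space equivalent of the same probe, as a sketch:

	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		unsigned int a, b, c, d, sig[3];

		__cpuid(1, a, b, c, d);
		if (!(c & (1u << 31)))		/* X86_FEATURE_HYPERVISOR */
			return puts("no hypervisor present"), 0;

		/* HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS in the code above */
		__cpuid(0x40000000, a, sig[0], sig[1], sig[2]);
		if (!memcmp(sig, "Microsoft Hv", 12))
			printf("Hyper-V detected, max leaf 0x%x\n", a);
		return 0;
	}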
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf..fd4db0db370 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 
-static u64 perf_event_mask __read_mostly;
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) 					\
+do {								\
+	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
+			(unsigned long)(val));			\
+	native_write_msr((msr), (u32)((u64)(val)), 		\
+			(u32)((u64)(val) >> 32));		\
+} while (0)
+#endif
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
+		len  += size;
+		to   += size;
+		addr += size;
 
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+	} while (len < n);
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
+	return len;
+}
 
 struct event_constraint {
 	union {
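The loop in copy_from_user_nmi() never lets a single step cross a page boundary, since each iteration pins and maps exactly one page. The chunking arithmetic, isolated into a runnable user-space sketch:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		/* arbitrary unaligned range, mirroring (from, n) above */
		unsigned long addr = 0x1ffe, n = 9000, len = 0;

		while (len < n) {
			unsigned long offset = addr & (PAGE_SIZE - 1);
			unsigned long size = PAGE_SIZE - offset;

			if (size > n - len)
				size = n - len;	/* final partial chunk */
			printf("copy %4lu bytes at 0x%05lx\n", size, addr);
			len += size;
			addr += size;
		}
		return 0;
	}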
@@ -89,18 +94,41 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	unsigned int		group_flag;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -114,44 +142,75 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
 #define for_each_event_constraint(e, c)	\
-	for ((e) = (c); (e)->cmask; (e)++)
+	for ((e) = (c); (e)->weight; (e)++)
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format    : 6;
+		u64	pebs_trap     : 1;
+		u64	pebs_arch_reg : 1;
+		u64	pebs_format   : 4;
+		u64	smm_freeze    : 1;
+	};
+	u64	capabilities;
+};
 
 /*
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
-	void		(*enable_all)(void);
+	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
			(*get_event_constraints)(struct cpu_hw_events *cpuc,
						 struct perf_event *event);
@@ -159,11 +218,32 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	void		(*quirks)(void);
 
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64			intel_ctrl;
+	union perf_capabilities intel_cap;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs	  */
+	int		lbr_nr;			   /* hardware stack size */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
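The new union perf_capabilities presumably mirrors the bit layout of the IA32_PERF_CAPABILITIES MSR exposed by Intel Arch Perfmon v2+; that MSR is not read anywhere in this excerpt, so treat the mapping as an assumption. Decoding a raw value with the same layout:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t caps = 0x10c2;	/* example raw value, made up */

		printf("lbr_format    %u\n", (unsigned)(caps & 0x3f));
		printf("pebs_trap     %u\n", (unsigned)((caps >> 6) & 1));
		printf("pebs_arch_reg %u\n", (unsigned)((caps >> 7) & 1));
		printf("pebs_format   %u\n", (unsigned)((caps >> 8) & 0xf));
		printf("smm_freeze    %u\n", (unsigned)((caps >> 12) & 1));
		return 0;
	}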
@@ -198,7 +278,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -241,33 +321,32 @@ again:
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
 static bool reserve_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
-#endif
 
 	return true;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
277 | enable_lapic_nmi_watchdog(); | 356 | enable_lapic_nmi_watchdog(); |
278 | 357 | ||
279 | return false; | 358 | return false; |
280 | #endif | ||
281 | } | 359 | } |
282 | 360 | ||
283 | static void release_pmc_hardware(void) | 361 | static void release_pmc_hardware(void) |
284 | { | 362 | { |
285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
286 | int i; | 363 | int i; |
287 | 364 | ||
288 | for (i = 0; i < x86_pmu.num_events; i++) { | 365 | for (i = 0; i < x86_pmu.num_counters; i++) { |
289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 366 | release_perfctr_nmi(x86_pmu.perfctr + i); |
290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 367 | release_evntsel_nmi(x86_pmu.eventsel + i); |
291 | } | 368 | } |
292 | 369 | ||
293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 370 | if (nmi_watchdog == NMI_LOCAL_APIC) |
294 | enable_lapic_nmi_watchdog(); | 371 | enable_lapic_nmi_watchdog(); |
295 | #endif | ||
296 | } | ||
297 | |||
298 | static inline bool bts_available(void) | ||
299 | { | ||
300 | return x86_pmu.enable_bts != NULL; | ||
301 | } | 372 | } |
302 | 373 | ||
303 | static void init_debug_store_on_cpu(int cpu) | 374 | #else |
304 | { | ||
305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
306 | |||
307 | if (!ds) | ||
308 | return; | ||
309 | |||
310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
311 | (u32)((u64)(unsigned long)ds), | ||
312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
313 | } | ||
314 | |||
315 | static void fini_debug_store_on_cpu(int cpu) | ||
316 | { | ||
317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
318 | return; | ||
319 | |||
320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
321 | } | ||
322 | |||
323 | static void release_bts_hardware(void) | ||
324 | { | ||
325 | int cpu; | ||
326 | |||
327 | if (!bts_available()) | ||
328 | return; | ||
329 | |||
330 | get_online_cpus(); | ||
331 | |||
332 | for_each_online_cpu(cpu) | ||
333 | fini_debug_store_on_cpu(cpu); | ||
334 | |||
335 | for_each_possible_cpu(cpu) { | ||
336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
337 | |||
338 | if (!ds) | ||
339 | continue; | ||
340 | |||
341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
342 | |||
343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
344 | kfree(ds); | ||
345 | } | ||
346 | |||
347 | put_online_cpus(); | ||
348 | } | ||
349 | |||
350 | static int reserve_bts_hardware(void) | ||
351 | { | ||
352 | int cpu, err = 0; | ||
353 | |||
354 | if (!bts_available()) | ||
355 | return 0; | ||
356 | |||
357 | get_online_cpus(); | ||
358 | |||
359 | for_each_possible_cpu(cpu) { | ||
360 | struct debug_store *ds; | ||
361 | void *buffer; | ||
362 | |||
363 | err = -ENOMEM; | ||
364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
365 | if (unlikely(!buffer)) | ||
366 | break; | ||
367 | |||
368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
369 | if (unlikely(!ds)) { | ||
370 | kfree(buffer); | ||
371 | break; | ||
372 | } | ||
373 | |||
374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
375 | ds->bts_index = ds->bts_buffer_base; | ||
376 | ds->bts_absolute_maximum = | ||
377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
378 | ds->bts_interrupt_threshold = | ||
379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
380 | |||
381 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
382 | err = 0; | ||
383 | } | ||
384 | 375 | ||
385 | if (err) | 376 | static bool reserve_pmc_hardware(void) { return true; } |
386 | release_bts_hardware(); | 377 | static void release_pmc_hardware(void) {} |
387 | else { | ||
388 | for_each_online_cpu(cpu) | ||
389 | init_debug_store_on_cpu(cpu); | ||
390 | } | ||
391 | 378 | ||
392 | put_online_cpus(); | 379 | #endif |
393 | 380 | ||
394 | return err; | 381 | static int reserve_ds_buffers(void); |
395 | } | 382 | static void release_ds_buffers(void); |
396 | 383 | ||
397 | static void hw_perf_event_destroy(struct perf_event *event) | 384 | static void hw_perf_event_destroy(struct perf_event *event) |
398 | { | 385 | { |
399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 386 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
400 | release_pmc_hardware(); | 387 | release_pmc_hardware(); |
401 | release_bts_hardware(); | 388 | release_ds_buffers(); |
402 | mutex_unlock(&pmc_reserve_mutex); | 389 | mutex_unlock(&pmc_reserve_mutex); |
403 | } | 390 | } |
404 | } | 391 | } |
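hw_perf_event_destroy() releases the hardware only when the last active event goes away: atomic_dec_and_mutex_lock() returns with pmc_reserve_mutex held exactly when the count hits zero, so teardown cannot race with a concurrent reserve in __hw_perf_event_init(). A user-space sketch of that dec-and-lock idiom, assuming C11 atomics and a pthread mutex in place of the kernel primitives:

    #include <pthread.h>
    #include <stdatomic.h>

    /* Returns 1 with *lock held iff the count dropped to zero; mirrors the
     * semantics of the kernel's atomic_dec_and_mutex_lock(). */
    static int dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
    {
            int old = atomic_load(cnt);

            /* Fast path: decrement without the lock unless we might be last. */
            while (old > 1) {
                    if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                            return 0;
            }
            pthread_mutex_lock(lock);
            if (atomic_fetch_sub(cnt, 1) == 1)
                    return 1;       /* last reference; caller unlocks after teardown */
            pthread_mutex_unlock(lock);
            return 0;
    }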
@@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
441 | return 0; | 428 | return 0; |
442 | } | 429 | } |
443 | 430 | ||
444 | /* | 431 | static int x86_setup_perfctr(struct perf_event *event) |
445 | * Setup the hardware configuration for a given attr_type | ||
446 | */ | ||
447 | static int __hw_perf_event_init(struct perf_event *event) | ||
448 | { | 432 | { |
449 | struct perf_event_attr *attr = &event->attr; | 433 | struct perf_event_attr *attr = &event->attr; |
450 | struct hw_perf_event *hwc = &event->hw; | 434 | struct hw_perf_event *hwc = &event->hw; |
451 | u64 config; | 435 | u64 config; |
452 | int err; | ||
453 | |||
454 | if (!x86_pmu_initialized()) | ||
455 | return -ENODEV; | ||
456 | |||
457 | err = 0; | ||
458 | if (!atomic_inc_not_zero(&active_events)) { | ||
459 | mutex_lock(&pmc_reserve_mutex); | ||
460 | if (atomic_read(&active_events) == 0) { | ||
461 | if (!reserve_pmc_hardware()) | ||
462 | err = -EBUSY; | ||
463 | else | ||
464 | err = reserve_bts_hardware(); | ||
465 | } | ||
466 | if (!err) | ||
467 | atomic_inc(&active_events); | ||
468 | mutex_unlock(&pmc_reserve_mutex); | ||
469 | } | ||
470 | if (err) | ||
471 | return err; | ||
472 | |||
473 | event->destroy = hw_perf_event_destroy; | ||
474 | |||
475 | /* | ||
476 | * Generate PMC IRQs: | ||
477 | * (keep 'enabled' bit clear for now) | ||
478 | */ | ||
479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
480 | |||
481 | hwc->idx = -1; | ||
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
484 | |||
485 | /* | ||
486 | * Count user and OS events unless requested not to. | ||
487 | */ | ||
488 | if (!attr->exclude_user) | ||
489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
490 | if (!attr->exclude_kernel) | ||
491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
492 | 436 | ||
493 | if (!hwc->sample_period) { | 437 | if (!hwc->sample_period) { |
494 | hwc->sample_period = x86_pmu.max_period; | 438 | hwc->sample_period = x86_pmu.max_period; |
@@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
505 | return -EOPNOTSUPP; | 449 | return -EOPNOTSUPP; |
506 | } | 450 | } |
507 | 451 | ||
508 | /* | 452 | if (attr->type == PERF_TYPE_RAW) |
509 | * Raw hw_event type provide the config in the hw_event structure | ||
510 | */ | ||
511 | if (attr->type == PERF_TYPE_RAW) { | ||
512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
516 | return 0; | 453 | return 0; |
517 | } | ||
518 | 454 | ||
519 | if (attr->type == PERF_TYPE_HW_CACHE) | 455 | if (attr->type == PERF_TYPE_HW_CACHE) |
520 | return set_ext_hw_attr(hwc, attr); | 456 | return set_ext_hw_attr(hwc, attr); |
@@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 475 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
540 | (hwc->sample_period == 1)) { | 476 | (hwc->sample_period == 1)) { |
541 | /* BTS is not supported by this architecture. */ | 477 | /* BTS is not supported by this architecture. */ |
542 | if (!bts_available()) | 478 | if (!x86_pmu.bts) |
543 | return -EOPNOTSUPP; | 479 | return -EOPNOTSUPP; |
544 | 480 | ||
545 | /* BTS is currently only allowed for user-mode. */ | 481 | /* BTS is currently only allowed for user-mode. */ |
546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 482 | if (!attr->exclude_kernel) |
547 | return -EOPNOTSUPP; | 483 | return -EOPNOTSUPP; |
548 | } | 484 | } |
549 | 485 | ||
@@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
552 | return 0; | 488 | return 0; |
553 | } | 489 | } |
554 | 490 | ||
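x86_setup_perfctr() lets a branch-instructions event with sample_period == 1 fall through to BTS, but only when the event excludes kernel mode. For illustration, a user-space perf_event_attr that would take that path — a sketch built on the standard perf_event_open() syscall; whether BTS actually backs the event depends on the CPU:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Request branch tracing: branch instructions, period 1, user mode only. */
    static int open_bts_event(pid_t pid)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
            attr.sample_period = 1;         /* every branch */
            attr.exclude_kernel = 1;        /* BTS is user-mode only here */

            return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
    }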
491 | static int x86_pmu_hw_config(struct perf_event *event) | ||
492 | { | ||
493 | if (event->attr.precise_ip) { | ||
494 | int precise = 0; | ||
495 | |||
496 | /* Support for constant skid */ | ||
497 | if (x86_pmu.pebs) | ||
498 | precise++; | ||
499 | |||
500 | /* Support for IP fixup */ | ||
501 | if (x86_pmu.lbr_nr) | ||
502 | precise++; | ||
503 | |||
504 | if (event->attr.precise_ip > precise) | ||
505 | return -EOPNOTSUPP; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * Generate PMC IRQs: | ||
510 | * (keep 'enabled' bit clear for now) | ||
511 | */ | ||
512 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
513 | |||
514 | /* | ||
515 | * Count user and OS events unless requested not to | ||
516 | */ | ||
517 | if (!event->attr.exclude_user) | ||
518 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
519 | if (!event->attr.exclude_kernel) | ||
520 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
521 | |||
522 | if (event->attr.type == PERF_TYPE_RAW) | ||
523 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
524 | |||
525 | return x86_setup_perfctr(event); | ||
526 | } | ||
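x86_pmu_hw_config() caps attr.precise_ip at what the hardware offers: one level when PEBS is present (constant skid), a second when the LBR stack can be used to fix up the reported IP. A common user-space pattern — a sketch, not part of this patch — is to ask for the most precise level and back off on -EOPNOTSUPP:

    #include <linux/perf_event.h>
    #include <errno.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Try precise_ip = 2 (PEBS + LBR fixup), then 1 (PEBS), then 0. */
    static int open_cycles_precise(void)
    {
            struct perf_event_attr attr;
            int precise, fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;

            for (precise = 2; precise >= 0; precise--) {
                    attr.precise_ip = precise;
                    fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
                    if (fd >= 0 || errno != EOPNOTSUPP)
                            break;
            }
            return fd;
    }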
527 | |||
528 | /* | ||
529 | * Setup the hardware configuration for a given attr_type | ||
530 | */ | ||
531 | static int __hw_perf_event_init(struct perf_event *event) | ||
532 | { | ||
533 | int err; | ||
534 | |||
535 | if (!x86_pmu_initialized()) | ||
536 | return -ENODEV; | ||
537 | |||
538 | err = 0; | ||
539 | if (!atomic_inc_not_zero(&active_events)) { | ||
540 | mutex_lock(&pmc_reserve_mutex); | ||
541 | if (atomic_read(&active_events) == 0) { | ||
542 | if (!reserve_pmc_hardware()) | ||
543 | err = -EBUSY; | ||
544 | else { | ||
545 | err = reserve_ds_buffers(); | ||
546 | if (err) | ||
547 | release_pmc_hardware(); | ||
548 | } | ||
549 | } | ||
550 | if (!err) | ||
551 | atomic_inc(&active_events); | ||
552 | mutex_unlock(&pmc_reserve_mutex); | ||
553 | } | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | event->destroy = hw_perf_event_destroy; | ||
558 | |||
559 | event->hw.idx = -1; | ||
560 | event->hw.last_cpu = -1; | ||
561 | event->hw.last_tag = ~0ULL; | ||
562 | |||
563 | return x86_pmu.hw_config(event); | ||
564 | } | ||
565 | |||
555 | static void x86_pmu_disable_all(void) | 566 | static void x86_pmu_disable_all(void) |
556 | { | 567 | { |
557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 568 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
558 | int idx; | 569 | int idx; |
559 | 570 | ||
560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 571 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
561 | u64 val; | 572 | u64 val; |
562 | 573 | ||
563 | if (!test_bit(idx, cpuc->active_mask)) | 574 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -587,12 +598,12 @@ void hw_perf_disable(void) | |||
587 | x86_pmu.disable_all(); | 598 | x86_pmu.disable_all(); |
588 | } | 599 | } |
589 | 600 | ||
590 | static void x86_pmu_enable_all(void) | 601 | static void x86_pmu_enable_all(int added) |
591 | { | 602 | { |
592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 603 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
593 | int idx; | 604 | int idx; |
594 | 605 | ||
595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 606 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | 607 | struct perf_event *event = cpuc->events[idx]; |
597 | u64 val; | 608 | u64 val; |
598 | 609 | ||
@@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
667 | * assign events to counters starting with most | 678 | * assign events to counters starting with most |
668 | * constrained events. | 679 | * constrained events. |
669 | */ | 680 | */ |
670 | wmax = x86_pmu.num_events; | 681 | wmax = x86_pmu.num_counters; |
671 | 682 | ||
672 | /* | 683 | /* |
673 | * when fixed event counters are present, | 684 | * when fixed event counters are present, |
674 | * wmax is incremented by 1 to account | 685 | * wmax is incremented by 1 to account |
675 | * for one more choice | 686 | * for one more choice |
676 | */ | 687 | */ |
677 | if (x86_pmu.num_events_fixed) | 688 | if (x86_pmu.num_counters_fixed) |
678 | wmax++; | 689 | wmax++; |
679 | 690 | ||
680 | for (w = 1, num = n; num && w <= wmax; w++) { | 691 | for (w = 1, num = n; num && w <= wmax; w++) { |
@@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
724 | struct perf_event *event; | 735 | struct perf_event *event; |
725 | int n, max_count; | 736 | int n, max_count; |
726 | 737 | ||
727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 738 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
728 | 739 | ||
729 | /* current number of events already accepted */ | 740 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | 741 | n = cpuc->n_events; |
@@ -795,7 +806,7 @@ void hw_perf_enable(void) | |||
795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 806 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
796 | struct perf_event *event; | 807 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | 808 | struct hw_perf_event *hwc; |
798 | int i; | 809 | int i, added = cpuc->n_added; |
799 | 810 | ||
800 | if (!x86_pmu_initialized()) | 811 | if (!x86_pmu_initialized()) |
801 | return; | 812 | return; |
@@ -847,19 +858,20 @@ void hw_perf_enable(void) | |||
847 | cpuc->enabled = 1; | 858 | cpuc->enabled = 1; |
848 | barrier(); | 859 | barrier(); |
849 | 860 | ||
850 | x86_pmu.enable_all(); | 861 | x86_pmu.enable_all(added); |
851 | } | 862 | } |
852 | 863 | ||
853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 864 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
865 | u64 enable_mask) | ||
854 | { | 866 | { |
855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 867 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | 868 | } |
858 | 869 | ||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 870 | static inline void x86_pmu_disable_event(struct perf_event *event) |
860 | { | 871 | { |
861 | struct hw_perf_event *hwc = &event->hw; | 872 | struct hw_perf_event *hwc = &event->hw; |
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 873 | |
874 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
863 | } | 875 | } |
864 | 876 | ||
865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 877 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
874 | struct hw_perf_event *hwc = &event->hw; | 886 | struct hw_perf_event *hwc = &event->hw; |
875 | s64 left = atomic64_read(&hwc->period_left); | 887 | s64 left = atomic64_read(&hwc->period_left); |
876 | s64 period = hwc->sample_period; | 888 | s64 period = hwc->sample_period; |
877 | int err, ret = 0, idx = hwc->idx; | 889 | int ret = 0, idx = hwc->idx; |
878 | 890 | ||
879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 891 | if (idx == X86_PMC_IDX_FIXED_BTS) |
880 | return 0; | 892 | return 0; |
@@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
912 | */ | 924 | */ |
913 | atomic64_set(&hwc->prev_count, (u64)-left); | 925 | atomic64_set(&hwc->prev_count, (u64)-left); |
914 | 926 | ||
915 | err = checking_wrmsrl(hwc->event_base + idx, | 927 | wrmsrl(hwc->event_base + idx, |
916 | (u64)(-left) & x86_pmu.event_mask); | 928 | (u64)(-left) & x86_pmu.cntval_mask); |
917 | 929 | ||
918 | perf_event_update_userpage(event); | 930 | perf_event_update_userpage(event); |
919 | 931 | ||
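The value programmed into the counter is the period's two's complement truncated to the counter width, so the counter overflows after exactly 'left' increments. A quick worked example, assuming the 48-bit counter width these PMUs report:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const int cntval_bits = 48;     /* assumed counter width */
            const uint64_t cntval_mask = (1ULL << cntval_bits) - 1;
            int64_t left = 100000;          /* remaining sample period */
            uint64_t programmed = (uint64_t)(-left) & cntval_mask;

            /* The counter counts up from 2^48 - 100000 and overflows
             * after exactly 100000 events. */
            printf("write %#llx\n", (unsigned long long)programmed);
            return 0;
    }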
@@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
924 | { | 936 | { |
925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 937 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
926 | if (cpuc->enabled) | 938 | if (cpuc->enabled) |
927 | __x86_pmu_enable_event(&event->hw); | 939 | __x86_pmu_enable_event(&event->hw, |
940 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
928 | } | 941 | } |
929 | 942 | ||
930 | /* | 943 | /* |
@@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
950 | if (n < 0) | 963 | if (n < 0) |
951 | return n; | 964 | return n; |
952 | 965 | ||
953 | ret = x86_schedule_events(cpuc, n, assign); | 966 | /* |
967 | * If group events scheduling transaction was started, | ||
968 | * skip the schedulability test here, it will be performed | ||
969 | * at commit time (->commit_txn) as a whole | ||
970 | */ | ||
971 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
972 | goto out; | ||
973 | |||
974 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
954 | if (ret) | 975 | if (ret) |
955 | return ret; | 976 | return ret; |
956 | /* | 977 | /* |
@@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
959 | */ | 980 | */ |
960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 981 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
961 | 982 | ||
983 | out: | ||
962 | cpuc->n_events = n; | 984 | cpuc->n_events = n; |
963 | cpuc->n_added += n - n0; | 985 | cpuc->n_added += n - n0; |
964 | 986 | ||
@@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
991 | void perf_event_print_debug(void) | 1013 | void perf_event_print_debug(void) |
992 | { | 1014 | { |
993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1015 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1016 | u64 pebs; | ||
994 | struct cpu_hw_events *cpuc; | 1017 | struct cpu_hw_events *cpuc; |
995 | unsigned long flags; | 1018 | unsigned long flags; |
996 | int cpu, idx; | 1019 | int cpu, idx; |
997 | 1020 | ||
998 | if (!x86_pmu.num_events) | 1021 | if (!x86_pmu.num_counters) |
999 | return; | 1022 | return; |
1000 | 1023 | ||
1001 | local_irq_save(flags); | 1024 | local_irq_save(flags); |
@@ -1008,16 +1031,18 @@ void perf_event_print_debug(void) | |||
1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1031 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1032 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1033 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
1034 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1011 | 1035 | ||
1012 | pr_info("\n"); | 1036 | pr_info("\n"); |
1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1037 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1038 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1039 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1040 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1041 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1017 | } | 1042 | } |
1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1043 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1019 | 1044 | ||
1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1045 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1046 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1047 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1023 | 1048 | ||
@@ -1030,7 +1055,7 @@ void perf_event_print_debug(void) | |||
1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1055 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1031 | cpu, idx, prev_left); | 1056 | cpu, idx, prev_left); |
1032 | } | 1057 | } |
1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1058 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1059 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1035 | 1060 | ||
1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1061 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1095 | 1120 | ||
1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | cpuc = &__get_cpu_var(cpu_hw_events); |
1097 | 1122 | ||
1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1123 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1099 | if (!test_bit(idx, cpuc->active_mask)) | 1124 | if (!test_bit(idx, cpuc->active_mask)) |
1100 | continue; | 1125 | continue; |
1101 | 1126 | ||
@@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1103 | hwc = &event->hw; | 1128 | hwc = &event->hw; |
1104 | 1129 | ||
1105 | val = x86_perf_event_update(event); | 1130 | val = x86_perf_event_update(event); |
1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1131 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
1107 | continue; | 1132 | continue; |
1108 | 1133 | ||
1109 | /* | 1134 | /* |
@@ -1146,7 +1171,6 @@ void set_perf_event_pending(void) | |||
1146 | 1171 | ||
1147 | void perf_events_lapic_init(void) | 1172 | void perf_events_lapic_init(void) |
1148 | { | 1173 | { |
1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1174 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
1151 | return; | 1175 | return; |
1152 | 1176 | ||
@@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void) | |||
1154 | * Always use NMI for PMU | 1178 | * Always use NMI for PMU |
1155 | */ | 1179 | */ |
1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1180 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1157 | #endif | ||
1158 | } | 1181 | } |
1159 | 1182 | ||
1160 | static int __kprobes | 1183 | static int __kprobes |
@@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1178 | 1201 | ||
1179 | regs = args->regs; | 1202 | regs = args->regs; |
1180 | 1203 | ||
1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1204 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1183 | #endif | ||
1184 | /* | 1205 | /* |
1185 | * Can't rely on the handled return value to say it was our NMI, two | 1206 | * Can't rely on the handled return value to say it was our NMI, two |
1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1207 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
1217 | return &unconstrained; | 1238 | return &unconstrained; |
1218 | } | 1239 | } |
1219 | 1240 | ||
1220 | static int x86_event_sched_in(struct perf_event *event, | ||
1221 | struct perf_cpu_context *cpuctx) | ||
1222 | { | ||
1223 | int ret = 0; | ||
1224 | |||
1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
1228 | |||
1229 | if (!is_x86_event(event)) | ||
1230 | ret = event->pmu->enable(event); | ||
1231 | |||
1232 | if (!ret && !is_software_event(event)) | ||
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
1239 | } | ||
1240 | |||
1241 | static void x86_event_sched_out(struct perf_event *event, | ||
1242 | struct perf_cpu_context *cpuctx) | ||
1243 | { | ||
1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
1245 | event->oncpu = -1; | ||
1246 | |||
1247 | if (!is_x86_event(event)) | ||
1248 | event->pmu->disable(event); | ||
1249 | |||
1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
1251 | |||
1252 | if (!is_software_event(event)) | ||
1253 | cpuctx->active_oncpu--; | ||
1254 | |||
1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
1256 | cpuctx->exclusive = 0; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and return value 1, | ||
1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
1299 | /* | ||
1300 | * copy new assignment, now we know it is possible; | ||
1301 | * it will be used by hw_perf_enable() | ||
1302 | */ | ||
1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
1308 | |||
1309 | /* | ||
1310 | * 1 means successful and events are active | ||
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we ensure the caller won't try to enable | ||
1314 | * individual events | ||
1315 | */ | ||
1316 | return 1; | ||
1317 | undo: | ||
1318 | x86_event_sched_out(leader, cpuctx); | ||
1319 | n0 = 1; | ||
1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
1322 | x86_event_sched_out(sub, cpuctx); | ||
1323 | if (++n0 == n1) | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | return ret; | ||
1328 | } | ||
1329 | |||
1330 | #include "perf_event_amd.c" | 1241 | #include "perf_event_amd.c" |
1331 | #include "perf_event_p6.c" | 1242 | #include "perf_event_p6.c" |
1243 | #include "perf_event_p4.c" | ||
1244 | #include "perf_event_intel_lbr.c" | ||
1245 | #include "perf_event_intel_ds.c" | ||
1332 | #include "perf_event_intel.c" | 1246 | #include "perf_event_intel.c" |
1333 | 1247 | ||
1334 | static int __cpuinit | 1248 | static int __cpuinit |
@@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void) | |||
1402 | 1316 | ||
1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1317 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1404 | 1318 | ||
1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1319 | if (x86_pmu.quirks) |
1320 | x86_pmu.quirks(); | ||
1321 | |||
1322 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1323 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1324 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1325 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
1409 | } | 1326 | } |
1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1327 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1411 | perf_max_events = x86_pmu.num_events; | 1328 | perf_max_events = x86_pmu.num_counters; |
1412 | 1329 | ||
1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1330 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1331 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1332 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1333 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
1417 | } | 1334 | } |
1418 | 1335 | ||
1419 | perf_event_mask |= | 1336 | x86_pmu.intel_ctrl |= |
1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1337 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
1422 | 1338 | ||
1423 | perf_events_lapic_init(); | 1339 | perf_events_lapic_init(); |
1424 | register_die_notifier(&perf_event_nmi_notifier); | 1340 | register_die_notifier(&perf_event_nmi_notifier); |
1425 | 1341 | ||
1426 | unconstrained = (struct event_constraint) | 1342 | unconstrained = (struct event_constraint) |
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1343 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1428 | 0, x86_pmu.num_events); | 1344 | 0, x86_pmu.num_counters); |
1429 | 1345 | ||
1430 | if (x86_pmu.event_constraints) { | 1346 | if (x86_pmu.event_constraints) { |
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1347 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1348 | if (c->cmask != X86_RAW_EVENT_MASK) |
1433 | continue; | 1349 | continue; |
1434 | 1350 | ||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1351 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1436 | c->weight += x86_pmu.num_events; | 1352 | c->weight += x86_pmu.num_counters; |
1437 | } | 1353 | } |
1438 | } | 1354 | } |
1439 | 1355 | ||
1440 | pr_info("... version: %d\n", x86_pmu.version); | 1356 | pr_info("... version: %d\n", x86_pmu.version); |
1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1357 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1358 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1359 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1360 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1361 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1362 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1447 | 1363 | ||
1448 | perf_cpu_notifier(x86_pmu_notifier); | 1364 | perf_cpu_notifier(x86_pmu_notifier); |
1449 | } | 1365 | } |
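The x86_pmu.intel_ctrl mask built above packs one enable bit per generic counter into the low bits and the fixed counters shifted up to X86_PMC_IDX_FIXED. A worked example for 4 generic plus 3 fixed counters, assuming X86_PMC_IDX_FIXED == 32 as in this era's headers:

    #include <stdint.h>
    #include <stdio.h>

    #define X86_PMC_IDX_FIXED 32    /* assumed value from the kernel headers */

    int main(void)
    {
            int num_counters = 4, num_counters_fixed = 3;
            uint64_t intel_ctrl;

            intel_ctrl  = (1ULL << num_counters) - 1;               /* 0xf */
            intel_ctrl |= ((1ULL << num_counters_fixed) - 1)
                            << X86_PMC_IDX_FIXED;                   /* 0x700000000 */

            printf("%#llx\n", (unsigned long long)intel_ctrl);      /* 0x70000000f */
            return 0;
    }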
@@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1453 | x86_perf_event_update(event); | 1369 | x86_perf_event_update(event); |
1454 | } | 1370 | } |
1455 | 1371 | ||
1372 | /* | ||
1373 | * Start group events scheduling transaction | ||
1374 | * Set the flag to make pmu::enable() not perform the | ||
1375 | * schedulability test; it will be performed at commit time | ||
1376 | */ | ||
1377 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
1378 | { | ||
1379 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1380 | |||
1381 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | ||
1382 | } | ||
1383 | |||
1384 | /* | ||
1385 | * Stop group events scheduling transaction | ||
1386 | * Clear the flag and pmu::enable() will perform the | ||
1387 | * schedulability test. | ||
1388 | */ | ||
1389 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
1390 | { | ||
1391 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1392 | |||
1393 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
1394 | } | ||
1395 | |||
1396 | /* | ||
1397 | * Commit group events scheduling transaction | ||
1398 | * Perform the group schedulability test as a whole | ||
1400 | * Return 0 on success | ||
1400 | */ | ||
1401 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
1402 | { | ||
1403 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1404 | int assign[X86_PMC_IDX_MAX]; | ||
1405 | int n, ret; | ||
1406 | |||
1407 | n = cpuc->n_events; | ||
1408 | |||
1409 | if (!x86_pmu_initialized()) | ||
1410 | return -EAGAIN; | ||
1411 | |||
1412 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
1413 | if (ret) | ||
1414 | return ret; | ||
1415 | |||
1416 | /* | ||
1417 | * copy new assignment, now we know it is possible; | ||
1418 | * it will be used by hw_perf_enable() | ||
1419 | */ | ||
1420 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1421 | |||
1422 | return 0; | ||
1423 | } | ||
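Taken together, the three hooks let the core add a whole group tentatively and run the schedulability test once at commit. A kernel-style sketch of the expected calling sequence — the actual caller lives in the generic perf code, not in this patch, and it also schedules already-enabled events back out on failure:

    /* Illustrative only: how a caller would drive the transaction API. */
    static int group_sched_in_sketch(const struct pmu *pmu,
                                     struct perf_event *leader)
    {
            struct perf_event *sub;

            pmu->start_txn(pmu);            /* defer schedulability tests */

            if (pmu->enable(leader))
                    goto fail;

            list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                    if (pmu->enable(sub))
                            goto fail;
            }

            if (!pmu->commit_txn(pmu))      /* one test for the whole group */
                    return 0;
    fail:
            pmu->cancel_txn(pmu);           /* caller must also undo enables */
            return -EAGAIN;
    }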
1424 | |||
1456 | static const struct pmu pmu = { | 1425 | static const struct pmu pmu = { |
1457 | .enable = x86_pmu_enable, | 1426 | .enable = x86_pmu_enable, |
1458 | .disable = x86_pmu_disable, | 1427 | .disable = x86_pmu_disable, |
@@ -1460,9 +1429,38 @@ static const struct pmu pmu = { | |||
1460 | .stop = x86_pmu_stop, | 1429 | .stop = x86_pmu_stop, |
1461 | .read = x86_pmu_read, | 1430 | .read = x86_pmu_read, |
1462 | .unthrottle = x86_pmu_unthrottle, | 1431 | .unthrottle = x86_pmu_unthrottle, |
1432 | .start_txn = x86_pmu_start_txn, | ||
1433 | .cancel_txn = x86_pmu_cancel_txn, | ||
1434 | .commit_txn = x86_pmu_commit_txn, | ||
1463 | }; | 1435 | }; |
1464 | 1436 | ||
1465 | /* | 1437 | /* |
1438 | * validate that we can schedule this event | ||
1439 | */ | ||
1440 | static int validate_event(struct perf_event *event) | ||
1441 | { | ||
1442 | struct cpu_hw_events *fake_cpuc; | ||
1443 | struct event_constraint *c; | ||
1444 | int ret = 0; | ||
1445 | |||
1446 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1447 | if (!fake_cpuc) | ||
1448 | return -ENOMEM; | ||
1449 | |||
1450 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1451 | |||
1452 | if (!c || !c->weight) | ||
1453 | ret = -ENOSPC; | ||
1454 | |||
1455 | if (x86_pmu.put_event_constraints) | ||
1456 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1457 | |||
1458 | kfree(fake_cpuc); | ||
1459 | |||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | /* | ||
1466 | * validate a single event group | 1464 | * validate a single event group |
1467 | * | 1465 | * |
1468 | * validation includes: | 1466 | * validation includes: |
@@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event) | |||
1502 | 1500 | ||
1503 | fake_cpuc->n_events = n; | 1501 | fake_cpuc->n_events = n; |
1504 | 1502 | ||
1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1503 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1506 | 1504 | ||
1507 | out_free: | 1505 | out_free: |
1508 | kfree(fake_cpuc); | 1506 | kfree(fake_cpuc); |
@@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1527 | 1525 | ||
1528 | if (event->group_leader != event) | 1526 | if (event->group_leader != event) |
1529 | err = validate_group(event); | 1527 | err = validate_group(event); |
1528 | else | ||
1529 | err = validate_event(event); | ||
1530 | 1530 | ||
1531 | event->pmu = tmp; | 1531 | event->pmu = tmp; |
1532 | } | 1532 | } |
@@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1574 | { | 1574 | { |
1575 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
1576 | 1576 | ||
1577 | if (reliable) | 1577 | callchain_store(entry, addr); |
1578 | callchain_store(entry, addr); | ||
1579 | } | 1578 | } |
1580 | 1579 | ||
1581 | static const struct stacktrace_ops backtrace_ops = { | 1580 | static const struct stacktrace_ops backtrace_ops = { |
@@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1596 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1598 | } | 1597 | } |
1599 | 1598 | ||
1600 | /* | ||
1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1602 | */ | ||
1603 | static unsigned long | ||
1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1605 | { | ||
1606 | unsigned long offset, addr = (unsigned long)from; | ||
1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1608 | unsigned long size, len = 0; | ||
1609 | struct page *page; | ||
1610 | void *map; | ||
1611 | int ret; | ||
1612 | |||
1613 | do { | ||
1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1615 | if (!ret) | ||
1616 | break; | ||
1617 | |||
1618 | offset = addr & (PAGE_SIZE - 1); | ||
1619 | size = min(PAGE_SIZE - offset, n - len); | ||
1620 | |||
1621 | map = kmap_atomic(page, type); | ||
1622 | memcpy(to, map+offset, size); | ||
1623 | kunmap_atomic(map, type); | ||
1624 | put_page(page); | ||
1625 | |||
1626 | len += size; | ||
1627 | to += size; | ||
1628 | addr += size; | ||
1629 | |||
1630 | } while (len < n); | ||
1631 | |||
1632 | return len; | ||
1633 | } | ||
1634 | |||
1635 | #ifdef CONFIG_COMPAT | 1599 | #ifdef CONFIG_COMPAT |
1636 | static inline int | 1600 | static inline int |
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1601 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1727 | { | 1691 | { |
1728 | struct perf_callchain_entry *entry; | 1692 | struct perf_callchain_entry *entry; |
1729 | 1693 | ||
1694 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1695 | /* TODO: We don't support guest os callchain now */ | ||
1696 | return NULL; | ||
1697 | } | ||
1698 | |||
1730 | if (in_nmi()) | 1699 | if (in_nmi()) |
1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1700 | entry = &__get_cpu_var(pmc_nmi_entry); |
1732 | else | 1701 | else |
@@ -1750,3 +1719,37 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
1750 | regs->cs = __KERNEL_CS; | 1719 | regs->cs = __KERNEL_CS; |
1751 | local_save_flags(regs->flags); | 1720 | local_save_flags(regs->flags); |
1752 | } | 1721 | } |
1722 | |||
1723 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1724 | { | ||
1725 | unsigned long ip; | ||
1726 | |||
1727 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
1728 | ip = perf_guest_cbs->get_guest_ip(); | ||
1729 | else | ||
1730 | ip = instruction_pointer(regs); | ||
1731 | |||
1732 | return ip; | ||
1733 | } | ||
1734 | |||
1735 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1736 | { | ||
1737 | int misc = 0; | ||
1738 | |||
1739 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1740 | if (perf_guest_cbs->is_user_mode()) | ||
1741 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
1742 | else | ||
1743 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
1744 | } else { | ||
1745 | if (user_mode(regs)) | ||
1746 | misc |= PERF_RECORD_MISC_USER; | ||
1747 | else | ||
1748 | misc |= PERF_RECORD_MISC_KERNEL; | ||
1749 | } | ||
1750 | |||
1751 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
1752 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
1753 | |||
1754 | return misc; | ||
1755 | } | ||
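On the consumer side these bits arrive in the misc field of each sample's perf_event_header. A sketch of decoding them with the PERF_RECORD_MISC_* constants (PERF_RECORD_MISC_CPUMODE_MASK is the standard mask from the uapi header):

    #include <linux/perf_event.h>
    #include <stdio.h>

    /* Classify where a sample was taken from its header's misc field. */
    static void print_sample_origin(__u16 misc)
    {
            switch (misc & PERF_RECORD_MISC_CPUMODE_MASK) {
            case PERF_RECORD_MISC_USER:         printf("user");         break;
            case PERF_RECORD_MISC_KERNEL:       printf("kernel");       break;
            case PERF_RECORD_MISC_GUEST_USER:   printf("guest user");   break;
            case PERF_RECORD_MISC_GUEST_KERNEL: printf("guest kernel"); break;
            }
            if (misc & PERF_RECORD_MISC_EXACT_IP)
                    printf(" (exact IP, via PEBS)");
            printf("\n");
    }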
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e..611df11ba15 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
4 | 4 | ||
5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
112 | } | 112 | } |
113 | 113 | ||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
115 | { | 115 | { |
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | 127 | } |
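With the ->raw_event() hook gone, a PERF_TYPE_RAW config is now filtered in amd_pmu_hw_config() through the shared AMD64_RAW_EVENT_MASK rather than the open-coded K7 masks. Nothing changes for user space; a sketch of opening a raw AMD event (0x076, CPU clocks not halted, is just an illustrative event select):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Raw AMD event: the kernel masks attr.config with AMD64_RAW_EVENT_MASK
     * and adds the USR/OS bits from the exclude_* flags. */
    static int open_raw_amd_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = 0x76;     /* event select only; umask 0 */

            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }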
131 | 128 | ||
132 | /* | 129 | /* |
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
166 | * when we come here | 163 | * when we come here |
167 | */ | 164 | */ |
168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
171 | break; | 168 | break; |
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
220 | 217 | ||
221 | /* | 218 | /* |
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
293 | /* | 290 | /* |
294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
295 | */ | 292 | */ |
296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
299 | } | 296 | } |
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
372 | } | 369 | } |
373 | 370 | ||
374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
375 | .name = "AMD", | 372 | .name = "AMD", |
376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
378 | .hw_config = amd_pmu_hw_config, | ||
379 | .schedule_events = x86_schedule_events, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
386 | .num_events = 4, | 384 | .num_counters = 4, |
387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
389 | .apic = 1, | 387 | .apic = 1, |
390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac8783..fdbc652d3fe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
88 | return intel_perfmon_event_map[hw_event]; | 88 | return intel_perfmon_event_map[hw_event]; |
89 | } | 89 | } |
90 | 90 | ||
91 | static __initconst u64 westmere_hw_cache_event_ids | 91 | static __initconst const u64 westmere_hw_cache_event_ids |
92 | [PERF_COUNT_HW_CACHE_MAX] | 92 | [PERF_COUNT_HW_CACHE_MAX] |
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
179 | }, | 179 | }, |
180 | }; | 180 | }; |
181 | 181 | ||
182 | static __initconst u64 nehalem_hw_cache_event_ids | 182 | static __initconst const u64 nehalem_hw_cache_event_ids |
183 | [PERF_COUNT_HW_CACHE_MAX] | 183 | [PERF_COUNT_HW_CACHE_MAX] |
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 184 | [PERF_COUNT_HW_CACHE_OP_MAX] |
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
270 | }, | 270 | }, |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static __initconst u64 core2_hw_cache_event_ids | 273 | static __initconst const u64 core2_hw_cache_event_ids |
274 | [PERF_COUNT_HW_CACHE_MAX] | 274 | [PERF_COUNT_HW_CACHE_MAX] |
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 275 | [PERF_COUNT_HW_CACHE_OP_MAX] |
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
361 | }, | 361 | }, |
362 | }; | 362 | }; |
363 | 363 | ||
364 | static __initconst u64 atom_hw_cache_event_ids | 364 | static __initconst const u64 atom_hw_cache_event_ids |
365 | [PERF_COUNT_HW_CACHE_MAX] | 365 | [PERF_COUNT_HW_CACHE_MAX] |
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 366 | [PERF_COUNT_HW_CACHE_OP_MAX] |
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
452 | }, | 452 | }, |
453 | }; | 453 | }; |
454 | 454 | ||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 455 | static void intel_pmu_disable_all(void) |
510 | { | 456 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 457 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void) | |||
514 | 460 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 461 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 462 | intel_pmu_disable_bts(); |
463 | |||
464 | intel_pmu_pebs_disable_all(); | ||
465 | intel_pmu_lbr_disable_all(); | ||
517 | } | 466 | } |
518 | 467 | ||
519 | static void intel_pmu_enable_all(void) | 468 | static void intel_pmu_enable_all(int added) |
520 | { | 469 | { |
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 470 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
522 | 471 | ||
472 | intel_pmu_pebs_enable_all(); | ||
473 | intel_pmu_lbr_enable_all(); | ||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 474 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
524 | 475 | ||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 476 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
@@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void) | |||
533 | } | 484 | } |
534 | } | 485 | } |
535 | 486 | ||
487 | /* | ||
488 | * Workaround for: | ||
489 | * Intel Errata AAK100 (model 26) | ||
490 | * Intel Errata AAP53 (model 30) | ||
491 | * Intel Errata BD53 (model 44) | ||
492 | * | ||
493 | * These chips need to be 'reset' when adding counters by programming | ||
494 | * the magic three (non-counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | ||
495 | * either in sequence on the same PMC or on different PMCs. | ||
496 | */ | ||
497 | static void intel_pmu_nhm_enable_all(int added) | ||
498 | { | ||
499 | if (added) { | ||
500 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
501 | int i; | ||
502 | |||
503 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | ||
504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | ||
505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | ||
506 | |||
507 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | ||
508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
509 | |||
510 | for (i = 0; i < 3; i++) { | ||
511 | struct perf_event *event = cpuc->events[i]; | ||
512 | |||
513 | if (!event) | ||
514 | continue; | ||
515 | |||
516 | __x86_pmu_enable_event(&event->hw, | ||
517 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
518 | } | ||
519 | } | ||
520 | intel_pmu_enable_all(added); | ||
521 | } | ||
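The three magic values are ordinary EVNTSEL encodings — USR + OS + ENABLE on top of three specific event selects. A quick decode, assuming the standard EVNTSEL bit layout (event [7:0], umask [15:8], USR bit 16, OS bit 17, ENABLE bit 22):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            const uint32_t magic[] = { 0x4300D2, 0x4300B1, 0x4300B5 };
            int i;

            for (i = 0; i < 3; i++) {
                    uint32_t v = magic[i];

                    printf("event %#x umask %#x usr=%d os=%d en=%d\n",
                           v & 0xff, (v >> 8) & 0xff,
                           !!(v & (1 << 16)), !!(v & (1 << 17)),
                           !!(v & (1 << 22)));
            }
            return 0;
    }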
522 | |||
536 | static inline u64 intel_pmu_get_status(void) | 523 | static inline u64 intel_pmu_get_status(void) |
537 | { | 524 | { |
538 | u64 status; | 525 | u64 status; |
@@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 534 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 535 | } |
549 | 536 | ||
550 | static inline void | 537 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 538 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 539 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 540 | u64 ctrl_val, mask; |
@@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
557 | 543 | ||
558 | rdmsrl(hwc->config_base, ctrl_val); | 544 | rdmsrl(hwc->config_base, ctrl_val); |
559 | ctrl_val &= ~mask; | 545 | ctrl_val &= ~mask; |
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 546 | wrmsrl(hwc->config_base, ctrl_val); |
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | 547 | } |
622 | 548 | ||
623 | static inline void | 549 | static void intel_pmu_disable_event(struct perf_event *event) |
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 550 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 551 | struct hw_perf_event *hwc = &event->hw; |
627 | 552 | ||
@@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 562 | } |
638 | 563 | ||
639 | x86_pmu_disable_event(event); | 564 | x86_pmu_disable_event(event); |
565 | |||
566 | if (unlikely(event->attr.precise_ip)) | ||
567 | intel_pmu_pebs_disable(event); | ||
640 | } | 568 | } |
641 | 569 | ||
642 | static inline void | 570 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 571 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 572 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 573 | u64 ctrl_val, bits, mask; |
647 | int err; | ||
648 | 574 | ||
649 | /* | 575 | /* |
650 | * Enable IRQ generation (0x8), | 576 | * Enable IRQ generation (0x8), |
@@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | 595 | rdmsrl(hwc->config_base, ctrl_val); |
670 | ctrl_val &= ~mask; | 596 | ctrl_val &= ~mask; |
671 | ctrl_val |= bits; | 597 | ctrl_val |= bits; |
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 598 | wrmsrl(hwc->config_base, ctrl_val); |
673 | } | 599 | } |
674 | 600 | ||
675 | static void intel_pmu_enable_event(struct perf_event *event) | 601 | static void intel_pmu_enable_event(struct perf_event *event) |
@@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 615 | return; |
690 | } | 616 | } |
691 | 617 | ||
692 | __x86_pmu_enable_event(hwc); | 618 | if (unlikely(event->attr.precise_ip)) |
619 | intel_pmu_pebs_enable(event); | ||
620 | |||
621 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
693 | } | 622 | } |
694 | 623 | ||
695 | /* | 624 | /* |
@@ -708,20 +637,20 @@ static void intel_pmu_reset(void) | |||
708 | unsigned long flags; | 637 | unsigned long flags; |
709 | int idx; | 638 | int idx; |
710 | 639 | ||
711 | if (!x86_pmu.num_events) | 640 | if (!x86_pmu.num_counters) |
712 | return; | 641 | return; |
713 | 642 | ||
714 | local_irq_save(flags); | 643 | local_irq_save(flags); |
715 | 644 | ||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 645 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
717 | 646 | ||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 647 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 648 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 649 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
721 | } | 650 | } |
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 651 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 652 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
724 | } | 653 | |
725 | if (ds) | 654 | if (ds) |
726 | ds->bts_index = ds->bts_buffer_base; | 655 | ds->bts_index = ds->bts_buffer_base; |
727 | 656 | ||
@@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
747 | intel_pmu_drain_bts_buffer(); | 676 | intel_pmu_drain_bts_buffer(); |
748 | status = intel_pmu_get_status(); | 677 | status = intel_pmu_get_status(); |
749 | if (!status) { | 678 | if (!status) { |
750 | intel_pmu_enable_all(); | 679 | intel_pmu_enable_all(0); |
751 | return 0; | 680 | return 0; |
752 | } | 681 | } |
753 | 682 | ||
@@ -762,6 +691,15 @@ again: | |||
762 | 691 | ||
763 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 693 | ack = status; |
694 | |||
695 | intel_pmu_lbr_read(); | ||
696 | |||
697 | /* | ||
698 | * PEBS overflow sets bit 62 in the global status register | ||
699 | */ | ||
700 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
701 | x86_pmu.drain_pebs(regs); | ||
702 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 703 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 704 | struct perf_event *event = cpuc->events[bit]; |
767 | 705 | ||
@@ -787,26 +725,22 @@ again: | |||
787 | goto again; | 725 | goto again; |
788 | 726 | ||
789 | done: | 727 | done: |
790 | intel_pmu_enable_all(); | 728 | intel_pmu_enable_all(0); |
791 | return 1; | 729 | return 1; |
792 | } | 730 | } |
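A subtlety worth noting in the hunk above: intel_pmu_lbr_read() runs before x86_pmu.drain_pebs() because the PEBS ip-fixup (added in perf_event_intel_ds.c below) consumes cpuc->lbr_entries[0]. A minimal sketch of the resulting handler flow, assuming (as the new comment says) that bit 62 of the global status register is the DS-area/PEBS buffer-overflow bit:

	status = intel_pmu_get_status();	/* overflow bits snapshot */

	intel_pmu_lbr_read();			/* snapshot LBRs first: the PEBS */
						/* ip-fixup reads lbr_entries[0] */

	if (__test_and_clear_bit(62, (unsigned long *)&status))
		x86_pmu.drain_pebs(regs);	/* one drain serves all PEBS events */

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		/* ordinary per-counter overflows */
	}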
793 | 731 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 732 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 733 | intel_bts_constraints(struct perf_event *event) |
799 | { | 734 | { |
800 | unsigned int hw_event; | 735 | struct hw_perf_event *hwc = &event->hw; |
801 | 736 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 737 | ||
804 | if (unlikely((hw_event == | 738 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 739 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 740 | ||
741 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 742 | return &bts_constraint; |
809 | } | 743 | |
810 | return NULL; | 744 | return NULL; |
811 | } | 745 | } |
812 | 746 | ||
@@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 749 | { |
816 | struct event_constraint *c; | 750 | struct event_constraint *c; |
817 | 751 | ||
818 | c = intel_special_constraints(event); | 752 | c = intel_bts_constraints(event); |
753 | if (c) | ||
754 | return c; | ||
755 | |||
756 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 757 | if (c) |
820 | return c; | 758 | return c; |
821 | 759 | ||
822 | return x86_get_event_constraints(cpuc, event); | 760 | return x86_get_event_constraints(cpuc, event); |
823 | } | 761 | } |
824 | 762 | ||
825 | static __initconst struct x86_pmu core_pmu = { | 763 | static int intel_pmu_hw_config(struct perf_event *event) |
764 | { | ||
765 | int ret = x86_pmu_hw_config(event); | ||
766 | |||
767 | if (ret) | ||
768 | return ret; | ||
769 | |||
770 | if (event->attr.type != PERF_TYPE_RAW) | ||
771 | return 0; | ||
772 | |||
773 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
774 | return 0; | ||
775 | |||
776 | if (x86_pmu.version < 3) | ||
777 | return -EINVAL; | ||
778 | |||
779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
780 | return -EACCES; | ||
781 | |||
782 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
783 | |||
784 | return 0; | ||
785 | } | ||
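For orientation: ARCH_PERFMON_EVENTSEL_ANY is bit 21 of the event-select register and makes a counter count across both HT siblings, which is why the helper above requires perfmon v3 (where the bit appeared) and CAP_SYS_ADMIN under perf_paranoid_cpu(). A hypothetical raw event that would take this path:

	/*
	 * Illustrative only: event 0x3c with the ANY-thread bit set.
	 *
	 *	config = 0x3c | (1 << 21) = 0x20003c
	 *
	 * e.g. from userspace:  perf stat -e r20003c ...
	 */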
786 | |||
787 | static __initconst const struct x86_pmu core_pmu = { | ||
826 | .name = "core", | 788 | .name = "core", |
827 | .handle_irq = x86_pmu_handle_irq, | 789 | .handle_irq = x86_pmu_handle_irq, |
828 | .disable_all = x86_pmu_disable_all, | 790 | .disable_all = x86_pmu_disable_all, |
829 | .enable_all = x86_pmu_enable_all, | 791 | .enable_all = x86_pmu_enable_all, |
830 | .enable = x86_pmu_enable_event, | 792 | .enable = x86_pmu_enable_event, |
831 | .disable = x86_pmu_disable_event, | 793 | .disable = x86_pmu_disable_event, |
794 | .hw_config = x86_pmu_hw_config, | ||
795 | .schedule_events = x86_schedule_events, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 796 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 797 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
834 | .event_map = intel_pmu_event_map, | 798 | .event_map = intel_pmu_event_map, |
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 799 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
837 | .apic = 1, | 800 | .apic = 1, |
838 | /* | 801 | /* |
@@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
845 | .event_constraints = intel_core_event_constraints, | 808 | .event_constraints = intel_core_event_constraints, |
846 | }; | 809 | }; |
847 | 810 | ||
848 | static __initconst struct x86_pmu intel_pmu = { | 811 | static void intel_pmu_cpu_starting(int cpu) |
812 | { | ||
813 | init_debug_store_on_cpu(cpu); | ||
814 | /* | ||
815 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
816 | */ | ||
817 | intel_pmu_lbr_reset(); | ||
818 | } | ||
819 | |||
820 | static void intel_pmu_cpu_dying(int cpu) | ||
821 | { | ||
822 | fini_debug_store_on_cpu(cpu); | ||
823 | } | ||
824 | |||
825 | static __initconst const struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | 826 | .name = "Intel", |
850 | .handle_irq = intel_pmu_handle_irq, | 827 | .handle_irq = intel_pmu_handle_irq, |
851 | .disable_all = intel_pmu_disable_all, | 828 | .disable_all = intel_pmu_disable_all, |
852 | .enable_all = intel_pmu_enable_all, | 829 | .enable_all = intel_pmu_enable_all, |
853 | .enable = intel_pmu_enable_event, | 830 | .enable = intel_pmu_enable_event, |
854 | .disable = intel_pmu_disable_event, | 831 | .disable = intel_pmu_disable_event, |
832 | .hw_config = intel_pmu_hw_config, | ||
833 | .schedule_events = x86_schedule_events, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 834 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 835 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
857 | .event_map = intel_pmu_event_map, | 836 | .event_map = intel_pmu_event_map, |
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 837 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
860 | .apic = 1, | 838 | .apic = 1, |
861 | /* | 839 | /* |
@@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 842 | * the generic event period: |
865 | */ | 843 | */ |
866 | .max_period = (1ULL << 31) - 1, | 844 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 845 | .get_event_constraints = intel_get_event_constraints, |
870 | 846 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 847 | .cpu_starting = intel_pmu_cpu_starting, |
872 | .cpu_dying = fini_debug_store_on_cpu, | 848 | .cpu_dying = intel_pmu_cpu_dying, |
873 | }; | 849 | }; |
874 | 850 | ||
851 | static void intel_clovertown_quirks(void) | ||
852 | { | ||
853 | /* | ||
854 | * PEBS is unreliable due to: | ||
855 | * | ||
856 | * AJ67 - PEBS may experience CPL leaks | ||
857 | * AJ68 - PEBS PMI may be delayed by one event | ||
858 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
859 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
860 | * | ||
861 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
862 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
863 | * | ||
864 | * AJ106 could possibly be worked around by not allowing LBR | ||
865 | * usage from PEBS, including the fixup. | ||
866 | * AJ68 could possibly be worked around by always programming | ||
867 | * a pebs_event_reset[0] value and coping with the lost events. | ||
868 | * | ||
869 | * But taken together it might just make sense to not enable PEBS on | ||
870 | * these chips. | ||
871 | */ | ||
872 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
873 | x86_pmu.pebs = 0; | ||
874 | x86_pmu.pebs_constraints = NULL; | ||
875 | } | ||
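The quirks member is a plain function pointer; presumably the generic init path (not shown in this excerpt) invokes it once, along the lines of:

	/* sketch, assuming a single call site during PMU init: */
	if (x86_pmu.quirks)
		x86_pmu.quirks();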
876 | |||
875 | static __init int intel_pmu_init(void) | 877 | static __init int intel_pmu_init(void) |
876 | { | 878 | { |
877 | union cpuid10_edx edx; | 879 | union cpuid10_edx edx; |
@@ -881,12 +883,13 @@ static __init int intel_pmu_init(void) | |||
881 | int version; | 883 | int version; |
882 | 884 | ||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 885 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
884 | /* check for P6 processor family */ | 886 | switch (boot_cpu_data.x86) { |
885 | if (boot_cpu_data.x86 == 6) { | 887 | case 0x6: |
886 | return p6_pmu_init(); | 888 | return p6_pmu_init(); |
887 | } else { | 889 | case 0xf: |
890 | return p4_pmu_init(); | ||
891 | } | ||
888 | return -ENODEV; | 892 | return -ENODEV; |
889 | } | ||
890 | } | 893 | } |
891 | 894 | ||
892 | /* | 895 | /* |
@@ -904,16 +907,28 @@ static __init int intel_pmu_init(void) | |||
904 | x86_pmu = intel_pmu; | 907 | x86_pmu = intel_pmu; |
905 | 908 | ||
906 | x86_pmu.version = version; | 909 | x86_pmu.version = version; |
907 | x86_pmu.num_events = eax.split.num_events; | 910 | x86_pmu.num_counters = eax.split.num_counters; |
908 | x86_pmu.event_bits = eax.split.bit_width; | 911 | x86_pmu.cntval_bits = eax.split.bit_width; |
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 912 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
910 | 913 | ||
911 | /* | 914 | /* |
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 915 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
913 | * assume at least 3 events: | 916 | * assume at least 3 events: |
914 | */ | 917 | */ |
915 | if (version > 1) | 918 | if (version > 1) |
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 919 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
920 | |||
921 | /* | ||
922 | * v2 and above have a perf capabilities MSR | ||
923 | */ | ||
924 | if (version > 1) { | ||
925 | u64 capabilities; | ||
926 | |||
927 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
928 | x86_pmu.intel_cap.capabilities = capabilities; | ||
929 | } | ||
930 | |||
931 | intel_ds_init(); | ||
917 | 932 | ||
918 | /* | 933 | /* |
919 | * Install the hw-cache-events table: | 934 | * Install the hw-cache-events table: |
@@ -924,12 +939,15 @@ static __init int intel_pmu_init(void) | |||
924 | break; | 939 | break; |
925 | 940 | ||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 941 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
942 | x86_pmu.quirks = intel_clovertown_quirks; | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 943 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 944 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 945 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 946 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
931 | sizeof(hw_cache_event_ids)); | 947 | sizeof(hw_cache_event_ids)); |
932 | 948 | ||
949 | intel_pmu_lbr_init_core(); | ||
950 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 951 | x86_pmu.event_constraints = intel_core2_event_constraints; |
934 | pr_cont("Core2 events, "); | 952 | pr_cont("Core2 events, "); |
935 | break; | 953 | break; |
@@ -940,13 +958,19 @@ static __init int intel_pmu_init(void) | |||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 958 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
941 | sizeof(hw_cache_event_ids)); | 959 | sizeof(hw_cache_event_ids)); |
942 | 960 | ||
961 | intel_pmu_lbr_init_nhm(); | ||
962 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 963 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
944 | pr_cont("Nehalem/Corei7 events, "); | 964 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
965 | pr_cont("Nehalem events, "); | ||
945 | break; | 966 | break; |
967 | |||
946 | case 28: /* Atom */ | 968 | case 28: /* Atom */ |
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 969 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
948 | sizeof(hw_cache_event_ids)); | 970 | sizeof(hw_cache_event_ids)); |
949 | 971 | ||
972 | intel_pmu_lbr_init_atom(); | ||
973 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 974 | x86_pmu.event_constraints = intel_gen_event_constraints; |
951 | pr_cont("Atom events, "); | 975 | pr_cont("Atom events, "); |
952 | break; | 976 | break; |
@@ -956,7 +980,10 @@ static __init int intel_pmu_init(void) | |||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 980 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
957 | sizeof(hw_cache_event_ids)); | 981 | sizeof(hw_cache_event_ids)); |
958 | 982 | ||
983 | intel_pmu_lbr_init_nhm(); | ||
984 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 985 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
986 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
960 | pr_cont("Westmere events, "); | 987 | pr_cont("Westmere events, "); |
961 | break; | 988 | break; |
962 | 989 | ||
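The capabilities word stored into x86_pmu.intel_cap above is decoded through a bitfield union elsewhere in this patch; a sketch of the layout it assumes (field names as used by the LBR and DS code below, widths per the architectural MSR_IA32_PERF_CAPABILITIES definition):

	union perf_capabilities {
		struct {
			u64	lbr_format	: 6;	/* consumed by the LBR code    */
			u64	pebs_trap	: 1;	/* PEBS trap- vs fault-like    */
			u64	pebs_arch_reg	: 1;
			u64	pebs_format	: 4;	/* consumed by intel_ds_init() */
			u64	smm_freeze	: 1;
		};
		u64	capabilities;
	};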
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 00000000000..18018d1311c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for P4 and Core is not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bx, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * A debug store configuration. | ||
42 | * | ||
43 | * We only support architectures that use 64bit fields. | ||
44 | */ | ||
45 | struct debug_store { | ||
46 | u64 bts_buffer_base; | ||
47 | u64 bts_index; | ||
48 | u64 bts_absolute_maximum; | ||
49 | u64 bts_interrupt_threshold; | ||
50 | u64 pebs_buffer_base; | ||
51 | u64 pebs_index; | ||
52 | u64 pebs_absolute_maximum; | ||
53 | u64 pebs_interrupt_threshold; | ||
54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
55 | }; | ||
56 | |||
57 | static void init_debug_store_on_cpu(int cpu) | ||
58 | { | ||
59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
60 | |||
61 | if (!ds) | ||
62 | return; | ||
63 | |||
64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
65 | (u32)((u64)(unsigned long)ds), | ||
66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
67 | } | ||
68 | |||
69 | static void fini_debug_store_on_cpu(int cpu) | ||
70 | { | ||
71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
72 | return; | ||
73 | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
75 | } | ||
76 | |||
77 | static void release_ds_buffers(void) | ||
78 | { | ||
79 | int cpu; | ||
80 | |||
81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
82 | return; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | |||
86 | for_each_online_cpu(cpu) | ||
87 | fini_debug_store_on_cpu(cpu); | ||
88 | |||
89 | for_each_possible_cpu(cpu) { | ||
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
91 | |||
92 | if (!ds) | ||
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | ||
101 | |||
102 | put_online_cpus(); | ||
103 | } | ||
104 | |||
105 | static int reserve_ds_buffers(void) | ||
106 | { | ||
107 | int cpu, err = 0; | ||
108 | |||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
110 | return 0; | ||
111 | |||
112 | get_online_cpus(); | ||
113 | |||
114 | for_each_possible_cpu(cpu) { | ||
115 | struct debug_store *ds; | ||
116 | void *buffer; | ||
117 | int max, thresh; | ||
118 | |||
119 | err = -ENOMEM; | ||
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
121 | if (unlikely(!ds)) | ||
122 | break; | ||
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | |||
141 | if (x86_pmu.pebs) { | ||
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
143 | if (unlikely(!buffer)) | ||
144 | break; | ||
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | |||
159 | err = 0; | ||
160 | } | ||
161 | |||
162 | if (err) | ||
163 | release_ds_buffers(); | ||
164 | else { | ||
165 | for_each_online_cpu(cpu) | ||
166 | init_debug_store_on_cpu(cpu); | ||
167 | } | ||
168 | |||
169 | put_online_cpus(); | ||
170 | |||
171 | return err; | ||
172 | } | ||
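Plugging the constants from the top of the file into the code above (and assuming PAGE_SIZE == 4096) gives a feel for the sizing:

	/*
	 * BTS:  BTS_BUFFER_SIZE = 4096 << 4 = 65536 bytes
	 *       max             = 65536 / 24 = 2730 records
	 *       thresh          = 2730 / 16  =  170 records
	 *       => the interrupt threshold sits ~4KB short of the end,
	 *          leaving headroom while the PMI drains the buffer.
	 *
	 * PEBS: PEBS_BUFFER_SIZE = 4096 bytes, threshold = base + one
	 *       record => every PEBS assist raises a PMI (single-record
	 *       PEBS, per the comment in the function).
	 */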
173 | |||
174 | /* | ||
175 | * BTS | ||
176 | */ | ||
177 | |||
178 | static struct event_constraint bts_constraint = | ||
179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
180 | |||
181 | static void intel_pmu_enable_bts(u64 config) | ||
182 | { | ||
183 | unsigned long debugctlmsr; | ||
184 | |||
185 | debugctlmsr = get_debugctlmsr(); | ||
186 | |||
187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
190 | |||
191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
193 | |||
194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
196 | |||
197 | update_debugctlmsr(debugctlmsr); | ||
198 | } | ||
199 | |||
200 | static void intel_pmu_disable_bts(void) | ||
201 | { | ||
202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
203 | unsigned long debugctlmsr; | ||
204 | |||
205 | if (!cpuc->ds) | ||
206 | return; | ||
207 | |||
208 | debugctlmsr = get_debugctlmsr(); | ||
209 | |||
210 | debugctlmsr &= | ||
211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
213 | |||
214 | update_debugctlmsr(debugctlmsr); | ||
215 | } | ||
216 | |||
217 | static void intel_pmu_drain_bts_buffer(void) | ||
218 | { | ||
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
220 | struct debug_store *ds = cpuc->ds; | ||
221 | struct bts_record { | ||
222 | u64 from; | ||
223 | u64 to; | ||
224 | u64 flags; | ||
225 | }; | ||
226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
227 | struct bts_record *at, *top; | ||
228 | struct perf_output_handle handle; | ||
229 | struct perf_event_header header; | ||
230 | struct perf_sample_data data; | ||
231 | struct pt_regs regs; | ||
232 | |||
233 | if (!event) | ||
234 | return; | ||
235 | |||
236 | if (!ds) | ||
237 | return; | ||
238 | |||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
241 | |||
242 | if (top <= at) | ||
243 | return; | ||
244 | |||
245 | ds->bts_index = ds->bts_buffer_base; | ||
246 | |||
247 | perf_sample_data_init(&data, 0); | ||
248 | data.period = event->hw.last_period; | ||
249 | regs.ip = 0; | ||
250 | |||
251 | /* | ||
252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
253 | * We will overwrite the from and to address before we output | ||
254 | * the sample. | ||
255 | */ | ||
256 | perf_prepare_sample(&header, &data, event, ®s); | ||
257 | |||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
259 | return; | ||
260 | |||
261 | for (; at < top; at++) { | ||
262 | data.ip = at->from; | ||
263 | data.addr = at->to; | ||
264 | |||
265 | perf_output_sample(&handle, &header, &data, event); | ||
266 | } | ||
267 | |||
268 | perf_output_end(&handle); | ||
269 | |||
270 | /* There's new data available. */ | ||
271 | event->hw.interrupts++; | ||
272 | event->pending_kill = POLL_IN; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * PEBS | ||
277 | */ | ||
278 | |||
279 | static struct event_constraint intel_core_pebs_events[] = { | ||
280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
302 | EVENT_CONSTRAINT_END | ||
303 | }; | ||
304 | |||
305 | static struct event_constraint * | ||
306 | intel_pebs_constraints(struct perf_event *event) | ||
307 | { | ||
308 | struct event_constraint *c; | ||
309 | |||
310 | if (!event->attr.precise_ip) | ||
311 | return NULL; | ||
312 | |||
313 | if (x86_pmu.pebs_constraints) { | ||
314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
315 | if ((event->hw.config & c->cmask) == c->code) | ||
316 | return c; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | return &emptyconstraint; | ||
321 | } | ||
322 | |||
323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
324 | { | ||
325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
326 | struct hw_perf_event *hwc = &event->hw; | ||
327 | |||
328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
329 | |||
330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
331 | WARN_ON_ONCE(cpuc->enabled); | ||
332 | |||
333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
334 | intel_pmu_lbr_enable(event); | ||
335 | } | ||
336 | |||
337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
338 | { | ||
339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
340 | struct hw_perf_event *hwc = &event->hw; | ||
341 | |||
342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
343 | if (cpuc->enabled) | ||
344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
345 | |||
346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
347 | |||
348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
349 | intel_pmu_lbr_disable(event); | ||
350 | } | ||
351 | |||
352 | static void intel_pmu_pebs_enable_all(void) | ||
353 | { | ||
354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
355 | |||
356 | if (cpuc->pebs_enabled) | ||
357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
358 | } | ||
359 | |||
360 | static void intel_pmu_pebs_disable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
363 | |||
364 | if (cpuc->pebs_enabled) | ||
365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
366 | } | ||
367 | |||
368 | #include <asm/insn.h> | ||
369 | |||
370 | static inline bool kernel_ip(unsigned long ip) | ||
371 | { | ||
372 | #ifdef CONFIG_X86_32 | ||
373 | return ip > PAGE_OFFSET; | ||
374 | #else | ||
375 | return (long)ip < 0; | ||
376 | #endif | ||
377 | } | ||
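Why the sign test works: on 64-bit, kernel addresses live in the upper canonical half, so bit 63 doubles as the kernel/user discriminator; on 32-bit, anything above PAGE_OFFSET is kernel space. Illustrative values:

	/*
	 * 0xffffffff81000000  ->  (long)ip <  0  ->  kernel
	 * 0x0000000000400000  ->  (long)ip >= 0  ->  user
	 */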
378 | |||
379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
380 | { | ||
381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
384 | unsigned long ip = regs->ip; | ||
385 | |||
386 | /* | ||
387 | * We don't need to fix anything up if the PEBS assist is fault-like | ||
388 | */ | ||
389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
390 | return 1; | ||
391 | |||
392 | /* | ||
393 | * No LBR entry, no basic block, no rewinding | ||
394 | */ | ||
395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
396 | return 0; | ||
397 | |||
398 | /* | ||
399 | * Basic blocks should never cross user/kernel boundaries | ||
400 | */ | ||
401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
402 | return 0; | ||
403 | |||
404 | /* | ||
405 | * unsigned math: either ip is before the start (impossible) or | ||
406 | * the basic block is larger than one page (sanity limit) | ||
407 | */ | ||
408 | if ((ip - to) > PAGE_SIZE) | ||
409 | return 0; | ||
410 | |||
411 | /* | ||
412 | * We sampled a branch insn, rewind using the LBR stack | ||
413 | */ | ||
414 | if (ip == to) { | ||
415 | regs->ip = from; | ||
416 | return 1; | ||
417 | } | ||
418 | |||
419 | do { | ||
420 | struct insn insn; | ||
421 | u8 buf[MAX_INSN_SIZE]; | ||
422 | void *kaddr; | ||
423 | |||
424 | old_to = to; | ||
425 | if (!kernel_ip(ip)) { | ||
426 | int bytes, size = MAX_INSN_SIZE; | ||
427 | |||
428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
429 | if (bytes != size) | ||
430 | return 0; | ||
431 | |||
432 | kaddr = buf; | ||
433 | } else | ||
434 | kaddr = (void *)to; | ||
435 | |||
436 | kernel_insn_init(&insn, kaddr); | ||
437 | insn_get_length(&insn); | ||
438 | to += insn.length; | ||
439 | } while (to < ip); | ||
440 | |||
441 | if (to == ip) { | ||
442 | regs->ip = old_to; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Even though we decoded the basic block, the instruction stream | ||
448 | * never matched the given IP; either the TO or the IP got corrupted. | ||
449 | */ | ||
450 | return 0; | ||
451 | } | ||
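A worked example of the rewind above: with trap-like PEBS the reported IP points one instruction past the one that caused the event, so the code decodes forward from the last branch target recorded in the LBR until it reaches the reported IP; the previous decode position is then the precise address.

	/*
	 * LBR[0]:  from = A, to = B	(last branch before the PMI)
	 * PEBS ip: I			(one insn past the event)
	 *
	 * decode forward:  B -> B+len -> ... -> old_to -> I
	 *
	 * landing exactly on I means old_to addresses the instruction
	 * that caused the event, so regs->ip = old_to and the sample
	 * is flagged PERF_EFLAGS_EXACT.
	 */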
452 | |||
453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
454 | |||
455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
456 | struct pt_regs *iregs, void *__pebs) | ||
457 | { | ||
458 | /* | ||
459 | * We cast to pebs_record_core since that is a subset of | ||
460 | * both formats and we don't use the other fields in this | ||
461 | * routine. | ||
462 | */ | ||
463 | struct pebs_record_core *pebs = __pebs; | ||
464 | struct perf_sample_data data; | ||
465 | struct pt_regs regs; | ||
466 | |||
467 | if (!intel_pmu_save_and_restart(event)) | ||
468 | return; | ||
469 | |||
470 | perf_sample_data_init(&data, 0); | ||
471 | data.period = event->hw.last_period; | ||
472 | |||
473 | /* | ||
474 | * We use the interrupt regs as a base because the PEBS record | ||
475 | * does not contain a full regs set, specifically it seems to | ||
476 | * lack segment descriptors, which get used by things like | ||
477 | * user_mode(). | ||
478 | * | ||
479 | * In the simple case fix up only the IP and BP,SP regs, for | ||
480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
482 | */ | ||
483 | regs = *iregs; | ||
484 | regs.ip = pebs->ip; | ||
485 | regs.bp = pebs->bp; | ||
486 | regs.sp = pebs->sp; | ||
487 | |||
488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
490 | else | ||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
492 | |||
493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
494 | x86_pmu_stop(event); | ||
495 | } | ||
496 | |||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
498 | { | ||
499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
500 | struct debug_store *ds = cpuc->ds; | ||
501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
502 | struct pebs_record_core *at, *top; | ||
503 | int n; | ||
504 | |||
505 | if (!ds || !x86_pmu.pebs) | ||
506 | return; | ||
507 | |||
508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
510 | |||
511 | /* | ||
512 | * Whatever else happens, drain the thing | ||
513 | */ | ||
514 | ds->pebs_index = ds->pebs_buffer_base; | ||
515 | |||
516 | if (!test_bit(0, cpuc->active_mask)) | ||
517 | return; | ||
518 | |||
519 | WARN_ON_ONCE(!event); | ||
520 | |||
521 | if (!event->attr.precise_ip) | ||
522 | return; | ||
523 | |||
524 | n = top - at; | ||
525 | if (n <= 0) | ||
526 | return; | ||
527 | |||
528 | /* | ||
529 | * Should not happen: we program the threshold at 1 and do not | ||
530 | * set a reset value. | ||
531 | */ | ||
532 | WARN_ON_ONCE(n > 1); | ||
533 | at += n - 1; | ||
534 | |||
535 | __intel_pmu_pebs_event(event, iregs, at); | ||
536 | } | ||
537 | |||
538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
539 | { | ||
540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
541 | struct debug_store *ds = cpuc->ds; | ||
542 | struct pebs_record_nhm *at, *top; | ||
543 | struct perf_event *event = NULL; | ||
544 | u64 status = 0; | ||
545 | int bit, n; | ||
546 | |||
547 | if (!ds || !x86_pmu.pebs) | ||
548 | return; | ||
549 | |||
550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
552 | |||
553 | ds->pebs_index = ds->pebs_buffer_base; | ||
554 | |||
555 | n = top - at; | ||
556 | if (n <= 0) | ||
557 | return; | ||
558 | |||
559 | /* | ||
560 | * Should not happen: we program the threshold at 1 and do not | ||
561 | * set a reset value. | ||
562 | */ | ||
563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
564 | |||
565 | for ( ; at < top; at++) { | ||
566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
567 | event = cpuc->events[bit]; | ||
568 | if (!test_bit(bit, cpuc->active_mask)) | ||
569 | continue; | ||
570 | |||
571 | WARN_ON_ONCE(!event); | ||
572 | |||
573 | if (!event->attr.precise_ip) | ||
574 | continue; | ||
575 | |||
576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
577 | continue; | ||
578 | |||
579 | break; | ||
580 | } | ||
581 | |||
582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
583 | continue; | ||
584 | |||
585 | __intel_pmu_pebs_event(event, iregs, at); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * BTS, PEBS probe and setup | ||
591 | */ | ||
592 | |||
593 | static void intel_ds_init(void) | ||
594 | { | ||
595 | /* | ||
596 | * No support for 32-bit formats | ||
597 | */ | ||
598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
599 | return; | ||
600 | |||
601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
603 | if (x86_pmu.pebs) { | ||
604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
605 | int format = x86_pmu.intel_cap.pebs_format; | ||
606 | |||
607 | switch (format) { | ||
608 | case 0: | ||
609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
624 | x86_pmu.pebs = 0; | ||
625 | break; | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
631 | |||
632 | static int reserve_ds_buffers(void) | ||
633 | { | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void release_ds_buffers(void) | ||
638 | { | ||
639 | } | ||
640 | |||
641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 00000000000..d202c1bece1 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,218 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI; | ||
12 | * otherwise it becomes nearly impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | static void __intel_pmu_lbr_enable(void) | ||
16 | { | ||
17 | u64 debugctl; | ||
18 | |||
19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
22 | } | ||
23 | |||
24 | static void __intel_pmu_lbr_disable(void) | ||
25 | { | ||
26 | u64 debugctl; | ||
27 | |||
28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
31 | } | ||
32 | |||
33 | static void intel_pmu_lbr_reset_32(void) | ||
34 | { | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
39 | } | ||
40 | |||
41 | static void intel_pmu_lbr_reset_64(void) | ||
42 | { | ||
43 | int i; | ||
44 | |||
45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | static void intel_pmu_lbr_reset(void) | ||
52 | { | ||
53 | if (!x86_pmu.lbr_nr) | ||
54 | return; | ||
55 | |||
56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
57 | intel_pmu_lbr_reset_32(); | ||
58 | else | ||
59 | intel_pmu_lbr_reset_64(); | ||
60 | } | ||
61 | |||
62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | |||
66 | if (!x86_pmu.lbr_nr) | ||
67 | return; | ||
68 | |||
69 | WARN_ON_ONCE(cpuc->enabled); | ||
70 | |||
71 | /* | ||
72 | * Reset the LBR stack if we changed task context to | ||
73 | * avoid data leaks. | ||
74 | */ | ||
75 | |||
76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
77 | intel_pmu_lbr_reset(); | ||
78 | cpuc->lbr_context = event->ctx; | ||
79 | } | ||
80 | |||
81 | cpuc->lbr_users++; | ||
82 | } | ||
83 | |||
84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
85 | { | ||
86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
87 | |||
88 | if (!x86_pmu.lbr_nr) | ||
89 | return; | ||
90 | |||
91 | cpuc->lbr_users--; | ||
92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
93 | |||
94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
95 | __intel_pmu_lbr_disable(); | ||
96 | } | ||
97 | |||
98 | static void intel_pmu_lbr_enable_all(void) | ||
99 | { | ||
100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
101 | |||
102 | if (cpuc->lbr_users) | ||
103 | __intel_pmu_lbr_enable(); | ||
104 | } | ||
105 | |||
106 | static void intel_pmu_lbr_disable_all(void) | ||
107 | { | ||
108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
109 | |||
110 | if (cpuc->lbr_users) | ||
111 | __intel_pmu_lbr_disable(); | ||
112 | } | ||
113 | |||
114 | static inline u64 intel_pmu_lbr_tos(void) | ||
115 | { | ||
116 | u64 tos; | ||
117 | |||
118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
119 | |||
120 | return tos; | ||
121 | } | ||
122 | |||
123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
124 | { | ||
125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
126 | u64 tos = intel_pmu_lbr_tos(); | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
130 | unsigned long lbr_idx = (tos - i) & mask; | ||
131 | union { | ||
132 | struct { | ||
133 | u32 from; | ||
134 | u32 to; | ||
135 | }; | ||
136 | u64 lbr; | ||
137 | } msr_lastbranch; | ||
138 | |||
139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
140 | |||
141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
143 | cpuc->lbr_entries[i].flags = 0; | ||
144 | } | ||
145 | cpuc->lbr_stack.nr = i; | ||
146 | } | ||
147 | |||
148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
149 | |||
150 | /* | ||
151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
153 | * LBR formats. | ||
154 | */ | ||
155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
156 | { | ||
157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
159 | u64 tos = intel_pmu_lbr_tos(); | ||
160 | int i; | ||
161 | |||
162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
163 | unsigned long lbr_idx = (tos - i) & mask; | ||
164 | u64 from, to, flags = 0; | ||
165 | |||
166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
168 | |||
169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
171 | from = (u64)((((s64)from) << 1) >> 1); | ||
172 | } | ||
173 | |||
174 | cpuc->lbr_entries[i].from = from; | ||
175 | cpuc->lbr_entries[i].to = to; | ||
176 | cpuc->lbr_entries[i].flags = flags; | ||
177 | } | ||
178 | cpuc->lbr_stack.nr = i; | ||
179 | } | ||
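The shift pair that strips the mispredict flag deserves a note: with LBR_FORMAT_EIP_FLAGS, bit 63 of the FROM value is the flag, not part of the address, and the arithmetic shift restores canonical addresses. Worked values:

	/*
	 * kernel addr (+MISPRED):  0xffffffff81000000
	 *	<< 1 = 0xffffffff02000000 (flag bit dropped),
	 *	s64 >> 1 sign-extends back to 0xffffffff81000000
	 *
	 * user addr + MISPRED:     0x8000000000400000
	 *	<< 1 = 0x0000000000800000, >> 1 = 0x0000000000400000
	 *	(flag cleared, address intact)
	 */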
180 | |||
181 | static void intel_pmu_lbr_read(void) | ||
182 | { | ||
183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
184 | |||
185 | if (!cpuc->lbr_users) | ||
186 | return; | ||
187 | |||
188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
189 | intel_pmu_lbr_read_32(cpuc); | ||
190 | else | ||
191 | intel_pmu_lbr_read_64(cpuc); | ||
192 | } | ||
193 | |||
194 | static void intel_pmu_lbr_init_core(void) | ||
195 | { | ||
196 | x86_pmu.lbr_nr = 4; | ||
197 | x86_pmu.lbr_tos = 0x01c9; | ||
198 | x86_pmu.lbr_from = 0x40; | ||
199 | x86_pmu.lbr_to = 0x60; | ||
200 | } | ||
201 | |||
202 | static void intel_pmu_lbr_init_nhm(void) | ||
203 | { | ||
204 | x86_pmu.lbr_nr = 16; | ||
205 | x86_pmu.lbr_tos = 0x01c9; | ||
206 | x86_pmu.lbr_from = 0x680; | ||
207 | x86_pmu.lbr_to = 0x6c0; | ||
208 | } | ||
209 | |||
210 | static void intel_pmu_lbr_init_atom(void) | ||
211 | { | ||
212 | x86_pmu.lbr_nr = 8; | ||
213 | x86_pmu.lbr_tos = 0x01c9; | ||
214 | x86_pmu.lbr_from = 0x40; | ||
215 | x86_pmu.lbr_to = 0x60; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
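The magic numbers in the three init functions above are raw MSR addresses; naming them (per the architectural MSR list, as an aside) makes the table easier to check:

	/*
	 * 0x01c9        = MSR_LASTBRANCH_TOS (top-of-stack index)
	 * 0x40 / 0x60   = MSR_LASTBRANCH_0_FROM_IP / _0_TO_IP on Core2 and Atom
	 * 0x680 / 0x6c0 = MSR_LASTBRANCH_0_FROM_IP / _0_TO_IP on Nehalem
	 */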
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 00000000000..424fc8de68e --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,857 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licensing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | #define P4_CNTR_LIMIT 3 | ||
15 | /* | ||
16 | * array indices 0 and 1 are the HT threads, used on HT-enabled CPUs | ||
17 | */ | ||
18 | struct p4_event_bind { | ||
19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
22 | }; | ||
23 | |||
24 | struct p4_cache_event_bind { | ||
25 | unsigned int metric_pebs; | ||
26 | unsigned int metric_vert; | ||
27 | }; | ||
28 | |||
29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | ||
30 | [P4_CACHE__##name] = { \ | ||
31 | .metric_pebs = P4_PEBS__##name, \ | ||
32 | .metric_vert = P4_VERT__##name, \ | ||
33 | } | ||
34 | |||
35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | ||
36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | ||
37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | ||
38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | ||
39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Note that we don't use CCCR1 here; there is an | ||
44 | * exception for P4_BSQ_ALLOCATION, but we simply have | ||
45 | * no workaround for it. | ||
46 | * | ||
47 | * Consider this binding the set of resources a particular | ||
48 | * event may borrow; it doesn't contain the EventMask, | ||
49 | * Tags and friends -- those are left to the caller. | ||
50 | */ | ||
51 | static struct p4_event_bind p4_event_bind_map[] = { | ||
52 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
53 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
54 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
55 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
56 | }, | ||
57 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
58 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
59 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
60 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
61 | }, | ||
62 | [P4_EVENT_ITLB_REFERENCE] = { | ||
63 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
64 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
65 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
66 | }, | ||
67 | [P4_EVENT_MEMORY_CANCEL] = { | ||
68 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
69 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
70 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
71 | }, | ||
72 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
73 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
74 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
75 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
76 | }, | ||
77 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
78 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
79 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
80 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
81 | }, | ||
82 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
83 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
84 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
85 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
86 | }, | ||
87 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
88 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
89 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
90 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
91 | }, | ||
92 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
93 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
94 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
95 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
96 | }, | ||
97 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
98 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
99 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
101 | }, | ||
102 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
103 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
104 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
105 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
106 | }, | ||
107 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
108 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
109 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
110 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
111 | }, | ||
112 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
113 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
114 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
115 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
116 | }, | ||
117 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
118 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
119 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
120 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
121 | }, | ||
122 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
123 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
124 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | ||
125 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
126 | }, | ||
127 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
128 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
129 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
131 | }, | ||
132 | [P4_EVENT_PACKED_SP_UOP] = { | ||
133 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
134 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
135 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
136 | }, | ||
137 | [P4_EVENT_PACKED_DP_UOP] = { | ||
138 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
139 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
140 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
141 | }, | ||
142 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
143 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
144 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
145 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
146 | }, | ||
147 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
148 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
149 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
150 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
151 | }, | ||
152 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
153 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
154 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
155 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
156 | }, | ||
157 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
158 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
159 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
160 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
161 | }, | ||
162 | [P4_EVENT_X87_FP_UOP] = { | ||
163 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
164 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
165 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
166 | }, | ||
167 | [P4_EVENT_TC_MISC] = { | ||
168 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
169 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
170 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
171 | }, | ||
172 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
173 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
174 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
175 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
176 | }, | ||
177 | [P4_EVENT_TC_MS_XFER] = { | ||
178 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
179 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
180 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
181 | }, | ||
182 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
183 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
184 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
185 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
186 | }, | ||
187 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
188 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
189 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | ||
190 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
191 | }, | ||
192 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
193 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
194 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | ||
195 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
196 | }, | ||
197 | [P4_EVENT_RESOURCE_STALL] = { | ||
198 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
199 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
200 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
201 | }, | ||
202 | [P4_EVENT_WC_BUFFER] = { | ||
203 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
204 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
205 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
206 | }, | ||
207 | [P4_EVENT_B2B_CYCLES] = { | ||
208 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
209 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
210 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
211 | }, | ||
212 | [P4_EVENT_BNR] = { | ||
213 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
214 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
215 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
216 | }, | ||
217 | [P4_EVENT_SNOOP] = { | ||
218 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
219 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
220 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
221 | }, | ||
222 | [P4_EVENT_RESPONSE] = { | ||
223 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
224 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
225 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
226 | }, | ||
227 | [P4_EVENT_FRONT_END_EVENT] = { | ||
228 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
229 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
230 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
231 | }, | ||
232 | [P4_EVENT_EXECUTION_EVENT] = { | ||
233 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
234 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
235 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
236 | }, | ||
237 | [P4_EVENT_REPLAY_EVENT] = { | ||
238 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
239 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
240 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
241 | }, | ||
242 | [P4_EVENT_INSTR_RETIRED] = { | ||
243 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
244 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
245 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
246 | }, | ||
247 | [P4_EVENT_UOPS_RETIRED] = { | ||
248 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
249 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
250 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
251 | }, | ||
252 | [P4_EVENT_UOP_TYPE] = { | ||
253 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
254 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
255 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
256 | }, | ||
257 | [P4_EVENT_BRANCH_RETIRED] = { | ||
258 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
259 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
260 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
261 | }, | ||
262 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
263 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
264 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
265 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
266 | }, | ||
267 | [P4_EVENT_X87_ASSIST] = { | ||
268 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
269 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
270 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
271 | }, | ||
272 | [P4_EVENT_MACHINE_CLEAR] = { | ||
273 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
274 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
275 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
276 | }, | ||
277 | [P4_EVENT_INSTR_COMPLETED] = { | ||
278 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
279 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
280 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | ||
285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
287 | p4_config_pack_cccr(cache_event | \ | ||
288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
289 | |||
290 | static __initconst const u64 p4_hw_cache_event_ids | ||
291 | [PERF_COUNT_HW_CACHE_MAX] | ||
292 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
294 | { | ||
295 | [ C(L1D ) ] = { | ||
296 | [ C(OP_READ) ] = { | ||
297 | [ C(RESULT_ACCESS) ] = 0x0, | ||
298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
299 | P4_CACHE__1stl_cache_load_miss_retired), | ||
300 | }, | ||
301 | }, | ||
302 | [ C(LL ) ] = { | ||
303 | [ C(OP_READ) ] = { | ||
304 | [ C(RESULT_ACCESS) ] = 0x0, | ||
305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
306 | P4_CACHE__2ndl_cache_load_miss_retired), | ||
307 | }, | ||
308 | }, | ||
309 | [ C(DTLB) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
313 | P4_CACHE__dtlb_load_miss_retired), | ||
314 | }, | ||
315 | [ C(OP_WRITE) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0x0, | ||
317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
318 | P4_CACHE__dtlb_store_miss_retired), | ||
319 | }, | ||
320 | }, | ||
321 | [ C(ITLB) ] = { | ||
322 | [ C(OP_READ) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
324 | P4_CACHE__itlb_reference_hit), | ||
325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
326 | P4_CACHE__itlb_reference_miss), | ||
327 | }, | ||
328 | [ C(OP_WRITE) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = -1, | ||
330 | [ C(RESULT_MISS) ] = -1, | ||
331 | }, | ||
332 | [ C(OP_PREFETCH) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = -1, | ||
334 | [ C(RESULT_MISS) ] = -1, | ||
335 | }, | ||
336 | }, | ||
337 | }; | ||
338 | |||
339 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
340 | /* non-halted CPU clocks */ | ||
341 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
342 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
343 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
344 | |||
345 | /* | ||
346 | * retired instructions | ||
347 | * for the sake of simplicity we don't use FSB tagging | ||
348 | */ | ||
349 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
350 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
351 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
352 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
353 | |||
354 | /* cache hits */ | ||
355 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
358 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
359 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
360 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
361 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
362 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
363 | |||
364 | /* cache misses */ | ||
365 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
366 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
367 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
368 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
369 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
370 | |||
371 | /* branch instructions retired */ | ||
372 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
373 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
374 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
375 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
376 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
377 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
378 | |||
379 | /* mispredicted branches retired */ | ||
380 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
381 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
382 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
383 | |||
384 | /* bus ready clocks (cpu is driving #DRDY_DRV or #DRDY_OWN): */ | ||
385 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
386 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
387 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
388 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
389 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
390 | }; | ||
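As an aside on the packing used throughout this table (a hedged sketch, based on the
hi/lo split implied by the p4_config_unpack_escr()/p4_config_unpack_cccr() calls
below; see perf_event_p4.h for the authoritative definitions): the 64-bit config
keeps the ESCR bits in the upper half and the CCCR bits in the lower half:

	/* sketch of the pack/unpack helpers, assuming the layout
	 * implied by their use in this file */
	#define p4_config_pack_escr(v)		(((u64)(v)) << 32)
	#define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
	#define p4_config_unpack_escr(v)	(((u64)(v)) >> 32)
	#define p4_config_unpack_cccr(v)	(((u64)(v)) & 0xffffffffULL)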
391 | |||
392 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
393 | { | ||
394 | unsigned int evnt = p4_config_unpack_event(config); | ||
395 | struct p4_event_bind *bind = NULL; | ||
396 | |||
397 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
398 | bind = &p4_event_bind_map[evnt]; | ||
399 | |||
400 | return bind; | ||
401 | } | ||
402 | |||
403 | static u64 p4_pmu_event_map(int hw_event) | ||
404 | { | ||
405 | struct p4_event_bind *bind; | ||
406 | unsigned int esel; | ||
407 | u64 config; | ||
408 | |||
409 | config = p4_general_events[hw_event]; | ||
410 | bind = p4_config_get_bind(config); | ||
411 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
412 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
413 | |||
414 | return config; | ||
415 | } | ||
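A hedged usage sketch of the mapping above: for a generic hardware event the
core code asks for the packed config, which carries both the pre-packed ESCR
event bits and the CCCR "ESCR select" field derived from the event opcode:

	/* sketch: resolve a generic event into a packed P4 config */
	u64 config = p4_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES);
	/* upper 32 bits: GLOBAL_POWER_EVENTS ESCR event/mask bits;
	 * lower 32 bits: CCCR with ESEL taken from bind->opcode */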
416 | |||
417 | static int p4_hw_config(struct perf_event *event) | ||
418 | { | ||
419 | int cpu = get_cpu(); | ||
420 | int rc = 0; | ||
421 | unsigned int evnt; | ||
422 | u32 escr, cccr; | ||
423 | |||
424 | /* | ||
425 | * the reason we grab the cpu this early is that if we get scheduled | ||
426 | * for the first time on the same cpu, we will not need to swap the | ||
427 | * thread-specific flags in the config (and will save some cpu cycles) | ||
428 | */ | ||
429 | |||
430 | cccr = p4_default_cccr_conf(cpu); | ||
431 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
432 | event->attr.exclude_user); | ||
433 | event->hw.config = p4_config_pack_escr(escr) | | ||
434 | p4_config_pack_cccr(cccr); | ||
435 | |||
436 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
437 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
438 | |||
439 | if (event->attr.type == PERF_TYPE_RAW) { | ||
440 | |||
441 | /* user data may have an out-of-bounds event index */ | ||
442 | evnt = p4_config_unpack_event(event->attr.config); | ||
443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
444 | rc = -EINVAL; | ||
445 | goto out; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * We don't control raw events so it's up to the caller | ||
450 | * to pass sane values (and we don't count the thread number | ||
451 | * on an HT machine but allow HT-compatible specifics to be | ||
452 | * passed on) | ||
453 | * | ||
454 | * XXX: HT-wide things should check perf_paranoid_cpu() && | ||
455 | * CAP_SYS_ADMIN | ||
456 | */ | ||
457 | event->hw.config |= event->attr.config & | ||
458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
460 | } | ||
461 | |||
462 | rc = x86_setup_perfctr(event); | ||
463 | out: | ||
464 | put_cpu(); | ||
465 | return rc; | ||
466 | } | ||
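A hedged illustration of the raw-event path handled above, seen from the
userspace side (the packed value and the helper name are assumptions for the
sketch; the syscall itself is the standard perf_event_open interface):

	/* sketch: opening a PERF_TYPE_RAW event against this PMU;
	 * only the HT-compatible ESCR/CCCR bits survive the masking
	 * performed in p4_hw_config() */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_p4_raw(unsigned long long packed_config)
	{
		struct perf_event_attr attr = {
			.type	= PERF_TYPE_RAW,
			.size	= sizeof(attr),
			.config	= packed_config, /* assumed pre-packed ESCR|CCCR */
		};

		/* pid 0 (self), any cpu, no group, no flags */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}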
467 | |||
468 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
469 | { | ||
470 | unsigned long dummy; | ||
471 | |||
472 | rdmsrl(hwc->config_base + hwc->idx, dummy); | ||
473 | if (dummy & P4_CCCR_OVF) { | ||
474 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
475 | ((u64)dummy) & ~P4_CCCR_OVF); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
480 | { | ||
481 | struct hw_perf_event *hwc = &event->hw; | ||
482 | |||
483 | /* | ||
484 | * If the event gets disabled while the counter is in an overflowed | ||
485 | * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets | ||
486 | * asserted again and again | ||
487 | */ | ||
488 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
489 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
490 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
491 | } | ||
492 | |||
493 | static void p4_pmu_disable_all(void) | ||
494 | { | ||
495 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
496 | int idx; | ||
497 | |||
498 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
499 | struct perf_event *event = cpuc->events[idx]; | ||
500 | if (!test_bit(idx, cpuc->active_mask)) | ||
501 | continue; | ||
502 | p4_pmu_disable_event(event); | ||
503 | } | ||
504 | } | ||
505 | |||
506 | static void p4_pmu_enable_event(struct perf_event *event) | ||
507 | { | ||
508 | struct hw_perf_event *hwc = &event->hw; | ||
509 | int thread = p4_ht_config_thread(hwc->config); | ||
510 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
511 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
512 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
513 | struct p4_event_bind *bind; | ||
514 | struct p4_cache_event_bind *bind_cache; | ||
515 | u64 escr_addr, cccr; | ||
516 | |||
517 | bind = &p4_event_bind_map[idx]; | ||
518 | escr_addr = (u64)bind->escr_msr[thread]; | ||
519 | |||
520 | /* | ||
521 | * - we don't support cascaded counters yet | ||
522 | * - and counter 1 is broken (erratum) | ||
523 | */ | ||
524 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
525 | WARN_ON_ONCE(hwc->idx == 1); | ||
526 | |||
527 | /* we need a real Event value */ | ||
528 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
529 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
530 | |||
531 | cccr = p4_config_unpack_cccr(hwc->config); | ||
532 | |||
533 | /* | ||
534 | * it could be a Cache event, in which case we need to | ||
535 | * set the metrics in the additional MSRs | ||
536 | */ | ||
537 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | ||
538 | if (idx_cache > P4_CACHE__NONE && | ||
539 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
540 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
541 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
542 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
543 | } | ||
544 | |||
545 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
546 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
547 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
548 | } | ||
549 | |||
550 | static void p4_pmu_enable_all(int added) | ||
551 | { | ||
552 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
553 | int idx; | ||
554 | |||
555 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
556 | struct perf_event *event = cpuc->events[idx]; | ||
557 | if (!test_bit(idx, cpuc->active_mask)) | ||
558 | continue; | ||
559 | p4_pmu_enable_event(event); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
564 | { | ||
565 | struct perf_sample_data data; | ||
566 | struct cpu_hw_events *cpuc; | ||
567 | struct perf_event *event; | ||
568 | struct hw_perf_event *hwc; | ||
569 | int idx, handled = 0; | ||
570 | u64 val; | ||
571 | |||
572 | data.addr = 0; | ||
573 | data.raw = NULL; | ||
574 | |||
575 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
576 | |||
577 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
578 | |||
579 | if (!test_bit(idx, cpuc->active_mask)) | ||
580 | continue; | ||
581 | |||
582 | event = cpuc->events[idx]; | ||
583 | hwc = &event->hw; | ||
584 | |||
585 | WARN_ON_ONCE(hwc->idx != idx); | ||
586 | |||
587 | /* | ||
588 | * FIXME: redundant call, not actually needed, | ||
589 | * but kept as a cheap sanity check | ||
590 | */ | ||
591 | p4_pmu_clear_cccr_ovf(hwc); | ||
592 | |||
593 | val = x86_perf_event_update(event); | ||
594 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | ||
595 | continue; | ||
596 | |||
597 | /* | ||
598 | * event overflow | ||
599 | */ | ||
600 | handled = 1; | ||
601 | data.period = event->hw.last_period; | ||
602 | |||
603 | if (!x86_perf_event_set_period(event)) | ||
604 | continue; | ||
605 | if (perf_event_overflow(event, 1, &data, regs)) | ||
606 | p4_pmu_disable_event(event); | ||
607 | } | ||
608 | |||
609 | if (handled) { | ||
610 | /* P4 quirk: the APIC masks LVTPC on PMI delivery, so unmask it again */ | ||
611 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
612 | inc_irq_stat(apic_perf_irqs); | ||
613 | } | ||
614 | |||
615 | return handled; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * swap the thread-specific fields according to the thread | ||
620 | * we are going to run on | ||
621 | */ | ||
622 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
623 | { | ||
624 | u32 escr, cccr; | ||
625 | |||
626 | /* | ||
627 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
628 | */ | ||
629 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
630 | return; | ||
631 | |||
632 | /* | ||
633 | * the event is migrated from another logical | ||
634 | * cpu, so we need to swap the thread-specific flags | ||
635 | */ | ||
636 | |||
637 | escr = p4_config_unpack_escr(hwc->config); | ||
638 | cccr = p4_config_unpack_cccr(hwc->config); | ||
639 | |||
640 | if (p4_ht_thread(cpu)) { | ||
641 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
642 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
643 | if (escr & P4_ESCR_T0_OS) { | ||
644 | escr &= ~P4_ESCR_T0_OS; | ||
645 | escr |= P4_ESCR_T1_OS; | ||
646 | } | ||
647 | if (escr & P4_ESCR_T0_USR) { | ||
648 | escr &= ~P4_ESCR_T0_USR; | ||
649 | escr |= P4_ESCR_T1_USR; | ||
650 | } | ||
651 | hwc->config = p4_config_pack_escr(escr); | ||
652 | hwc->config |= p4_config_pack_cccr(cccr); | ||
653 | hwc->config |= P4_CONFIG_HT; | ||
654 | } else { | ||
655 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
656 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
657 | if (escr & P4_ESCR_T1_OS) { | ||
658 | escr &= ~P4_ESCR_T1_OS; | ||
659 | escr |= P4_ESCR_T0_OS; | ||
660 | } | ||
661 | if (escr & P4_ESCR_T1_USR) { | ||
662 | escr &= ~P4_ESCR_T1_USR; | ||
663 | escr |= P4_ESCR_T0_USR; | ||
664 | } | ||
665 | hwc->config = p4_config_pack_escr(escr); | ||
666 | hwc->config |= p4_config_pack_cccr(cccr); | ||
667 | hwc->config &= ~P4_CONFIG_HT; | ||
668 | } | ||
669 | } | ||
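A minimal sketch of the swap semantics above (the cpu value is hypothetical;
the P4_ESCR_T0_OS/P4_ESCR_T1_OS names are the ones used in the function
itself): an event configured for thread 0 that migrates to the sibling thread
gets its OS/USR and PMI bits moved to the T1 variants and is tagged with
P4_CONFIG_HT so a later migration back can be detected:

	/* sketch: config built for thread 0, then migrated to the sibling */
	struct hw_perf_event hwc = {
		.config = p4_config_pack_escr(P4_ESCR_T0_OS),
	};

	p4_pmu_swap_config_ts(&hwc, cpu_on_sibling_thread); /* hypothetical cpu */
	WARN_ON(!(p4_config_unpack_escr(hwc.config) & P4_ESCR_T1_OS));
	WARN_ON(!(hwc.config & P4_CONFIG_HT));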
670 | |||
671 | /* | ||
672 | * ESCR address hashing is tricky: ESCRs are not sequential | ||
673 | * in MSR space, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) | ||
674 | * and their low bytes lie in the range [0xa0, 0xe1], | ||
675 | * | ||
676 | * so we end up with a ~70% filled lookup table | ||
677 | */ | ||
678 | |||
679 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
680 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
681 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
682 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
683 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
684 | |||
685 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
686 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
687 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
688 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
689 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
690 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
691 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
692 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
693 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
694 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
695 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
696 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
697 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
698 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
699 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
700 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
701 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
702 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
703 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
704 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
705 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
706 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
707 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
708 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
709 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
710 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
711 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
712 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
713 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
714 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
715 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
716 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
717 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
718 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
719 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
720 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
721 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
722 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
723 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
724 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
725 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
726 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
727 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
728 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
729 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
730 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
731 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
732 | }; | ||
733 | |||
734 | static int p4_get_escr_idx(unsigned int addr) | ||
735 | { | ||
736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
737 | |||
738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
739 | !p4_escr_table[idx])) { | ||
740 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
741 | return -1; | ||
742 | } | ||
743 | |||
744 | return idx; | ||
745 | } | ||
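A quick worked check of the indexing above (a sketch; the 0x3a0 value for
MSR_P4_BSU_ESCR0 follows from the P4_ESCR_MSR_BASE definition a few lines up):

	/* sketch (inside any init function): the base MSR maps to slot 0
	 * and the top of the range (0x3e1) to slot 0x41, giving 0x42 (66)
	 * slots in total */
	BUILD_BUG_ON(P4_ESCR_MSR_IDX(MSR_P4_BSU_ESCR0) != 0);
	BUILD_BUG_ON(P4_ESCR_MSR_TABLE_SIZE != 0x42);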
746 | |||
747 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
748 | struct p4_event_bind *bind) | ||
749 | { | ||
750 | int i, j; | ||
751 | |||
752 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
753 | j = bind->cntr[thread][i]; | ||
754 | if (j != -1 && !test_bit(j, used_mask)) | ||
755 | return j; | ||
756 | } | ||
757 | |||
758 | return -1; | ||
759 | } | ||
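A hedged note on the bind->cntr layout consulted above: each event binding
carries one short row of candidate counter indices per HT thread, terminated
by -1, so a hypothetical entry could look like this (the concrete counter
numbers are assumptions for the sketch):

	/* sketch: candidate counters per thread, [thread][P4_CNTR_LIMIT];
	 * p4_next_cntr() returns the first one not yet set in used_mask */
	struct p4_event_bind bind = {
		.cntr = { {0, 2, -1}, {1, 3, -1} },
	};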
760 | |||
761 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
762 | { | ||
763 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
764 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
765 | int cpu = raw_smp_processor_id(); | ||
766 | struct hw_perf_event *hwc; | ||
767 | struct p4_event_bind *bind; | ||
768 | unsigned int i, thread, num; | ||
769 | int cntr_idx, escr_idx; | ||
770 | |||
771 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
772 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
773 | |||
774 | for (i = 0, num = n; i < n; i++, num--) { | ||
775 | |||
776 | hwc = &cpuc->event_list[i]->hw; | ||
777 | thread = p4_ht_thread(cpu); | ||
778 | bind = p4_config_get_bind(hwc->config); | ||
779 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
780 | if (unlikely(escr_idx == -1)) | ||
781 | goto done; | ||
782 | |||
783 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
784 | cntr_idx = hwc->idx; | ||
785 | if (assign) | ||
786 | assign[i] = hwc->idx; | ||
787 | goto reserve; | ||
788 | } | ||
789 | |||
790 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
791 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
792 | goto done; | ||
793 | |||
794 | p4_pmu_swap_config_ts(hwc, cpu); | ||
795 | if (assign) | ||
796 | assign[i] = cntr_idx; | ||
797 | reserve: | ||
798 | set_bit(cntr_idx, used_mask); | ||
799 | set_bit(escr_idx, escr_mask); | ||
800 | } | ||
801 | |||
802 | done: | ||
803 | return num ? -ENOSPC : 0; | ||
804 | } | ||
805 | |||
806 | static __initconst const struct x86_pmu p4_pmu = { | ||
807 | .name = "Netburst P4/Xeon", | ||
808 | .handle_irq = p4_pmu_handle_irq, | ||
809 | .disable_all = p4_pmu_disable_all, | ||
810 | .enable_all = p4_pmu_enable_all, | ||
811 | .enable = p4_pmu_enable_event, | ||
812 | .disable = p4_pmu_disable_event, | ||
813 | .eventsel = MSR_P4_BPU_CCCR0, | ||
814 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
815 | .event_map = p4_pmu_event_map, | ||
816 | .max_events = ARRAY_SIZE(p4_general_events), | ||
817 | .get_event_constraints = x86_get_event_constraints, | ||
818 | /* | ||
819 | * If HT is disabled we may need to use all | ||
820 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
821 | * though we leave it restricted for the moment, | ||
822 | * assuming HT is on | ||
823 | */ | ||
824 | .num_counters = ARCH_P4_MAX_CCCR, | ||
825 | .apic = 1, | ||
826 | .cntval_bits = 40, | ||
827 | .cntval_mask = (1ULL << 40) - 1, | ||
828 | .max_period = (1ULL << 39) - 1, | ||
829 | .hw_config = p4_hw_config, | ||
830 | .schedule_events = p4_pmu_schedule_events, | ||
831 | }; | ||
832 | |||
833 | static __init int p4_pmu_init(void) | ||
834 | { | ||
835 | unsigned int low, high; | ||
836 | |||
837 | /* If the generic counter arrays get stripped -- indexing fails */ | ||
838 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
839 | |||
840 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
841 | if (!(low & (1 << 7))) { | ||
842 | pr_cont("unsupported Netburst CPU model %d ", | ||
843 | boot_cpu_data.x86_model); | ||
844 | return -ENODEV; | ||
845 | } | ||
846 | |||
847 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
848 | sizeof(hw_cache_event_ids)); | ||
849 | |||
850 | pr_cont("Netburst events, "); | ||
851 | |||
852 | x86_pmu = p4_pmu; | ||
853 | |||
854 | return 0; | ||
855 | } | ||
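A hedged aside on the check above: bit 7 of MSR_IA32_MISC_ENABLE is the
architectural "performance monitoring available" (EMON) flag, so a symbolic
form of the test would read roughly as follows (the MSR_IA32_MISC_ENABLE_EMON
name is an assumption and may not exist in this tree):

	/* sketch: the same test as (low & (1 << 7)), with a named constant */
	#define MSR_IA32_MISC_ENABLE_EMON	(1ULL << 7)	/* assumed name */

	if (!(low & MSR_IA32_MISC_ENABLE_EMON))
		return -ENODEV;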
856 | |||
857 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14d..34ba07be2cd 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
27 | */ | 27 | */ |
28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
29 | 29 | ||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
49 | { | 31 | { |
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
67 | } | 49 | } |
68 | 50 | ||
69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
70 | { | 52 | { |
71 | unsigned long val; | 53 | unsigned long val; |
72 | 54 | ||
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
103 | } | 85 | } |
104 | 86 | ||
105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
106 | .name = "p6", | 88 | .name = "p6", |
107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
94 | .hw_config = x86_pmu_hw_config, | ||
95 | .schedule_events = x86_schedule_events, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
117 | .apic = 1, | 100 | .apic = 1, |
118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
119 | .version = 0, | 102 | .version = 0, |
120 | .num_events = 2, | 103 | .num_counters = 2, |
121 | /* | 104 | /* |
122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
125 | * | 108 | * |
126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
127 | */ | 110 | */ |
128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
132 | }; | 115 | }; |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba232..b9d1ff58844 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,8 +24,8 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
27 | #include <asm/vmware.h> | ||
28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
28 | #include <asm/hypervisor.h> | ||
29 | 29 | ||
30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -65,7 +65,7 @@ static unsigned long vmware_get_tsc_khz(void) | |||
65 | return tsc_hz; | 65 | return tsc_hz; |
66 | } | 66 | } |
67 | 67 | ||
68 | void __init vmware_platform_setup(void) | 68 | static void __init vmware_platform_setup(void) |
69 | { | 69 | { |
70 | uint32_t eax, ebx, ecx, edx; | 70 | uint32_t eax, ebx, ecx, edx; |
71 | 71 | ||
@@ -83,26 +83,22 @@ void __init vmware_platform_setup(void) | |||
83 | * serial key should be enough, as this will always have a VMware | 83 | * serial key should be enough, as this will always have a VMware |
84 | * specific string when running under VMware hypervisor. | 84 | * specific string when running under VMware hypervisor. |
85 | */ | 85 | */ |
86 | int vmware_platform(void) | 86 | static bool __init vmware_platform(void) |
87 | { | 87 | { |
88 | if (cpu_has_hypervisor) { | 88 | if (cpu_has_hypervisor) { |
89 | unsigned int eax, ebx, ecx, edx; | 89 | unsigned int eax; |
90 | char hyper_vendor_id[13]; | 90 | unsigned int hyper_vendor_id[3]; |
91 | 91 | ||
92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 93 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 94 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 95 | return true; |
96 | hyper_vendor_id[12] = '\0'; | ||
97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
98 | return 1; | ||
99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 96 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
100 | __vmware_platform()) | 97 | __vmware_platform()) |
101 | return 1; | 98 | return true; |
102 | 99 | ||
103 | return 0; | 100 | return false; |
104 | } | 101 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
106 | 102 | ||
107 | /* | 103 | /* |
108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 104 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
@@ -116,8 +112,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
116 | * so that the kernel could just trust the hypervisor with providing a | 112 | * so that the kernel could just trust the hypervisor with providing a |
117 | * reliable virtual TSC that is suitable for timekeeping. | 113 | * reliable virtual TSC that is suitable for timekeeping. |
118 | */ | 114 | */ |
119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 115 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
120 | { | 116 | { |
121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 118 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
123 | } | 119 | } |
120 | |||
121 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
122 | .name = "VMware", | ||
123 | .detect = vmware_platform, | ||
124 | .set_cpu_features = vmware_set_cpu_features, | ||
125 | .init_platform = vmware_platform_setup, | ||
126 | }; | ||
127 | EXPORT_SYMBOL(x86_hyper_vmware); | ||