aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/amd.c84
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-smi.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c83
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c42
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c25
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event.c181
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c191
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c417
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c83
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
24 files changed, 1046 insertions, 209 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..3532d3bf8105 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
233} 233}
234#endif 234#endif
235 235
236#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 236#ifdef CONFIG_NUMA
237/*
238 * To workaround broken NUMA config. Read the comment in
239 * srat_detect_node().
240 */
237static int __cpuinit nearby_node(int apicid) 241static int __cpuinit nearby_node(int apicid)
238{ 242{
239 int i, node; 243 int i, node;
240 244
241 for (i = apicid - 1; i >= 0; i--) { 245 for (i = apicid - 1; i >= 0; i--) {
242 node = apicid_to_node[i]; 246 node = __apicid_to_node[i];
243 if (node != NUMA_NO_NODE && node_online(node)) 247 if (node != NUMA_NO_NODE && node_online(node))
244 return node; 248 return node;
245 } 249 }
246 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { 250 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
247 node = apicid_to_node[i]; 251 node = __apicid_to_node[i];
248 if (node != NUMA_NO_NODE && node_online(node)) 252 if (node != NUMA_NO_NODE && node_online(node))
249 return node; 253 return node;
250 } 254 }
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
261#ifdef CONFIG_X86_HT 265#ifdef CONFIG_X86_HT
262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) 266static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
263{ 267{
264 u32 nodes; 268 u32 nodes, cores_per_cu = 1;
265 u8 node_id; 269 u8 node_id;
266 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
267 271
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
276 /* get compute unit information */ 280 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1; 281 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff; 282 c->compute_unit_id = ebx & 0xff;
283 cores_per_cu += ((ebx >> 8) & 3);
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { 284 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value; 285 u64 value;
281 286
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
288 /* fixup multi-node processor information */ 293 /* fixup multi-node processor information */
289 if (nodes > 1) { 294 if (nodes > 1) {
290 u32 cores_per_node; 295 u32 cores_per_node;
296 u32 cus_per_node;
291 297
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 298 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes; 299 cores_per_node = c->x86_max_cores / nodes;
300 cus_per_node = cores_per_node / cores_per_cu;
294 301
295 /* store NodeID, use llc_shared_map to store sibling info */ 302 /* store NodeID, use llc_shared_map to store sibling info */
296 per_cpu(cpu_llc_id, cpu) = node_id; 303 per_cpu(cpu_llc_id, cpu) = node_id;
297 304
298 /* core id to be in range from 0 to (cores_per_node - 1) */ 305 /* core id has to be in the [0 .. cores_per_node - 1] range */
299 c->cpu_core_id = c->cpu_core_id % cores_per_node; 306 c->cpu_core_id %= cores_per_node;
307 c->compute_unit_id %= cus_per_node;
300 } 308 }
301} 309}
302#endif 310#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
334 342
335static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 343static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
336{ 344{
337#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 345#ifdef CONFIG_NUMA
338 int cpu = smp_processor_id(); 346 int cpu = smp_processor_id();
339 int node; 347 int node;
340 unsigned apicid = c->apicid; 348 unsigned apicid = c->apicid;
341 349
342 node = per_cpu(cpu_llc_id, cpu); 350 node = numa_cpu_node(cpu);
351 if (node == NUMA_NO_NODE)
352 node = per_cpu(cpu_llc_id, cpu);
343 353
344 if (apicid_to_node[apicid] != NUMA_NO_NODE)
345 node = apicid_to_node[apicid];
346 if (!node_online(node)) { 354 if (!node_online(node)) {
347 /* Two possibilities here: 355 /*
348 - The CPU is missing memory and no node was created. 356 * Two possibilities here:
349 In that case try picking one from a nearby CPU 357 *
350 - The APIC IDs differ from the HyperTransport node IDs 358 * - The CPU is missing memory and no node was created. In
351 which the K8 northbridge parsing fills in. 359 * that case try picking one from a nearby CPU.
352 Assume they are all increased by a constant offset, 360 *
353 but in the same order as the HT nodeids. 361 * - The APIC IDs differ from the HyperTransport node IDs
354 If that doesn't result in a usable node fall back to the 362 * which the K8 northbridge parsing fills in. Assume
355 path for the previous case. */ 363 * they are all increased by a constant offset, but in
356 364 * the same order as the HT nodeids. If that doesn't
365 * result in a usable node fall back to the path for the
366 * previous case.
367 *
368 * This workaround operates directly on the mapping between
369 * APIC ID and NUMA node, assuming certain relationship
370 * between APIC ID, HT node ID and NUMA topology. As going
371 * through CPU mapping may alter the outcome, directly
372 * access __apicid_to_node[].
373 */
357 int ht_nodeid = c->initial_apicid; 374 int ht_nodeid = c->initial_apicid;
358 375
359 if (ht_nodeid >= 0 && 376 if (ht_nodeid >= 0 &&
360 apicid_to_node[ht_nodeid] != NUMA_NO_NODE) 377 __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
361 node = apicid_to_node[ht_nodeid]; 378 node = __apicid_to_node[ht_nodeid];
362 /* Pick a nearby node */ 379 /* Pick a nearby node */
363 if (!node_online(node)) 380 if (!node_online(node))
364 node = nearby_node(apicid); 381 node = nearby_node(apicid);
@@ -594,6 +611,29 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
594 } 611 }
595 } 612 }
596#endif 613#endif
614
615 /* As a rule processors have APIC timer running in deep C states */
616 if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
617 set_cpu_cap(c, X86_FEATURE_ARAT);
618
619 /*
620 * Disable GART TLB Walk Errors on Fam10h. We do this here
621 * because this is always needed when GART is enabled, even in a
622 * kernel which has no MCE support built in.
623 */
624 if (c->x86 == 0x10) {
625 /*
626 * BIOS should disable GartTlbWlk Errors themself. If
627 * it doesn't do it here as suggested by the BKDG.
628 *
629 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
630 */
631 u64 mask;
632
633 rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
634 mask |= (1 << 10);
635 wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
636 }
597} 637}
598 638
599#ifdef CONFIG_X86_32 639#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..e2ced0074a45 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
675 const struct cpu_dev *const *cdev; 675 const struct cpu_dev *const *cdev;
676 int count = 0; 676 int count = 0;
677 677
678#ifdef PROCESSOR_SELECT 678#ifdef CONFIG_PROCESSOR_SELECT
679 printk(KERN_INFO "KERNEL supported cpus:\n"); 679 printk(KERN_INFO "KERNEL supported cpus:\n");
680#endif 680#endif
681 681
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
687 cpu_devs[count] = cpudev; 687 cpu_devs[count] = cpudev;
688 count++; 688 count++;
689 689
690#ifdef PROCESSOR_SELECT 690#ifdef CONFIG_PROCESSOR_SELECT
691 { 691 {
692 unsigned int j; 692 unsigned int j;
693 693
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
869 869
870 select_idle_routine(c); 870 select_idle_routine(c);
871 871
872#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 872#ifdef CONFIG_NUMA
873 numa_add_cpu(smp_processor_id()); 873 numa_add_cpu(smp_processor_id());
874#endif 874#endif
875} 875}
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 03162dac6271..cf48cdd6907d 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -444,7 +444,7 @@ static int __cpuinit longhaul_get_ranges(void)
444 return -EINVAL; 444 return -EINVAL;
445 } 445 }
446 /* Get max multiplier - as we always did. 446 /* Get max multiplier - as we always did.
447 * Longhaul MSR is usefull only when voltage scaling is enabled. 447 * Longhaul MSR is useful only when voltage scaling is enabled.
448 * C3 is booting at max anyway. */ 448 * C3 is booting at max anyway. */
449 maxmult = mult; 449 maxmult = mult;
450 /* Get min multiplier */ 450 /* Get min multiplier */
@@ -1011,7 +1011,7 @@ static void __exit longhaul_exit(void)
1011 * trigger frequency transition in some cases. */ 1011 * trigger frequency transition in some cases. */
1012module_param(disable_acpi_c3, int, 0644); 1012module_param(disable_acpi_c3, int, 0644);
1013MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); 1013MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
1014/* Change CPU voltage with frequency. Very usefull to save 1014/* Change CPU voltage with frequency. Very useful to save
1015 * power, but most VIA C3 processors aren't supporting it. */ 1015 * power, but most VIA C3 processors aren't supporting it. */
1016module_param(scale_voltage, int, 0644); 1016module_param(scale_voltage, int, 0644);
1017MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 1017MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f67..52c93648e492 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
158{ 158{
159 if (c->x86 == 0x06) { 159 if (c->x86 == 0x06) {
160 if (cpu_has(c, X86_FEATURE_EST)) 160 if (cpu_has(c, X86_FEATURE_EST))
161 printk(KERN_WARNING PFX "Warning: EST-capable CPU " 161 printk_once(KERN_WARNING PFX "Warning: EST-capable "
162 "detected. The acpi-cpufreq module offers " 162 "CPU detected. The acpi-cpufreq module offers "
163 "voltage scaling in addition of frequency " 163 "voltage scaling in addition to frequency "
164 "scaling. You should use that instead of " 164 "scaling. You should use that instead of "
165 "p4-clockmod, if possible.\n"); 165 "p4-clockmod, if possible.\n");
166 switch (c->x86_model) { 166 switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..755a31e0f5b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
195cmd_incomplete: 195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status); 196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock); 197 spin_unlock(&pcc_lock);
198 return -EINVAL; 198 return 0;
199} 199}
200 200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy, 201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
@@ -315,8 +315,6 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
315 315
316 input.count = 4; 316 input.count = 4;
317 input.pointer = in_params; 317 input.pointer = in_params;
318 input.count = 4;
319 input.pointer = in_params;
320 in_params[0].type = ACPI_TYPE_BUFFER; 318 in_params[0].type = ACPI_TYPE_BUFFER;
321 in_params[0].buffer.length = 16; 319 in_params[0].buffer.length = 16;
322 in_params[0].buffer.pointer = OSC_UUID; 320 in_params[0].buffer.pointer = OSC_UUID;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59be..2368e38327b3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -630,8 +630,7 @@ static void print_basics(struct powernow_k8_data *data)
630 data->powernow_table[j].frequency/1000); 630 data->powernow_table[j].frequency/1000);
631 } else { 631 } else {
632 printk(KERN_INFO PFX 632 printk(KERN_INFO PFX
633 " %d : fid 0x%x (%d MHz), vid 0x%x\n", 633 "fid 0x%x (%d MHz), vid 0x%x\n",
634 j,
635 data->powernow_table[j].index & 0xff, 634 data->powernow_table[j].index & 0xff,
636 data->powernow_table[j].frequency/1000, 635 data->powernow_table[j].frequency/1000,
637 data->powernow_table[j].index >> 8); 636 data->powernow_table[j].index >> 8);
@@ -1276,7 +1275,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1276 1275
1277 if (powernow_k8_cpu_init_acpi(data)) { 1276 if (powernow_k8_cpu_init_acpi(data)) {
1278 /* 1277 /*
1279 * Use the PSB BIOS structure. This is only availabe on 1278 * Use the PSB BIOS structure. This is only available on
1280 * an UP version, and is deprecated by AMD. 1279 * an UP version, and is deprecated by AMD.
1281 */ 1280 */
1282 if (num_online_cpus() != 1) { 1281 if (num_online_cpus() != 1) {
@@ -1537,6 +1536,7 @@ static struct notifier_block cpb_nb = {
1537static int __cpuinit powernowk8_init(void) 1536static int __cpuinit powernowk8_init(void)
1538{ 1537{
1539 unsigned int i, supported_cpus = 0, cpu; 1538 unsigned int i, supported_cpus = 0, cpu;
1539 int rv;
1540 1540
1541 for_each_online_cpu(i) { 1541 for_each_online_cpu(i) {
1542 int rc; 1542 int rc;
@@ -1555,14 +1555,14 @@ static int __cpuinit powernowk8_init(void)
1555 1555
1556 cpb_capable = true; 1556 cpb_capable = true;
1557 1557
1558 register_cpu_notifier(&cpb_nb);
1559
1560 msrs = msrs_alloc(); 1558 msrs = msrs_alloc();
1561 if (!msrs) { 1559 if (!msrs) {
1562 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); 1560 printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
1563 return -ENOMEM; 1561 return -ENOMEM;
1564 } 1562 }
1565 1563
1564 register_cpu_notifier(&cpb_nb);
1565
1566 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); 1566 rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
1567 1567
1568 for_each_cpu(cpu, cpu_online_mask) { 1568 for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1574,13 @@ static int __cpuinit powernowk8_init(void)
1574 (cpb_enabled ? "on" : "off")); 1574 (cpb_enabled ? "on" : "off"));
1575 } 1575 }
1576 1576
1577 return cpufreq_register_driver(&cpufreq_amd64_driver); 1577 rv = cpufreq_register_driver(&cpufreq_amd64_driver);
1578 if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
1579 unregister_cpu_notifier(&cpb_nb);
1580 msrs_free(msrs);
1581 msrs = NULL;
1582 }
1583 return rv;
1578} 1584}
1579 1585
1580/* driver entry point for term */ 1586/* driver entry point for term */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 8abd869baabf..91bc25b67bc1 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -292,7 +292,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
292 292
293 result = speedstep_smi_ownership(); 293 result = speedstep_smi_ownership();
294 if (result) { 294 if (result) {
295 dprintk("fails in aquiring ownership of a SMI interface.\n"); 295 dprintk("fails in acquiring ownership of a SMI interface.\n");
296 return -EINVAL; 296 return -EINVAL;
297 } 297 }
298 298
@@ -360,7 +360,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
360 int result = speedstep_smi_ownership(); 360 int result = speedstep_smi_ownership();
361 361
362 if (result) 362 if (result)
363 dprintk("fails in re-aquiring ownership of a SMI interface.\n"); 363 dprintk("fails in re-acquiring ownership of a SMI interface.\n");
364 364
365 return result; 365 return result;
366} 366}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
276 276
277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 277static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
278{ 278{
279#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 279#ifdef CONFIG_NUMA
280 unsigned node; 280 unsigned node;
281 int cpu = smp_processor_id(); 281 int cpu = smp_processor_id();
282 int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
283 282
284 /* Don't do the funky fallback heuristics the AMD version employs 283 /* Don't do the funky fallback heuristics the AMD version employs
285 for now. */ 284 for now. */
286 node = apicid_to_node[apicid]; 285 node = numa_cpu_node(cpu);
287 if (node == NUMA_NO_NODE || !node_online(node)) { 286 if (node == NUMA_NO_NODE || !node_online(node)) {
288 /* reuse the value from init_cpu_to_node() */ 287 /* reuse the value from init_cpu_to_node() */
289 node = cpu_to_node(cpu); 288 node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7283e98deaae..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -45,6 +45,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
45 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ 45 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
46 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ 46 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ 47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
48 { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
48 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ 49 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
49 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 50 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
50 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ 51 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
@@ -66,6 +67,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
66 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ 67 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
67 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ 68 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
68 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ 69 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
70 { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ 71 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ 72 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ 73 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
@@ -87,6 +89,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ 89 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ 90 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ 91 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
92 { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
90 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ 93 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
91 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ 94 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
92 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ 95 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
@@ -301,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
301 304
302struct _cache_attr { 305struct _cache_attr {
303 struct attribute attr; 306 struct attribute attr;
304 ssize_t (*show)(struct _cpuid4_info *, char *); 307 ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
305 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
309 unsigned int);
306}; 310};
307 311
308#ifdef CONFIG_AMD_NB 312#ifdef CONFIG_AMD_NB
@@ -397,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
397 401
398#define SHOW_CACHE_DISABLE(slot) \ 402#define SHOW_CACHE_DISABLE(slot) \
399static ssize_t \ 403static ssize_t \
400show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ 404show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
405 unsigned int cpu) \
401{ \ 406{ \
402 return show_cache_disable(this_leaf, buf, slot); \ 407 return show_cache_disable(this_leaf, buf, slot); \
403} 408}
@@ -509,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
509#define STORE_CACHE_DISABLE(slot) \ 514#define STORE_CACHE_DISABLE(slot) \
510static ssize_t \ 515static ssize_t \
511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 516store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
512 const char *buf, size_t count) \ 517 const char *buf, size_t count, \
518 unsigned int cpu) \
513{ \ 519{ \
514 return store_cache_disable(this_leaf, buf, count, slot); \ 520 return store_cache_disable(this_leaf, buf, count, slot); \
515} 521}
@@ -521,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
521static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 527static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
522 show_cache_disable_1, store_cache_disable_1); 528 show_cache_disable_1, store_cache_disable_1);
523 529
530static ssize_t
531show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
532{
533 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 return -EINVAL;
535
536 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
537}
538
539static ssize_t
540store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
541 unsigned int cpu)
542{
543 unsigned long val;
544
545 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM;
547
548 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549 return -EINVAL;
550
551 if (strict_strtoul(buf, 16, &val) < 0)
552 return -EINVAL;
553
554 if (amd_set_subcaches(cpu, val))
555 return -EINVAL;
556
557 return count;
558}
559
560static struct _cache_attr subcaches =
561 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
562
524#else /* CONFIG_AMD_NB */ 563#else /* CONFIG_AMD_NB */
525#define amd_init_l3_cache(x, y) 564#define amd_init_l3_cache(x, y)
526#endif /* CONFIG_AMD_NB */ 565#endif /* CONFIG_AMD_NB */
@@ -529,9 +568,9 @@ static int
529__cpuinit cpuid4_cache_lookup_regs(int index, 568__cpuinit cpuid4_cache_lookup_regs(int index,
530 struct _cpuid4_info_regs *this_leaf) 569 struct _cpuid4_info_regs *this_leaf)
531{ 570{
532 union _cpuid4_leaf_eax eax; 571 union _cpuid4_leaf_eax eax;
533 union _cpuid4_leaf_ebx ebx; 572 union _cpuid4_leaf_ebx ebx;
534 union _cpuid4_leaf_ecx ecx; 573 union _cpuid4_leaf_ecx ecx;
535 unsigned edx; 574 unsigned edx;
536 575
537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -729,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
729 struct cpuinfo_x86 *c = &cpu_data(cpu); 768 struct cpuinfo_x86 *c = &cpu_data(cpu);
730 769
731 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { 770 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
732 for_each_cpu(i, c->llc_shared_map) { 771 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
733 if (!per_cpu(ici_cpuid4_info, i)) 772 if (!per_cpu(ici_cpuid4_info, i))
734 continue; 773 continue;
735 this_leaf = CPUID4_INFO_IDX(i, index); 774 this_leaf = CPUID4_INFO_IDX(i, index);
736 for_each_cpu(sibling, c->llc_shared_map) { 775 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
737 if (!cpu_online(sibling)) 776 if (!cpu_online(sibling))
738 continue; 777 continue;
739 set_bit(sibling, this_leaf->shared_cpu_map); 778 set_bit(sibling, this_leaf->shared_cpu_map);
@@ -867,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
867#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) 906#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
868 907
869#define show_one_plus(file_name, object, val) \ 908#define show_one_plus(file_name, object, val) \
870static ssize_t show_##file_name \ 909static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
871 (struct _cpuid4_info *this_leaf, char *buf) \ 910 unsigned int cpu) \
872{ \ 911{ \
873 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 912 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
874} 913}
@@ -879,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
879show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); 918show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
880show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); 919show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
881 920
882static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 921static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
922 unsigned int cpu)
883{ 923{
884 return sprintf(buf, "%luK\n", this_leaf->size / 1024); 924 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
885} 925}
@@ -903,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
903 return n; 943 return n;
904} 944}
905 945
906static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) 946static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
947 unsigned int cpu)
907{ 948{
908 return show_shared_cpu_map_func(leaf, 0, buf); 949 return show_shared_cpu_map_func(leaf, 0, buf);
909} 950}
910 951
911static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) 952static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
953 unsigned int cpu)
912{ 954{
913 return show_shared_cpu_map_func(leaf, 1, buf); 955 return show_shared_cpu_map_func(leaf, 1, buf);
914} 956}
915 957
916static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) 958static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
959 unsigned int cpu)
917{ 960{
918 switch (this_leaf->eax.split.type) { 961 switch (this_leaf->eax.split.type) {
919 case CACHE_TYPE_DATA: 962 case CACHE_TYPE_DATA:
@@ -971,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) 1014 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2; 1015 n += 2;
973 1016
1017 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1018 n += 1;
1019
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); 1020 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL) 1021 if (attrs == NULL)
976 return attrs = default_attrs; 1022 return attrs = default_attrs;
@@ -983,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
983 attrs[n++] = &cache_disable_1.attr; 1029 attrs[n++] = &cache_disable_1.attr;
984 } 1030 }
985 1031
1032 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1033 attrs[n++] = &subcaches.attr;
1034
986 return attrs; 1035 return attrs;
987} 1036}
988#endif 1037#endif
@@ -995,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
995 1044
996 ret = fattr->show ? 1045 ret = fattr->show ?
997 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1046 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
998 buf) : 1047 buf, this_leaf->cpu) :
999 0; 1048 0;
1000 return ret; 1049 return ret;
1001} 1050}
@@ -1009,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
1009 1058
1010 ret = fattr->store ? 1059 ret = fattr->store ?
1011 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 1060 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1012 buf, count) : 1061 buf, count, this_leaf->cpu) :
1013 0; 1062 0;
1014 return ret; 1063 return ret;
1015} 1064}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 8209472b27a5..83930deec3c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
106ssize_t apei_read_mce(struct mce *m, u64 *record_id) 106ssize_t apei_read_mce(struct mce *m, u64 *record_id)
107{ 107{
108 struct cper_mce_record rcd; 108 struct cper_mce_record rcd;
109 ssize_t len; 109 int rc, pos;
110 110
111 len = erst_read_next(&rcd.hdr, sizeof(rcd)); 111 rc = erst_get_record_id_begin(&pos);
112 if (len <= 0) 112 if (rc)
113 return len; 113 return rc;
114 /* Can not skip other records in storage via ERST unless clear them */ 114retry:
115 else if (len != sizeof(rcd) || 115 rc = erst_get_record_id_next(&pos, record_id);
116 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { 116 if (rc)
117 if (printk_ratelimit()) 117 goto out;
118 pr_warning( 118 /* no more record */
119 "MCE-APEI: Can not skip the unknown record in ERST"); 119 if (*record_id == APEI_ERST_INVALID_RECORD_ID)
120 return -EIO; 120 goto out;
121 } 121 rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
122 122 /* someone else has cleared the record, try next one */
123 if (rc == -ENOENT)
124 goto retry;
125 else if (rc < 0)
126 goto out;
127 /* try to skip other type records in storage */
128 else if (rc != sizeof(rcd) ||
129 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
130 goto retry;
123 memcpy(m, &rcd.mce, sizeof(*m)); 131 memcpy(m, &rcd.mce, sizeof(*m));
124 *record_id = rcd.hdr.record_id; 132 rc = sizeof(*m);
133out:
134 erst_get_record_id_end();
125 135
126 return sizeof(*m); 136 return rc;
127} 137}
128 138
129/* Check whether there is record in ERST */ 139/* Check whether there is record in ERST */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a77971979564..0ed633c5048b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -32,7 +32,7 @@ static void inject_mce(struct mce *m)
32{ 32{
33 struct mce *i = &per_cpu(injectm, m->extcpu); 33 struct mce *i = &per_cpu(injectm, m->extcpu);
34 34
35 /* Make sure noone reads partially written injectm */ 35 /* Make sure no one reads partially written injectm */
36 i->finished = 0; 36 i->finished = 0;
37 mb(); 37 mb();
38 m->finished = 0; 38 m->finished = 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d916183b7f9c..3385ea26f684 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/sysdev.h> 23#include <linux/sysdev.h>
24#include <linux/syscore_ops.h>
24#include <linux/delay.h> 25#include <linux/delay.h>
25#include <linux/ctype.h> 26#include <linux/ctype.h>
26#include <linux/sched.h> 27#include <linux/sched.h>
@@ -881,7 +882,7 @@ reset:
881 * Check if the address reported by the CPU is in a format we can parse. 882 * Check if the address reported by the CPU is in a format we can parse.
882 * It would be possible to add code for most other cases, but all would 883 * It would be possible to add code for most other cases, but all would
883 * be somewhat complicated (e.g. segment offset would require an instruction 884 * be somewhat complicated (e.g. segment offset would require an instruction
884 * parser). So only support physical addresses upto page granuality for now. 885 * parser). So only support physical addresses up to page granuality for now.
885 */ 886 */
886static int mce_usable_address(struct mce *m) 887static int mce_usable_address(struct mce *m)
887{ 888{
@@ -1625,7 +1626,7 @@ out:
1625static unsigned int mce_poll(struct file *file, poll_table *wait) 1626static unsigned int mce_poll(struct file *file, poll_table *wait)
1626{ 1627{
1627 poll_wait(file, &mce_wait, wait); 1628 poll_wait(file, &mce_wait, wait);
1628 if (rcu_dereference_check_mce(mcelog.next)) 1629 if (rcu_access_index(mcelog.next))
1629 return POLLIN | POLLRDNORM; 1630 return POLLIN | POLLRDNORM;
1630 if (!mce_apei_read_done && apei_check_mce()) 1631 if (!mce_apei_read_done && apei_check_mce())
1631 return POLLIN | POLLRDNORM; 1632 return POLLIN | POLLRDNORM;
@@ -1749,14 +1750,14 @@ static int mce_disable_error_reporting(void)
1749 return 0; 1750 return 0;
1750} 1751}
1751 1752
1752static int mce_suspend(struct sys_device *dev, pm_message_t state) 1753static int mce_suspend(void)
1753{ 1754{
1754 return mce_disable_error_reporting(); 1755 return mce_disable_error_reporting();
1755} 1756}
1756 1757
1757static int mce_shutdown(struct sys_device *dev) 1758static void mce_shutdown(void)
1758{ 1759{
1759 return mce_disable_error_reporting(); 1760 mce_disable_error_reporting();
1760} 1761}
1761 1762
1762/* 1763/*
@@ -1764,14 +1765,18 @@ static int mce_shutdown(struct sys_device *dev)
1764 * Only one CPU is active at this time, the others get re-added later using 1765 * Only one CPU is active at this time, the others get re-added later using
1765 * CPU hotplug: 1766 * CPU hotplug:
1766 */ 1767 */
1767static int mce_resume(struct sys_device *dev) 1768static void mce_resume(void)
1768{ 1769{
1769 __mcheck_cpu_init_generic(); 1770 __mcheck_cpu_init_generic();
1770 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); 1771 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
1771
1772 return 0;
1773} 1772}
1774 1773
1774static struct syscore_ops mce_syscore_ops = {
1775 .suspend = mce_suspend,
1776 .shutdown = mce_shutdown,
1777 .resume = mce_resume,
1778};
1779
1775static void mce_cpu_restart(void *data) 1780static void mce_cpu_restart(void *data)
1776{ 1781{
1777 del_timer_sync(&__get_cpu_var(mce_timer)); 1782 del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1808,9 +1813,6 @@ static void mce_enable_ce(void *all)
1808} 1813}
1809 1814
1810static struct sysdev_class mce_sysclass = { 1815static struct sysdev_class mce_sysclass = {
1811 .suspend = mce_suspend,
1812 .shutdown = mce_shutdown,
1813 .resume = mce_resume,
1814 .name = "machinecheck", 1816 .name = "machinecheck",
1815}; 1817};
1816 1818
@@ -2139,6 +2141,7 @@ static __init int mcheck_init_device(void)
2139 return err; 2141 return err;
2140 } 2142 }
2141 2143
2144 register_syscore_ops(&mce_syscore_ops);
2142 register_hotcpu_notifier(&mce_cpu_notifier); 2145 register_hotcpu_notifier(&mce_cpu_notifier);
2143 misc_register(&mce_log_device); 2146 misc_register(&mce_log_device);
2144 2147
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
527 int i, err = 0; 527 int i, err = 0;
528 struct threshold_bank *b = NULL; 528 struct threshold_bank *b = NULL;
529 char name[32]; 529 char name[32];
530#ifdef CONFIG_SMP
531 struct cpuinfo_x86 *c = &cpu_data(cpu);
532#endif
533 530
534 sprintf(name, "threshold_bank%i", bank); 531 sprintf(name, "threshold_bank%i", bank);
535 532
536#ifdef CONFIG_SMP 533#ifdef CONFIG_SMP
537 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 534 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
538 i = cpumask_first(c->llc_shared_map); 535 i = cpumask_first(cpu_llc_shared_mask(cpu));
539 536
540 /* first core not up yet */ 537 /* first core not up yet */
541 if (cpu_data(i).cpu_core_id) 538 if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
555 if (err) 552 if (err)
556 goto out; 553 goto out;
557 554
558 cpumask_copy(b->cpus, c->llc_shared_map); 555 cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
559 per_cpu(threshold_banks, cpu)[bank] = b; 556 per_cpu(threshold_banks, cpu)[bank] = b;
560 557
561 goto out; 558 goto out;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index e12246ff5aa6..6f8c5e9da97f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -59,6 +59,7 @@ struct thermal_state {
59 59
60/* Callback to handle core threshold interrupts */ 60/* Callback to handle core threshold interrupts */
61int (*platform_thermal_notify)(__u64 msr_val); 61int (*platform_thermal_notify)(__u64 msr_val);
62EXPORT_SYMBOL(platform_thermal_notify);
62 63
63static DEFINE_PER_CPU(struct thermal_state, thermal_state); 64static DEFINE_PER_CPU(struct thermal_state, thermal_state);
64 65
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9f27228ceffd..a71efcdbb092 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong 2 * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
3 * because MTRRs can span upto 40 bits (36bits on most modern x86) 3 * because MTRRs can span up to 40 bits (36bits on most modern x86)
4 */ 4 */
5#define DEBUG 5#define DEBUG
6 6
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 01c0f3ee6cc3..929739a653d1 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -45,6 +45,7 @@
45#include <linux/cpu.h> 45#include <linux/cpu.h>
46#include <linux/pci.h> 46#include <linux/pci.h>
47#include <linux/smp.h> 47#include <linux/smp.h>
48#include <linux/syscore_ops.h>
48 49
49#include <asm/processor.h> 50#include <asm/processor.h>
50#include <asm/e820.h> 51#include <asm/e820.h>
@@ -292,14 +293,24 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
292 293
293 /* 294 /*
294 * HACK! 295 * HACK!
295 * We use this same function to initialize the mtrrs on boot. 296 *
296 * The state of the boot cpu's mtrrs has been saved, and we want 297 * We use this same function to initialize the mtrrs during boot,
297 * to replicate across all the APs. 298 * resume, runtime cpu online and on an explicit request to set a
298 * If we're doing that @reg is set to something special... 299 * specific MTRR.
300 *
301 * During boot or suspend, the state of the boot cpu's mtrrs has been
302 * saved, and we want to replicate that across all the cpus that come
303 * online (either at the end of boot or resume or during a runtime cpu
304 * online). If we're doing that, @reg is set to something special and on
305 * this cpu we still do mtrr_if->set_all(). During boot/resume, this
306 * is unnecessary if at this point we are still on the cpu that started
307 * the boot/resume sequence. But there is no guarantee that we are still
308 * on the same cpu. So we do mtrr_if->set_all() on this cpu aswell to be
309 * sure that we are in sync with everyone else.
299 */ 310 */
300 if (reg != ~0U) 311 if (reg != ~0U)
301 mtrr_if->set(reg, base, size, type); 312 mtrr_if->set(reg, base, size, type);
302 else if (!mtrr_aps_delayed_init) 313 else
303 mtrr_if->set_all(); 314 mtrr_if->set_all();
304 315
305 /* Wait for the others */ 316 /* Wait for the others */
@@ -630,7 +641,7 @@ struct mtrr_value {
630 641
631static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; 642static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
632 643
633static int mtrr_save(struct sys_device *sysdev, pm_message_t state) 644static int mtrr_save(void)
634{ 645{
635 int i; 646 int i;
636 647
@@ -642,7 +653,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
642 return 0; 653 return 0;
643} 654}
644 655
645static int mtrr_restore(struct sys_device *sysdev) 656static void mtrr_restore(void)
646{ 657{
647 int i; 658 int i;
648 659
@@ -653,12 +664,11 @@ static int mtrr_restore(struct sys_device *sysdev)
653 mtrr_value[i].ltype); 664 mtrr_value[i].ltype);
654 } 665 }
655 } 666 }
656 return 0;
657} 667}
658 668
659 669
660 670
661static struct sysdev_driver mtrr_sysdev_driver = { 671static struct syscore_ops mtrr_syscore_ops = {
662 .suspend = mtrr_save, 672 .suspend = mtrr_save,
663 .resume = mtrr_restore, 673 .resume = mtrr_restore,
664}; 674};
@@ -793,13 +803,21 @@ void set_mtrr_aps_delayed_init(void)
793} 803}
794 804
795/* 805/*
796 * MTRR initialization for all AP's 806 * Delayed MTRR initialization for all AP's
797 */ 807 */
798void mtrr_aps_init(void) 808void mtrr_aps_init(void)
799{ 809{
800 if (!use_intel()) 810 if (!use_intel())
801 return; 811 return;
802 812
813 /*
814 * Check if someone has requested the delay of AP MTRR initialization,
815 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
816 * then we are done.
817 */
818 if (!mtrr_aps_delayed_init)
819 return;
820
803 set_mtrr(~0U, 0, 0, 0); 821 set_mtrr(~0U, 0, 0, 0);
804 mtrr_aps_delayed_init = false; 822 mtrr_aps_delayed_init = false;
805} 823}
@@ -831,7 +849,7 @@ static int __init mtrr_init_finialize(void)
831 * TBD: is there any system with such CPU which supports 849 * TBD: is there any system with such CPU which supports
832 * suspend/resume? If no, we should remove the code. 850 * suspend/resume? If no, we should remove the code.
833 */ 851 */
834 sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); 852 register_syscore_ops(&mtrr_syscore_ops);
835 853
836 return 0; 854 return 0;
837} 855}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..eed3673a8656 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
30#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33#include <asm/smp.h>
33 34
34#if 0 35#if 0
35#undef wrmsrl 36#undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
93 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
94}; 95};
95 96
97struct intel_percore;
98
96#define MAX_LBR_ENTRIES 16 99#define MAX_LBR_ENTRIES 16
97 100
98struct cpu_hw_events { 101struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
128 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
129 132
130 /* 133 /*
134 * Intel percore register state.
135 * Coordinate shared resources between HT threads.
136 */
137 int percore_used; /* Used by this CPU? */
138 struct intel_percore *per_core;
139
140 /*
131 * AMD specific bits 141 * AMD specific bits
132 */ 142 */
133 struct amd_nb *amd_nb; 143 struct amd_nb *amd_nb;
@@ -166,7 +176,7 @@ struct cpu_hw_events {
166/* 176/*
167 * Constraint on the Event code + UMask 177 * Constraint on the Event code + UMask
168 */ 178 */
169#define PEBS_EVENT_CONSTRAINT(c, n) \ 179#define INTEL_UEVENT_CONSTRAINT(c, n) \
170 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) 180 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
171 181
172#define EVENT_CONSTRAINT_END \ 182#define EVENT_CONSTRAINT_END \
@@ -175,6 +185,28 @@ struct cpu_hw_events {
175#define for_each_event_constraint(e, c) \ 185#define for_each_event_constraint(e, c) \
176 for ((e) = (c); (e)->weight; (e)++) 186 for ((e) = (c); (e)->weight; (e)++)
177 187
188/*
189 * Extra registers for specific events.
190 * Some events need large masks and require external MSRs.
191 * Define a mapping to these extra registers.
192 */
193struct extra_reg {
194 unsigned int event;
195 unsigned int msr;
196 u64 config_mask;
197 u64 valid_mask;
198};
199
200#define EVENT_EXTRA_REG(e, ms, m, vm) { \
201 .event = (e), \
202 .msr = (ms), \
203 .config_mask = (m), \
204 .valid_mask = (vm), \
205 }
206#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
207 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
208#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
209
178union perf_capabilities { 210union perf_capabilities {
179 struct { 211 struct {
180 u64 lbr_format : 6; 212 u64 lbr_format : 6;
@@ -219,6 +251,7 @@ struct x86_pmu {
219 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 251 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
220 struct perf_event *event); 252 struct perf_event *event);
221 struct event_constraint *event_constraints; 253 struct event_constraint *event_constraints;
254 struct event_constraint *percore_constraints;
222 void (*quirks)(void); 255 void (*quirks)(void);
223 int perfctr_second_write; 256 int perfctr_second_write;
224 257
@@ -247,6 +280,11 @@ struct x86_pmu {
247 */ 280 */
248 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 281 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
249 int lbr_nr; /* hardware stack size */ 282 int lbr_nr; /* hardware stack size */
283
284 /*
285 * Extra registers for events
286 */
287 struct extra_reg *extra_regs;
250}; 288};
251 289
252static struct x86_pmu x86_pmu __read_mostly; 290static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +309,10 @@ static u64 __read_mostly hw_cache_event_ids
271 [PERF_COUNT_HW_CACHE_MAX] 309 [PERF_COUNT_HW_CACHE_MAX]
272 [PERF_COUNT_HW_CACHE_OP_MAX] 310 [PERF_COUNT_HW_CACHE_OP_MAX]
273 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 311 [PERF_COUNT_HW_CACHE_RESULT_MAX];
312static u64 __read_mostly hw_cache_extra_regs
313 [PERF_COUNT_HW_CACHE_MAX]
314 [PERF_COUNT_HW_CACHE_OP_MAX]
315 [PERF_COUNT_HW_CACHE_RESULT_MAX];
274 316
275/* 317/*
276 * Propagate event elapsed time into the generic event. 318 * Propagate event elapsed time into the generic event.
@@ -298,7 +340,7 @@ x86_perf_event_update(struct perf_event *event)
298 */ 340 */
299again: 341again:
300 prev_raw_count = local64_read(&hwc->prev_count); 342 prev_raw_count = local64_read(&hwc->prev_count);
301 rdmsrl(hwc->event_base + idx, new_raw_count); 343 rdmsrl(hwc->event_base, new_raw_count);
302 344
303 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 345 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
304 new_raw_count) != prev_raw_count) 346 new_raw_count) != prev_raw_count)
@@ -321,6 +363,49 @@ again:
321 return new_raw_count; 363 return new_raw_count;
322} 364}
323 365
366/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
367static inline int x86_pmu_addr_offset(int index)
368{
369 if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
370 return index << 1;
371 return index;
372}
373
374static inline unsigned int x86_pmu_config_addr(int index)
375{
376 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
377}
378
379static inline unsigned int x86_pmu_event_addr(int index)
380{
381 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
382}
383
384/*
385 * Find and validate any extra registers to set up.
386 */
387static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
388{
389 struct extra_reg *er;
390
391 event->hw.extra_reg = 0;
392 event->hw.extra_config = 0;
393
394 if (!x86_pmu.extra_regs)
395 return 0;
396
397 for (er = x86_pmu.extra_regs; er->msr; er++) {
398 if (er->event != (config & er->config_mask))
399 continue;
400 if (event->attr.config1 & ~er->valid_mask)
401 return -EINVAL;
402 event->hw.extra_reg = er->msr;
403 event->hw.extra_config = event->attr.config1;
404 break;
405 }
406 return 0;
407}
408
324static atomic_t active_events; 409static atomic_t active_events;
325static DEFINE_MUTEX(pmc_reserve_mutex); 410static DEFINE_MUTEX(pmc_reserve_mutex);
326 411
@@ -331,12 +416,12 @@ static bool reserve_pmc_hardware(void)
331 int i; 416 int i;
332 417
333 for (i = 0; i < x86_pmu.num_counters; i++) { 418 for (i = 0; i < x86_pmu.num_counters; i++) {
334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 419 if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
335 goto perfctr_fail; 420 goto perfctr_fail;
336 } 421 }
337 422
338 for (i = 0; i < x86_pmu.num_counters; i++) { 423 for (i = 0; i < x86_pmu.num_counters; i++) {
339 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 424 if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
340 goto eventsel_fail; 425 goto eventsel_fail;
341 } 426 }
342 427
@@ -344,13 +429,13 @@ static bool reserve_pmc_hardware(void)
344 429
345eventsel_fail: 430eventsel_fail:
346 for (i--; i >= 0; i--) 431 for (i--; i >= 0; i--)
347 release_evntsel_nmi(x86_pmu.eventsel + i); 432 release_evntsel_nmi(x86_pmu_config_addr(i));
348 433
349 i = x86_pmu.num_counters; 434 i = x86_pmu.num_counters;
350 435
351perfctr_fail: 436perfctr_fail:
352 for (i--; i >= 0; i--) 437 for (i--; i >= 0; i--)
353 release_perfctr_nmi(x86_pmu.perfctr + i); 438 release_perfctr_nmi(x86_pmu_event_addr(i));
354 439
355 return false; 440 return false;
356} 441}
@@ -360,8 +445,8 @@ static void release_pmc_hardware(void)
360 int i; 445 int i;
361 446
362 for (i = 0; i < x86_pmu.num_counters; i++) { 447 for (i = 0; i < x86_pmu.num_counters; i++) {
363 release_perfctr_nmi(x86_pmu.perfctr + i); 448 release_perfctr_nmi(x86_pmu_event_addr(i));
364 release_evntsel_nmi(x86_pmu.eventsel + i); 449 release_evntsel_nmi(x86_pmu_config_addr(i));
365 } 450 }
366} 451}
367 452
@@ -382,7 +467,7 @@ static bool check_hw_exists(void)
382 * complain and bail. 467 * complain and bail.
383 */ 468 */
384 for (i = 0; i < x86_pmu.num_counters; i++) { 469 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i; 470 reg = x86_pmu_config_addr(i);
386 ret = rdmsrl_safe(reg, &val); 471 ret = rdmsrl_safe(reg, &val);
387 if (ret) 472 if (ret)
388 goto msr_fail; 473 goto msr_fail;
@@ -407,20 +492,25 @@ static bool check_hw_exists(void)
407 * that don't trap on the MSR access and always return 0s. 492 * that don't trap on the MSR access and always return 0s.
408 */ 493 */
409 val = 0xabcdUL; 494 val = 0xabcdUL;
410 ret = checking_wrmsrl(x86_pmu.perfctr, val); 495 ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 496 ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
412 if (ret || val != val_new) 497 if (ret || val != val_new)
413 goto msr_fail; 498 goto msr_fail;
414 499
415 return true; 500 return true;
416 501
417bios_fail: 502bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n"); 503 /*
504 * We still allow the PMU driver to operate:
505 */
506 printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); 507 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false; 508
509 return true;
421 510
422msr_fail: 511msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); 512 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
513
424 return false; 514 return false;
425} 515}
426 516
@@ -442,8 +532,9 @@ static inline int x86_pmu_initialized(void)
442} 532}
443 533
444static inline int 534static inline int
445set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) 535set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
446{ 536{
537 struct perf_event_attr *attr = &event->attr;
447 unsigned int cache_type, cache_op, cache_result; 538 unsigned int cache_type, cache_op, cache_result;
448 u64 config, val; 539 u64 config, val;
449 540
@@ -470,8 +561,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
470 return -EINVAL; 561 return -EINVAL;
471 562
472 hwc->config |= val; 563 hwc->config |= val;
473 564 attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
474 return 0; 565 return x86_pmu_extra_regs(val, event);
475} 566}
476 567
477static int x86_setup_perfctr(struct perf_event *event) 568static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +587,10 @@ static int x86_setup_perfctr(struct perf_event *event)
496 } 587 }
497 588
498 if (attr->type == PERF_TYPE_RAW) 589 if (attr->type == PERF_TYPE_RAW)
499 return 0; 590 return x86_pmu_extra_regs(event->attr.config, event);
500 591
501 if (attr->type == PERF_TYPE_HW_CACHE) 592 if (attr->type == PERF_TYPE_HW_CACHE)
502 return set_ext_hw_attr(hwc, attr); 593 return set_ext_hw_attr(hwc, event);
503 594
504 if (attr->config >= x86_pmu.max_events) 595 if (attr->config >= x86_pmu.max_events)
505 return -EINVAL; 596 return -EINVAL;
@@ -617,11 +708,11 @@ static void x86_pmu_disable_all(void)
617 708
618 if (!test_bit(idx, cpuc->active_mask)) 709 if (!test_bit(idx, cpuc->active_mask))
619 continue; 710 continue;
620 rdmsrl(x86_pmu.eventsel + idx, val); 711 rdmsrl(x86_pmu_config_addr(idx), val);
621 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) 712 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
622 continue; 713 continue;
623 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 714 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
624 wrmsrl(x86_pmu.eventsel + idx, val); 715 wrmsrl(x86_pmu_config_addr(idx), val);
625 } 716 }
626} 717}
627 718
@@ -642,21 +733,26 @@ static void x86_pmu_disable(struct pmu *pmu)
642 x86_pmu.disable_all(); 733 x86_pmu.disable_all();
643} 734}
644 735
736static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
737 u64 enable_mask)
738{
739 if (hwc->extra_reg)
740 wrmsrl(hwc->extra_reg, hwc->extra_config);
741 wrmsrl(hwc->config_base, hwc->config | enable_mask);
742}
743
645static void x86_pmu_enable_all(int added) 744static void x86_pmu_enable_all(int added)
646{ 745{
647 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 746 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
648 int idx; 747 int idx;
649 748
650 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 749 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
651 struct perf_event *event = cpuc->events[idx]; 750 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
652 u64 val;
653 751
654 if (!test_bit(idx, cpuc->active_mask)) 752 if (!test_bit(idx, cpuc->active_mask))
655 continue; 753 continue;
656 754
657 val = event->hw.config; 755 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
658 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
659 wrmsrl(x86_pmu.eventsel + idx, val);
660 } 756 }
661} 757}
662 758
@@ -821,15 +917,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
821 hwc->event_base = 0; 917 hwc->event_base = 0;
822 } else if (hwc->idx >= X86_PMC_IDX_FIXED) { 918 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
823 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 919 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
824 /* 920 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
825 * We set it so that event_base + idx in wrmsr/rdmsr maps to
826 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
827 */
828 hwc->event_base =
829 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
830 } else { 921 } else {
831 hwc->config_base = x86_pmu.eventsel; 922 hwc->config_base = x86_pmu_config_addr(hwc->idx);
832 hwc->event_base = x86_pmu.perfctr; 923 hwc->event_base = x86_pmu_event_addr(hwc->idx);
833 } 924 }
834} 925}
835 926
@@ -915,17 +1006,11 @@ static void x86_pmu_enable(struct pmu *pmu)
915 x86_pmu.enable_all(added); 1006 x86_pmu.enable_all(added);
916} 1007}
917 1008
918static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
919 u64 enable_mask)
920{
921 wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
922}
923
924static inline void x86_pmu_disable_event(struct perf_event *event) 1009static inline void x86_pmu_disable_event(struct perf_event *event)
925{ 1010{
926 struct hw_perf_event *hwc = &event->hw; 1011 struct hw_perf_event *hwc = &event->hw;
927 1012
928 wrmsrl(hwc->config_base + hwc->idx, hwc->config); 1013 wrmsrl(hwc->config_base, hwc->config);
929} 1014}
930 1015
931static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 1016static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1063,7 @@ x86_perf_event_set_period(struct perf_event *event)
978 */ 1063 */
979 local64_set(&hwc->prev_count, (u64)-left); 1064 local64_set(&hwc->prev_count, (u64)-left);
980 1065
981 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); 1066 wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
982 1067
983 /* 1068 /*
984 * Due to erratum on certan cpu we need 1069 * Due to erratum on certan cpu we need
@@ -986,7 +1071,7 @@ x86_perf_event_set_period(struct perf_event *event)
986 * is updated properly 1071 * is updated properly
987 */ 1072 */
988 if (x86_pmu.perfctr_second_write) { 1073 if (x86_pmu.perfctr_second_write) {
989 wrmsrl(hwc->event_base + idx, 1074 wrmsrl(hwc->event_base,
990 (u64)(-left) & x86_pmu.cntval_mask); 1075 (u64)(-left) & x86_pmu.cntval_mask);
991 } 1076 }
992 1077
@@ -1029,7 +1114,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
1029 1114
1030 /* 1115 /*
1031 * If group events scheduling transaction was started, 1116 * If group events scheduling transaction was started,
1032 * skip the schedulability test here, it will be peformed 1117 * skip the schedulability test here, it will be performed
1033 * at commit time (->commit_txn) as a whole 1118 * at commit time (->commit_txn) as a whole
1034 */ 1119 */
1035 if (cpuc->group_flag & PERF_EVENT_TXN) 1120 if (cpuc->group_flag & PERF_EVENT_TXN)
@@ -1113,8 +1198,8 @@ void perf_event_print_debug(void)
1113 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1198 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1114 1199
1115 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1200 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1116 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1201 rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
1117 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1202 rdmsrl(x86_pmu_event_addr(idx), pmc_count);
1118 1203
1119 prev_left = per_cpu(pmc_prev_left[idx], cpu); 1204 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1120 1205
@@ -1389,7 +1474,7 @@ static void __init pmu_check_apic(void)
1389 pr_info("no hardware sampling interrupt available.\n"); 1474 pr_info("no hardware sampling interrupt available.\n");
1390} 1475}
1391 1476
1392int __init init_hw_perf_events(void) 1477static int __init init_hw_perf_events(void)
1393{ 1478{
1394 struct event_constraint *c; 1479 struct event_constraint *c;
1395 int err; 1480 int err;
@@ -1608,7 +1693,7 @@ out:
1608 return ret; 1693 return ret;
1609} 1694}
1610 1695
1611int x86_pmu_event_init(struct perf_event *event) 1696static int x86_pmu_event_init(struct perf_event *event)
1612{ 1697{
1613 struct pmu *tmp; 1698 struct pmu *tmp;
1614 int err; 1699 int err;
@@ -1710,7 +1795,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1710 1795
1711 perf_callchain_store(entry, regs->ip); 1796 perf_callchain_store(entry, regs->ip);
1712 1797
1713 dump_trace(NULL, regs, NULL, &backtrace_ops, entry); 1798 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1714} 1799}
1715 1800
1716#ifdef CONFIG_COMPAT 1801#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..cf4e369cea67 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
8 [ C(L1D) ] = { 8 [ C(L1D) ] = {
9 [ C(OP_READ) ] = { 9 [ C(OP_READ) ] = {
10 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 10 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
11 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ 11 [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
12 }, 12 },
13 [ C(OP_WRITE) ] = { 13 [ C(OP_WRITE) ] = {
14 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ 14 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
127/* 127/*
128 * AMD64 events are detected based on their event codes. 128 * AMD64 events are detected based on their event codes.
129 */ 129 */
130static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
131{
132 return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
133}
134
130static inline int amd_is_nb_event(struct hw_perf_event *hwc) 135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
131{ 136{
132 return (hwc->config & 0xe0) == 0xe0; 137 return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,195 @@ static __initconst const struct x86_pmu amd_pmu = {
385 .cpu_dead = amd_pmu_cpu_dead, 390 .cpu_dead = amd_pmu_cpu_dead,
386}; 391};
387 392
393/* AMD Family 15h */
394
395#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
396
397#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
398#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
399#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
400#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
401#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
402#define AMD_EVENT_EX_LS 0x000000C0ULL
403#define AMD_EVENT_DE 0x000000D0ULL
404#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
405
406/*
407 * AMD family 15h event code/PMC mappings:
408 *
409 * type = event_code & 0x0F0:
410 *
411 * 0x000 FP PERF_CTL[5:3]
412 * 0x010 FP PERF_CTL[5:3]
413 * 0x020 LS PERF_CTL[5:0]
414 * 0x030 LS PERF_CTL[5:0]
415 * 0x040 DC PERF_CTL[5:0]
416 * 0x050 DC PERF_CTL[5:0]
417 * 0x060 CU PERF_CTL[2:0]
418 * 0x070 CU PERF_CTL[2:0]
419 * 0x080 IC/DE PERF_CTL[2:0]
420 * 0x090 IC/DE PERF_CTL[2:0]
421 * 0x0A0 ---
422 * 0x0B0 ---
423 * 0x0C0 EX/LS PERF_CTL[5:0]
424 * 0x0D0 DE PERF_CTL[2:0]
425 * 0x0E0 NB NB_PERF_CTL[3:0]
426 * 0x0F0 NB NB_PERF_CTL[3:0]
427 *
428 * Exceptions:
429 *
430 * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*)
431 * 0x003 FP PERF_CTL[3]
432 * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*)
433 * 0x00B FP PERF_CTL[3]
434 * 0x00D FP PERF_CTL[3]
435 * 0x023 DE PERF_CTL[2:0]
436 * 0x02D LS PERF_CTL[3]
437 * 0x02E LS PERF_CTL[3,0]
438 * 0x043 CU PERF_CTL[2:0]
439 * 0x045 CU PERF_CTL[2:0]
440 * 0x046 CU PERF_CTL[2:0]
441 * 0x054 CU PERF_CTL[2:0]
442 * 0x055 CU PERF_CTL[2:0]
443 * 0x08F IC PERF_CTL[0]
444 * 0x187 DE PERF_CTL[0]
445 * 0x188 DE PERF_CTL[0]
446 * 0x0DB EX PERF_CTL[5:0]
447 * 0x0DC LS PERF_CTL[5:0]
448 * 0x0DD LS PERF_CTL[5:0]
449 * 0x0DE LS PERF_CTL[5:0]
450 * 0x0DF LS PERF_CTL[5:0]
451 * 0x1D6 EX PERF_CTL[5:0]
452 * 0x1D8 EX PERF_CTL[5:0]
453 *
454 * (*) depending on the umask all FPU counters may be used
455 */
456
457static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
458static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
459static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
460static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
461static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
462static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
463
464static struct event_constraint *
465amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
466{
467 struct hw_perf_event *hwc = &event->hw;
468 unsigned int event_code = amd_get_event_code(hwc);
469
470 switch (event_code & AMD_EVENT_TYPE_MASK) {
471 case AMD_EVENT_FP:
472 switch (event_code) {
473 case 0x000:
474 if (!(hwc->config & 0x0000F000ULL))
475 break;
476 if (!(hwc->config & 0x00000F00ULL))
477 break;
478 return &amd_f15_PMC3;
479 case 0x004:
480 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
481 break;
482 return &amd_f15_PMC3;
483 case 0x003:
484 case 0x00B:
485 case 0x00D:
486 return &amd_f15_PMC3;
487 }
488 return &amd_f15_PMC53;
489 case AMD_EVENT_LS:
490 case AMD_EVENT_DC:
491 case AMD_EVENT_EX_LS:
492 switch (event_code) {
493 case 0x023:
494 case 0x043:
495 case 0x045:
496 case 0x046:
497 case 0x054:
498 case 0x055:
499 return &amd_f15_PMC20;
500 case 0x02D:
501 return &amd_f15_PMC3;
502 case 0x02E:
503 return &amd_f15_PMC30;
504 default:
505 return &amd_f15_PMC50;
506 }
507 case AMD_EVENT_CU:
508 case AMD_EVENT_IC_DE:
509 case AMD_EVENT_DE:
510 switch (event_code) {
511 case 0x08F:
512 case 0x187:
513 case 0x188:
514 return &amd_f15_PMC0;
515 case 0x0DB ... 0x0DF:
516 case 0x1D6:
517 case 0x1D8:
518 return &amd_f15_PMC50;
519 default:
520 return &amd_f15_PMC20;
521 }
522 case AMD_EVENT_NB:
523 /* not yet implemented */
524 return &emptyconstraint;
525 default:
526 return &emptyconstraint;
527 }
528}
529
530static __initconst const struct x86_pmu amd_pmu_f15h = {
531 .name = "AMD Family 15h",
532 .handle_irq = x86_pmu_handle_irq,
533 .disable_all = x86_pmu_disable_all,
534 .enable_all = x86_pmu_enable_all,
535 .enable = x86_pmu_enable_event,
536 .disable = x86_pmu_disable_event,
537 .hw_config = amd_pmu_hw_config,
538 .schedule_events = x86_schedule_events,
539 .eventsel = MSR_F15H_PERF_CTL,
540 .perfctr = MSR_F15H_PERF_CTR,
541 .event_map = amd_pmu_event_map,
542 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
543 .num_counters = 6,
544 .cntval_bits = 48,
545 .cntval_mask = (1ULL << 48) - 1,
546 .apic = 1,
547 /* use highest bit to detect overflow */
548 .max_period = (1ULL << 47) - 1,
549 .get_event_constraints = amd_get_event_constraints_f15h,
550 /* nortbridge counters not yet implemented: */
551#if 0
552 .put_event_constraints = amd_put_event_constraints,
553
554 .cpu_prepare = amd_pmu_cpu_prepare,
555 .cpu_starting = amd_pmu_cpu_starting,
556 .cpu_dead = amd_pmu_cpu_dead,
557#endif
558};
559
388static __init int amd_pmu_init(void) 560static __init int amd_pmu_init(void)
389{ 561{
390 /* Performance-monitoring supported from K7 and later: */ 562 /* Performance-monitoring supported from K7 and later: */
391 if (boot_cpu_data.x86 < 6) 563 if (boot_cpu_data.x86 < 6)
392 return -ENODEV; 564 return -ENODEV;
393 565
394 x86_pmu = amd_pmu; 566 /*
567 * If core performance counter extensions exists, it must be
568 * family 15h, otherwise fail. See x86_pmu_addr_offset().
569 */
570 switch (boot_cpu_data.x86) {
571 case 0x15:
572 if (!cpu_has_perfctr_core)
573 return -ENODEV;
574 x86_pmu = amd_pmu_f15h;
575 break;
576 default:
577 if (cpu_has_perfctr_core)
578 return -ENODEV;
579 x86_pmu = amd_pmu;
580 break;
581 }
395 582
396 /* Events are common for all AMDs */ 583 /* Events are common for all AMDs */
397 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 584 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..8fc2b2cee1da 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#ifdef CONFIG_CPU_SUP_INTEL
2 2
3#define MAX_EXTRA_REGS 2
4
5/*
6 * Per register state.
7 */
8struct er_account {
9 int ref; /* reference count */
10 unsigned int extra_reg; /* extra MSR number */
11 u64 extra_config; /* extra MSR config */
12};
13
14/*
15 * Per core state
16 * This used to coordinate shared registers for HT threads.
17 */
18struct intel_percore {
19 raw_spinlock_t lock; /* protect structure */
20 struct er_account regs[MAX_EXTRA_REGS];
21 int refcnt; /* number of threads */
22 unsigned core_id;
23};
24
3/* 25/*
4 * Intel PerfMon, used on Core and later. 26 * Intel PerfMon, used on Core and later.
5 */ 27 */
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
64 EVENT_CONSTRAINT_END 86 EVENT_CONSTRAINT_END
65}; 87};
66 88
89static struct extra_reg intel_nehalem_extra_regs[] =
90{
91 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92 EVENT_EXTRA_END
93};
94
95static struct event_constraint intel_nehalem_percore_constraints[] =
96{
97 INTEL_EVENT_CONSTRAINT(0xb7, 0),
98 EVENT_CONSTRAINT_END
99};
100
67static struct event_constraint intel_westmere_event_constraints[] = 101static struct event_constraint intel_westmere_event_constraints[] =
68{ 102{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
76 EVENT_CONSTRAINT_END 110 EVENT_CONSTRAINT_END
77}; 111};
78 112
113static struct event_constraint intel_snb_event_constraints[] =
114{
115 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119 INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120 INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123 EVENT_CONSTRAINT_END
124};
125
126static struct extra_reg intel_westmere_extra_regs[] =
127{
128 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130 EVENT_EXTRA_END
131};
132
133static struct event_constraint intel_westmere_percore_constraints[] =
134{
135 INTEL_EVENT_CONSTRAINT(0xb7, 0),
136 INTEL_EVENT_CONSTRAINT(0xbb, 0),
137 EVENT_CONSTRAINT_END
138};
139
79static struct event_constraint intel_gen_event_constraints[] = 140static struct event_constraint intel_gen_event_constraints[] =
80{ 141{
81 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 142 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
89 return intel_perfmon_event_map[hw_event]; 150 return intel_perfmon_event_map[hw_event];
90} 151}
91 152
153static __initconst const u64 snb_hw_cache_event_ids
154 [PERF_COUNT_HW_CACHE_MAX]
155 [PERF_COUNT_HW_CACHE_OP_MAX]
156 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
157{
158 [ C(L1D) ] = {
159 [ C(OP_READ) ] = {
160 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
161 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
162 },
163 [ C(OP_WRITE) ] = {
164 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
165 [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
166 },
167 [ C(OP_PREFETCH) ] = {
168 [ C(RESULT_ACCESS) ] = 0x0,
169 [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
170 },
171 },
172 [ C(L1I ) ] = {
173 [ C(OP_READ) ] = {
174 [ C(RESULT_ACCESS) ] = 0x0,
175 [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
176 },
177 [ C(OP_WRITE) ] = {
178 [ C(RESULT_ACCESS) ] = -1,
179 [ C(RESULT_MISS) ] = -1,
180 },
181 [ C(OP_PREFETCH) ] = {
182 [ C(RESULT_ACCESS) ] = 0x0,
183 [ C(RESULT_MISS) ] = 0x0,
184 },
185 },
186 [ C(LL ) ] = {
187 /*
188 * TBD: Need Off-core Response Performance Monitoring support
189 */
190 [ C(OP_READ) ] = {
191 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
192 [ C(RESULT_ACCESS) ] = 0x01b7,
193 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
194 [ C(RESULT_MISS) ] = 0x01bb,
195 },
196 [ C(OP_WRITE) ] = {
197 /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
198 [ C(RESULT_ACCESS) ] = 0x01b7,
199 /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
200 [ C(RESULT_MISS) ] = 0x01bb,
201 },
202 [ C(OP_PREFETCH) ] = {
203 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
204 [ C(RESULT_ACCESS) ] = 0x01b7,
205 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
206 [ C(RESULT_MISS) ] = 0x01bb,
207 },
208 },
209 [ C(DTLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
212 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
213 },
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
216 [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
217 },
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = 0x0,
220 [ C(RESULT_MISS) ] = 0x0,
221 },
222 },
223 [ C(ITLB) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
226 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
227 },
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
231 },
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
235 },
236 },
237 [ C(BPU ) ] = {
238 [ C(OP_READ) ] = {
239 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
240 [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
241 },
242 [ C(OP_WRITE) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 [ C(OP_PREFETCH) ] = {
247 [ C(RESULT_ACCESS) ] = -1,
248 [ C(RESULT_MISS) ] = -1,
249 },
250 },
251};
252
92static __initconst const u64 westmere_hw_cache_event_ids 253static __initconst const u64 westmere_hw_cache_event_ids
93 [PERF_COUNT_HW_CACHE_MAX] 254 [PERF_COUNT_HW_CACHE_MAX]
94 [PERF_COUNT_HW_CACHE_OP_MAX] 255 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
124 }, 285 },
125 [ C(LL ) ] = { 286 [ C(LL ) ] = {
126 [ C(OP_READ) ] = { 287 [ C(OP_READ) ] = {
127 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 288 /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
128 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 289 [ C(RESULT_ACCESS) ] = 0x01b7,
290 /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
291 [ C(RESULT_MISS) ] = 0x01bb,
129 }, 292 },
293 /*
294 * Use RFO, not WRITEBACK, because a write miss would typically occur
295 * on RFO.
296 */
130 [ C(OP_WRITE) ] = { 297 [ C(OP_WRITE) ] = {
131 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 298 /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
132 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 299 [ C(RESULT_ACCESS) ] = 0x01bb,
300 /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
301 [ C(RESULT_MISS) ] = 0x01b7,
133 }, 302 },
134 [ C(OP_PREFETCH) ] = { 303 [ C(OP_PREFETCH) ] = {
135 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 304 /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
136 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 305 [ C(RESULT_ACCESS) ] = 0x01b7,
306 /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
307 [ C(RESULT_MISS) ] = 0x01bb,
137 }, 308 },
138 }, 309 },
139 [ C(DTLB) ] = { 310 [ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
180 }, 351 },
181}; 352};
182 353
354/*
355 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
356 */
357
358#define DMND_DATA_RD (1 << 0)
359#define DMND_RFO (1 << 1)
360#define DMND_WB (1 << 3)
361#define PF_DATA_RD (1 << 4)
362#define PF_DATA_RFO (1 << 5)
363#define RESP_UNCORE_HIT (1 << 8)
364#define RESP_MISS (0xf600) /* non uncore hit */
365
366static __initconst const u64 nehalem_hw_cache_extra_regs
367 [PERF_COUNT_HW_CACHE_MAX]
368 [PERF_COUNT_HW_CACHE_OP_MAX]
369 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
370{
371 [ C(LL ) ] = {
372 [ C(OP_READ) ] = {
373 [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
374 [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
375 },
376 [ C(OP_WRITE) ] = {
377 [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
378 [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
379 },
380 [ C(OP_PREFETCH) ] = {
381 [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
382 [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
383 },
384 }
385};
386
183static __initconst const u64 nehalem_hw_cache_event_ids 387static __initconst const u64 nehalem_hw_cache_event_ids
184 [PERF_COUNT_HW_CACHE_MAX] 388 [PERF_COUNT_HW_CACHE_MAX]
185 [PERF_COUNT_HW_CACHE_OP_MAX] 389 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
215 }, 419 },
216 [ C(LL ) ] = { 420 [ C(LL ) ] = {
217 [ C(OP_READ) ] = { 421 [ C(OP_READ) ] = {
218 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ 422 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
219 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ 423 [ C(RESULT_ACCESS) ] = 0x01b7,
424 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
425 [ C(RESULT_MISS) ] = 0x01b7,
220 }, 426 },
427 /*
428 * Use RFO, not WRITEBACK, because a write miss would typically occur
429 * on RFO.
430 */
221 [ C(OP_WRITE) ] = { 431 [ C(OP_WRITE) ] = {
222 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ 432 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
223 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ 433 [ C(RESULT_ACCESS) ] = 0x01b7,
434 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
435 [ C(RESULT_MISS) ] = 0x01b7,
224 }, 436 },
225 [ C(OP_PREFETCH) ] = { 437 [ C(OP_PREFETCH) ] = {
226 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ 438 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
227 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ 439 [ C(RESULT_ACCESS) ] = 0x01b7,
440 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
441 [ C(RESULT_MISS) ] = 0x01b7,
228 }, 442 },
229 }, 443 },
230 [ C(DTLB) ] = { 444 [ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
691 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 905 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
692 906
693 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 907 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
694 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 908 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
695 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 909 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
696 } 910 }
697 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 911 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
698 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 912 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
794} 1008}
795 1009
796static struct event_constraint * 1010static struct event_constraint *
1011intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1012{
1013 struct hw_perf_event *hwc = &event->hw;
1014 unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
1015 struct event_constraint *c;
1016 struct intel_percore *pc;
1017 struct er_account *era;
1018 int i;
1019 int free_slot;
1020 int found;
1021
1022 if (!x86_pmu.percore_constraints || hwc->extra_alloc)
1023 return NULL;
1024
1025 for (c = x86_pmu.percore_constraints; c->cmask; c++) {
1026 if (e != c->code)
1027 continue;
1028
1029 /*
1030 * Allocate resource per core.
1031 */
1032 pc = cpuc->per_core;
1033 if (!pc)
1034 break;
1035 c = &emptyconstraint;
1036 raw_spin_lock(&pc->lock);
1037 free_slot = -1;
1038 found = 0;
1039 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1040 era = &pc->regs[i];
1041 if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
1042 /* Allow sharing same config */
1043 if (hwc->extra_config == era->extra_config) {
1044 era->ref++;
1045 cpuc->percore_used = 1;
1046 hwc->extra_alloc = 1;
1047 c = NULL;
1048 }
1049 /* else conflict */
1050 found = 1;
1051 break;
1052 } else if (era->ref == 0 && free_slot == -1)
1053 free_slot = i;
1054 }
1055 if (!found && free_slot != -1) {
1056 era = &pc->regs[free_slot];
1057 era->ref = 1;
1058 era->extra_reg = hwc->extra_reg;
1059 era->extra_config = hwc->extra_config;
1060 cpuc->percore_used = 1;
1061 hwc->extra_alloc = 1;
1062 c = NULL;
1063 }
1064 raw_spin_unlock(&pc->lock);
1065 return c;
1066 }
1067
1068 return NULL;
1069}
1070
1071static struct event_constraint *
797intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1072intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
798{ 1073{
799 struct event_constraint *c; 1074 struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
806 if (c) 1081 if (c)
807 return c; 1082 return c;
808 1083
1084 c = intel_percore_constraints(cpuc, event);
1085 if (c)
1086 return c;
1087
809 return x86_get_event_constraints(cpuc, event); 1088 return x86_get_event_constraints(cpuc, event);
810} 1089}
811 1090
1091static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1092 struct perf_event *event)
1093{
1094 struct extra_reg *er;
1095 struct intel_percore *pc;
1096 struct er_account *era;
1097 struct hw_perf_event *hwc = &event->hw;
1098 int i, allref;
1099
1100 if (!cpuc->percore_used)
1101 return;
1102
1103 for (er = x86_pmu.extra_regs; er->msr; er++) {
1104 if (er->event != (hwc->config & er->config_mask))
1105 continue;
1106
1107 pc = cpuc->per_core;
1108 raw_spin_lock(&pc->lock);
1109 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1110 era = &pc->regs[i];
1111 if (era->ref > 0 &&
1112 era->extra_config == hwc->extra_config &&
1113 era->extra_reg == er->msr) {
1114 era->ref--;
1115 hwc->extra_alloc = 0;
1116 break;
1117 }
1118 }
1119 allref = 0;
1120 for (i = 0; i < MAX_EXTRA_REGS; i++)
1121 allref += pc->regs[i].ref;
1122 if (allref == 0)
1123 cpuc->percore_used = 0;
1124 raw_spin_unlock(&pc->lock);
1125 break;
1126 }
1127}
1128
812static int intel_pmu_hw_config(struct perf_event *event) 1129static int intel_pmu_hw_config(struct perf_event *event)
813{ 1130{
814 int ret = x86_pmu_hw_config(event); 1131 int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
880 */ 1197 */
881 .max_period = (1ULL << 31) - 1, 1198 .max_period = (1ULL << 31) - 1,
882 .get_event_constraints = intel_get_event_constraints, 1199 .get_event_constraints = intel_get_event_constraints,
1200 .put_event_constraints = intel_put_event_constraints,
883 .event_constraints = intel_core_event_constraints, 1201 .event_constraints = intel_core_event_constraints,
884}; 1202};
885 1203
1204static int intel_pmu_cpu_prepare(int cpu)
1205{
1206 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1207
1208 if (!cpu_has_ht_siblings())
1209 return NOTIFY_OK;
1210
1211 cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
1212 GFP_KERNEL, cpu_to_node(cpu));
1213 if (!cpuc->per_core)
1214 return NOTIFY_BAD;
1215
1216 raw_spin_lock_init(&cpuc->per_core->lock);
1217 cpuc->per_core->core_id = -1;
1218 return NOTIFY_OK;
1219}
1220
886static void intel_pmu_cpu_starting(int cpu) 1221static void intel_pmu_cpu_starting(int cpu)
887{ 1222{
1223 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1224 int core_id = topology_core_id(cpu);
1225 int i;
1226
888 init_debug_store_on_cpu(cpu); 1227 init_debug_store_on_cpu(cpu);
889 /* 1228 /*
890 * Deal with CPUs that don't clear their LBRs on power-up. 1229 * Deal with CPUs that don't clear their LBRs on power-up.
891 */ 1230 */
892 intel_pmu_lbr_reset(); 1231 intel_pmu_lbr_reset();
1232
1233 if (!cpu_has_ht_siblings())
1234 return;
1235
1236 for_each_cpu(i, topology_thread_cpumask(cpu)) {
1237 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
1238
1239 if (pc && pc->core_id == core_id) {
1240 kfree(cpuc->per_core);
1241 cpuc->per_core = pc;
1242 break;
1243 }
1244 }
1245
1246 cpuc->per_core->core_id = core_id;
1247 cpuc->per_core->refcnt++;
893} 1248}
894 1249
895static void intel_pmu_cpu_dying(int cpu) 1250static void intel_pmu_cpu_dying(int cpu)
896{ 1251{
1252 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1253 struct intel_percore *pc = cpuc->per_core;
1254
1255 if (pc) {
1256 if (pc->core_id == -1 || --pc->refcnt == 0)
1257 kfree(pc);
1258 cpuc->per_core = NULL;
1259 }
1260
897 fini_debug_store_on_cpu(cpu); 1261 fini_debug_store_on_cpu(cpu);
898} 1262}
899 1263
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
918 */ 1282 */
919 .max_period = (1ULL << 31) - 1, 1283 .max_period = (1ULL << 31) - 1,
920 .get_event_constraints = intel_get_event_constraints, 1284 .get_event_constraints = intel_get_event_constraints,
1285 .put_event_constraints = intel_put_event_constraints,
921 1286
1287 .cpu_prepare = intel_pmu_cpu_prepare,
922 .cpu_starting = intel_pmu_cpu_starting, 1288 .cpu_starting = intel_pmu_cpu_starting,
923 .cpu_dying = intel_pmu_cpu_dying, 1289 .cpu_dying = intel_pmu_cpu_dying,
924}; 1290};
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
1024 intel_pmu_lbr_init_core(); 1390 intel_pmu_lbr_init_core();
1025 1391
1026 x86_pmu.event_constraints = intel_core2_event_constraints; 1392 x86_pmu.event_constraints = intel_core2_event_constraints;
1393 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
1027 pr_cont("Core2 events, "); 1394 pr_cont("Core2 events, ");
1028 break; 1395 break;
1029 1396
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void)
1032 case 46: /* 45 nm nehalem-ex, "Beckton" */ 1399 case 46: /* 45 nm nehalem-ex, "Beckton" */
1033 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 1400 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1034 sizeof(hw_cache_event_ids)); 1401 sizeof(hw_cache_event_ids));
1402 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1403 sizeof(hw_cache_extra_regs));
1035 1404
1036 intel_pmu_lbr_init_nhm(); 1405 intel_pmu_lbr_init_nhm();
1037 1406
1038 x86_pmu.event_constraints = intel_nehalem_event_constraints; 1407 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1408 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1409 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1039 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1410 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1411 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1040 pr_cont("Nehalem events, "); 1412 pr_cont("Nehalem events, ");
1041 break; 1413 break;
1042 1414
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
1047 intel_pmu_lbr_init_atom(); 1419 intel_pmu_lbr_init_atom();
1048 1420
1049 x86_pmu.event_constraints = intel_gen_event_constraints; 1421 x86_pmu.event_constraints = intel_gen_event_constraints;
1422 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
1050 pr_cont("Atom events, "); 1423 pr_cont("Atom events, ");
1051 break; 1424 break;
1052 1425
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void)
1054 case 44: /* 32 nm nehalem, "Gulftown" */ 1427 case 44: /* 32 nm nehalem, "Gulftown" */
1055 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 1428 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1056 sizeof(hw_cache_event_ids)); 1429 sizeof(hw_cache_event_ids));
1430 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1431 sizeof(hw_cache_extra_regs));
1057 1432
1058 intel_pmu_lbr_init_nhm(); 1433 intel_pmu_lbr_init_nhm();
1059 1434
1060 x86_pmu.event_constraints = intel_westmere_event_constraints; 1435 x86_pmu.event_constraints = intel_westmere_event_constraints;
1436 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1061 x86_pmu.enable_all = intel_pmu_nhm_enable_all; 1437 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1438 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1439 x86_pmu.extra_regs = intel_westmere_extra_regs;
1062 pr_cont("Westmere events, "); 1440 pr_cont("Westmere events, ");
1063 break; 1441 break;
1064 1442
1443 case 42: /* SandyBridge */
1444 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1445 sizeof(hw_cache_event_ids));
1446
1447 intel_pmu_lbr_init_nhm();
1448
1449 x86_pmu.event_constraints = intel_snb_event_constraints;
1450 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1451 pr_cont("SandyBridge events, ");
1452 break;
1453
1065 default: 1454 default:
1066 /* 1455 /*
1067 * default constraints for v2 and up 1456 * default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a0..bab491b8ee25 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,70 @@ static int intel_pmu_drain_bts_buffer(void)
361/* 361/*
362 * PEBS 362 * PEBS
363 */ 363 */
364static struct event_constraint intel_core2_pebs_event_constraints[] = {
365 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
366 INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
367 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
368 INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
369 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
370 EVENT_CONSTRAINT_END
371};
372
373static struct event_constraint intel_atom_pebs_event_constraints[] = {
374 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
375 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
376 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
377 EVENT_CONSTRAINT_END
378};
364 379
365static struct event_constraint intel_core_pebs_events[] = { 380static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
366 PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ 381 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
367 PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 382 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
368 PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 383 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
369 PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ 384 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
370 PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ 385 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
371 PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 386 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
372 PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ 387 INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
373 PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 388 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
374 PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ 389 INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
390 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
391 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
375 EVENT_CONSTRAINT_END 392 EVENT_CONSTRAINT_END
376}; 393};
377 394
378static struct event_constraint intel_nehalem_pebs_events[] = { 395static struct event_constraint intel_westmere_pebs_event_constraints[] = {
379 PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ 396 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
380 PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ 397 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
381 PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ 398 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
382 PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ 399 INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
383 PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ 400 INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
384 PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ 401 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
385 PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ 402 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
386 PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ 403 INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
387 PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ 404 INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
405 INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
406 INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
407 EVENT_CONSTRAINT_END
408};
409
410static struct event_constraint intel_snb_pebs_events[] = {
411 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
412 INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
413 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
414 INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
415 INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
416 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
417 INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
418 INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
419 INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
420 INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
421 INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
422 INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
423 INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
424 INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
425 INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
426 INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
427 INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
388 EVENT_CONSTRAINT_END 428 EVENT_CONSTRAINT_END
389}; 429};
390 430
@@ -695,20 +735,17 @@ static void intel_ds_init(void)
695 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); 735 printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
696 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); 736 x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
697 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; 737 x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
698 x86_pmu.pebs_constraints = intel_core_pebs_events;
699 break; 738 break;
700 739
701 case 1: 740 case 1:
702 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); 741 printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
703 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); 742 x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
704 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; 743 x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
705 x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
706 break; 744 break;
707 745
708 default: 746 default:
709 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); 747 printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
710 x86_pmu.pebs = 0; 748 x86_pmu.pebs = 0;
711 break;
712 } 749 }
713 } 750 }
714} 751}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e56b9bfbabd1..c2520e178d32 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Netburst Perfomance Events (P4, old Xeon) 2 * Netburst Performance Events (P4, old Xeon)
3 * 3 *
4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> 4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> 5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
@@ -679,10 +679,10 @@ static int p4_validate_raw_event(struct perf_event *event)
679 */ 679 */
680 680
681 /* 681 /*
682 * if an event is shared accross the logical threads 682 * if an event is shared across the logical threads
683 * the user needs special permissions to be able to use it 683 * the user needs special permissions to be able to use it
684 */ 684 */
685 if (p4_event_bind_map[v].shared) { 685 if (p4_ht_active() && p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES; 687 return -EACCES;
688 } 688 }
@@ -727,7 +727,8 @@ static int p4_hw_config(struct perf_event *event)
727 event->hw.config = p4_set_ht_bit(event->hw.config); 727 event->hw.config = p4_set_ht_bit(event->hw.config);
728 728
729 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
730 730 struct p4_event_bind *bind;
731 unsigned int esel;
731 /* 732 /*
732 * Clear bits we reserve to be managed by kernel itself 733 * Clear bits we reserve to be managed by kernel itself
733 * and never allowed from a user space 734 * and never allowed from a user space
@@ -743,6 +744,13 @@ static int p4_hw_config(struct perf_event *event)
743 * bits since we keep additional info here (for cache events and etc) 744 * bits since we keep additional info here (for cache events and etc)
744 */ 745 */
745 event->hw.config |= event->attr.config; 746 event->hw.config |= event->attr.config;
747 bind = p4_config_get_bind(event->attr.config);
748 if (!bind) {
749 rc = -EINVAL;
750 goto out;
751 }
752 esel = P4_OPCODE_ESEL(bind->opcode);
753 event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
746 } 754 }
747 755
748 rc = x86_setup_perfctr(event); 756 rc = x86_setup_perfctr(event);
@@ -756,15 +764,21 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
756 u64 v; 764 u64 v;
757 765
758 /* an official way for overflow indication */ 766 /* an official way for overflow indication */
759 rdmsrl(hwc->config_base + hwc->idx, v); 767 rdmsrl(hwc->config_base, v);
760 if (v & P4_CCCR_OVF) { 768 if (v & P4_CCCR_OVF) {
761 wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); 769 wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
762 return 1; 770 return 1;
763 } 771 }
764 772
765 /* it might be unflagged overflow */ 773 /*
766 rdmsrl(hwc->event_base + hwc->idx, v); 774 * In some circumstances the overflow might issue an NMI but did
767 if (!(v & ARCH_P4_CNTRVAL_MASK)) 775 * not set P4_CCCR_OVF bit. Because a counter holds a negative value
776 * we simply check for high bit being set, if it's cleared it means
777 * the counter has reached zero value and continued counting before
778 * real NMI signal was received:
779 */
780 rdmsrl(hwc->event_base, v);
781 if (!(v & ARCH_P4_UNFLAGGED_BIT))
768 return 1; 782 return 1;
769 783
770 return 0; 784 return 0;
@@ -777,13 +791,13 @@ static void p4_pmu_disable_pebs(void)
777 * 791 *
778 * It's still allowed that two threads setup same cache 792 * It's still allowed that two threads setup same cache
779 * events so we can't simply clear metrics until we knew 793 * events so we can't simply clear metrics until we knew
780 * noone is depending on us, so we need kind of counter 794 * no one is depending on us, so we need kind of counter
781 * for "ReplayEvent" users. 795 * for "ReplayEvent" users.
782 * 796 *
783 * What is more complex -- RAW events, if user (for some 797 * What is more complex -- RAW events, if user (for some
784 * reason) will pass some cache event metric with improper 798 * reason) will pass some cache event metric with improper
785 * event opcode -- it's fine from hardware point of view 799 * event opcode -- it's fine from hardware point of view
786 * but completely nonsence from "meaning" of such action. 800 * but completely nonsense from "meaning" of such action.
787 * 801 *
788 * So at moment let leave metrics turned on forever -- it's 802 * So at moment let leave metrics turned on forever -- it's
789 * ok for now but need to be revisited! 803 * ok for now but need to be revisited!
@@ -802,7 +816,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
802 * state we need to clear P4_CCCR_OVF, otherwise interrupt get 816 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
803 * asserted again and again 817 * asserted again and again
804 */ 818 */
805 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 819 (void)checking_wrmsrl(hwc->config_base,
806 (u64)(p4_config_unpack_cccr(hwc->config)) & 820 (u64)(p4_config_unpack_cccr(hwc->config)) &
807 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); 821 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
808} 822}
@@ -872,7 +886,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
872 p4_pmu_enable_pebs(hwc->config); 886 p4_pmu_enable_pebs(hwc->config);
873 887
874 (void)checking_wrmsrl(escr_addr, escr_conf); 888 (void)checking_wrmsrl(escr_addr, escr_conf);
875 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 889 (void)checking_wrmsrl(hwc->config_base,
876 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); 890 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
877} 891}
878 892
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
68 if (cpuc->enabled) 68 if (cpuc->enabled)
69 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 69 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
70 70
71 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 71 (void)checking_wrmsrl(hwc->config_base, val);
72} 72}
73 73
74static void p6_pmu_enable_event(struct perf_event *event) 74static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
81 if (cpuc->enabled) 81 if (cpuc->enabled)
82 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 82 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
83 83
84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base, val);
85} 85}
86 86
87static __initconst const struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
46 /* returns the bit offset of the performance counter register */ 46 /* returns the bit offset of the performance counter register */
47 switch (boot_cpu_data.x86_vendor) { 47 switch (boot_cpu_data.x86_vendor) {
48 case X86_VENDOR_AMD: 48 case X86_VENDOR_AMD:
49 if (msr >= MSR_F15H_PERF_CTR)
50 return (msr - MSR_F15H_PERF_CTR) >> 1;
49 return msr - MSR_K7_PERFCTR0; 51 return msr - MSR_K7_PERFCTR0;
50 case X86_VENDOR_INTEL: 52 case X86_VENDOR_INTEL:
51 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 53 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
70 /* returns the bit offset of the event selection register */ 72 /* returns the bit offset of the event selection register */
71 switch (boot_cpu_data.x86_vendor) { 73 switch (boot_cpu_data.x86_vendor) {
72 case X86_VENDOR_AMD: 74 case X86_VENDOR_AMD:
75 if (msr >= MSR_F15H_PERF_CTL)
76 return (msr - MSR_F15H_PERF_CTL) >> 1;
73 return msr - MSR_K7_EVNTSEL0; 77 return msr - MSR_K7_EVNTSEL0;
74 case X86_VENDOR_INTEL: 78 case X86_VENDOR_INTEL:
75 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 79 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 227b0448960d..d22d0c4edcfd 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -86,7 +86,7 @@ static void __init vmware_platform_setup(void)
86} 86}
87 87
88/* 88/*
89 * While checking the dmi string infomation, just checking the product 89 * While checking the dmi string information, just checking the product
90 * serial key should be enough, as this will always have a VMware 90 * serial key should be enough, as this will always have a VMware
91 * specific string when running under VMware hypervisor. 91 * specific string when running under VMware hypervisor.
92 */ 92 */