path: root/arch/x86/kernel/cpu
author    Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
committer Andrea Bastoni <bastoni@cs.unc.edu>  2010-05-30 19:16:45 -0400
commit    ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree      644b88f8a71896307d71438e9b3af49126ffb22b /arch/x86/kernel/cpu
parent    43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent    3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'arch/x86/kernel/cpu')
 arch/x86/kernel/cpu/Makefile                       |    3
 arch/x86/kernel/cpu/addon_cpuid_features.c         |   19
 arch/x86/kernel/cpu/amd.c                          |   57
 arch/x86/kernel/cpu/centaur.c                      |    2
 arch/x86/kernel/cpu/common.c                       |   52
 arch/x86/kernel/cpu/cpu.h                          |    2
 arch/x86/kernel/cpu/cpu_debug.c                    |  688
 arch/x86/kernel/cpu/cpufreq/Kconfig                |   14
 arch/x86/kernel/cpu/cpufreq/Makefile               |    1
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c         |   50
 arch/x86/kernel/cpu/cpufreq/elanfreq.c             |    1
 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c           |    1
 arch/x86/kernel/cpu/cpufreq/longhaul.c             |    2
 arch/x86/kernel/cpu/cpufreq/longrun.c              |    1
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c          |    1
 arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c          |  621
 arch/x86/kernel/cpu/cpufreq/powernow-k6.c          |    3
 arch/x86/kernel/cpu/cpufreq/powernow-k7.c          |   19
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c          |   44
 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c   |    1
 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c        |    3
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c        |    7
 arch/x86/kernel/cpu/cpufreq/speedstep-lib.h        |   24
 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c        |    3
 arch/x86/kernel/cpu/cyrix.c                        |    2
 arch/x86/kernel/cpu/intel.c                        |   33
 arch/x86/kernel/cpu/intel_cacheinfo.c              |  420
 arch/x86/kernel/cpu/mcheck/mce-inject.c            |   23
 arch/x86/kernel/cpu/mcheck/mce.c                   |  129
 arch/x86/kernel/cpu/mcheck/mce_amd.c               |    3
 arch/x86/kernel/cpu/mcheck/mce_intel.c             |    5
 arch/x86/kernel/cpu/mcheck/therm_throt.c           |   45
 arch/x86/kernel/cpu/mtrr/Makefile                  |    2
 arch/x86/kernel/cpu/mtrr/amd.c                     |    2
 arch/x86/kernel/cpu/mtrr/centaur.c                 |    2
 arch/x86/kernel/cpu/mtrr/cleanup.c                 |  189
 arch/x86/kernel/cpu/mtrr/cyrix.c                   |    2
 arch/x86/kernel/cpu/mtrr/generic.c                 |   11
 arch/x86/kernel/cpu/mtrr/if.c                      |   12
 arch/x86/kernel/cpu/mtrr/main.c                    |    7
 arch/x86/kernel/cpu/mtrr/mtrr.h                    |    6
 arch/x86/kernel/cpu/mtrr/state.c                   |   94
 arch/x86/kernel/cpu/perf_event.c                   | 1865
 arch/x86/kernel/cpu/perf_event_amd.c               |  422
 arch/x86/kernel/cpu/perf_event_intel.c             |  980
 arch/x86/kernel/cpu/perf_event_p6.c                |  159
 arch/x86/kernel/cpu/perfctr-watchdog.c             |   15
 arch/x86/kernel/cpu/transmeta.c                    |    2
 arch/x86/kernel/cpu/vmware.c                       |    2
 49 files changed, 3466 insertions(+), 2585 deletions(-)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 68537e957a9b..c202b62f3671 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
 # Don't trace early stages of a secondary CPU boot
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
 endif
 
 # Make sure load_percpu_segment has no stackprotector
@@ -18,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o
 obj-$(CONFIG_X86_32)		+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)		+= bugs_64.o
 
-obj-$(CONFIG_X86_CPU_DEBUG)	+= cpu_debug.o
-
 obj-$(CONFIG_CPU_SUP_INTEL)	+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)	+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)	+= cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index c965e5212714..97ad79cdf688 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
 		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
 		{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
+		{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
+		{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
+		{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },
+		{ X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
 		{ 0, 0, 0, 0 }
 	};
 
@@ -74,6 +78,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 	unsigned int eax, ebx, ecx, edx, sub_index;
 	unsigned int ht_mask_width, core_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
+	static bool printed;
 
 	if (c->cpuid_level < 0xb)
 		return;
@@ -127,12 +132,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
-
+	if (!printed) {
 		printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		if (c->x86_max_cores > 1)
 			printk(KERN_INFO "CPU: Processor Core ID: %d\n",
 			       c->cpu_core_id);
+		printed = 1;
+	}
 	return;
 #endif
 }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c910a716a71c..e485825130d2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid)
 
 /*
  * Fixup core topology information for AMD multi-node processors.
- * Assumption 1: Number of cores in each internal node is the same.
- * Assumption 2: Mixed systems with both single-node and dual-node
- *               processors are not supported.
+ * Assumption: Number of cores in each internal node is the same.
  */
 #ifdef CONFIG_X86_HT
 static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_PCI
-	u32 t, cpn;
-	u8 n, n_id;
+	unsigned long long value;
+	u32 nodes, cores_per_node;
 	int cpu = smp_processor_id();
 
+	if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
+		return;
+
 	/* fixup topology information only once for a core */
 	if (cpu_has(c, X86_FEATURE_AMD_DCM))
 		return;
 
-	/* check for multi-node processor on boot cpu */
-	t = read_pci_config(0, 24, 3, 0xe8);
-	if (!(t & (1 << 29)))
+	rdmsrl(MSR_FAM10H_NODE_ID, value);
+
+	nodes = ((value >> 3) & 7) + 1;
+	if (nodes == 1)
 		return;
 
 	set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+	cores_per_node = c->x86_max_cores / nodes;
 
-	/* cores per node: each internal node has half the number of cores */
-	cpn = c->x86_max_cores >> 1;
-
-	/* even-numbered NB_id of this dual-node processor */
-	n = c->phys_proc_id << 1;
-
-	/*
-	 * determine internal node id and assign cores fifty-fifty to
-	 * each node of the dual-node processor
-	 */
-	t = read_pci_config(0, 24 + n, 3, 0xe8);
-	n = (t>>30) & 0x3;
-	if (n == 0) {
-		if (c->cpu_core_id < cpn)
-			n_id = 0;
-		else
-			n_id = 1;
-	} else {
-		if (c->cpu_core_id < cpn)
-			n_id = 1;
-		else
-			n_id = 0;
-	}
-
-	/* compute entire NodeID, use llc_shared_map to store sibling info */
-	per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
+	/* store NodeID, use llc_shared_map to store sibling info */
+	per_cpu(cpu_llc_id, cpu) = value & 7;
 
-	/* fixup core id to be in range from 0 to cpn */
-	c->cpu_core_id = c->cpu_core_id % cpn;
-#endif
+	/* fixup core id to be in range from 0 to (cores_per_node - 1) */
+	c->cpu_core_id = c->cpu_core_id % cores_per_node;
 }
 #endif
 
@@ -375,8 +352,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 		node = nearby_node(apicid);
 	}
 	numa_set_node(cpu, node);
-
-	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
@@ -535,7 +510,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		}
 	}
 
-	display_cacheinfo(c);
+	cpu_detect_cache_sizes(c);
 
 	/* Multi core CPU? */
 	if (c->extended_cpuid_level >= 0x80000008) {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c95e831bb095..e58d978e0758 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
 
-	display_cacheinfo(c);
+	cpu_detect_cache_sizes(c);
 }
 
 enum {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b4a567..4868e4a951ee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void)
 static void __cpuinit default_init(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_64
-	display_cacheinfo(c);
+	cpu_detect_cache_sizes(c);
 #else
 	/* Not much we can do here... */
 	/* Check if at least it has cpuid */
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
 	}
 }
 
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ebx, ecx, edx, l2size;
 
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 
 	if (n >= 0x80000005) {
 		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
-		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
-			edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
 		c->x86_cache_size = (ecx>>24) + (edx>>24);
 #ifdef CONFIG_X86_64
 		/* On K8 L1 TLB is inclusive, so don't count it */
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 #endif
 
 	c->x86_cache_size = l2size;
-
-	printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
-			l2size, ecx & 0xFF);
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -432,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 #ifdef CONFIG_X86_HT
 	u32 eax, ebx, ecx, edx;
 	int index_msb, core_bits;
+	static bool printed;
 
 	if (!cpu_has(c, X86_FEATURE_HT))
 		return;
@@ -447,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 	if (smp_num_siblings == 1) {
-		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+		printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
 		goto out;
 	}
 
@@ -474,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		       ((1 << core_bits) - 1);
 
 out:
-	if ((c->x86_max_cores * smp_num_siblings) > 1) {
+	if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
 		printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
 		       c->phys_proc_id);
 		printk(KERN_INFO "CPU: Processor Core ID: %d\n",
 		       c->cpu_core_id);
+		printed = 1;
 	}
 #endif
 }
@@ -659,24 +656,31 @@ void __init early_cpu_init(void)
 	const struct cpu_dev *const *cdev;
 	int count = 0;
 
+#ifdef PROCESSOR_SELECT
 	printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
+
 	for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
 		const struct cpu_dev *cpudev = *cdev;
-		unsigned int j;
 
 		if (count >= X86_VENDOR_NUM)
 			break;
 		cpu_devs[count] = cpudev;
 		count++;
 
-		for (j = 0; j < 2; j++) {
-			if (!cpudev->c_ident[j])
-				continue;
-			printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
-				cpudev->c_ident[j]);
+#ifdef PROCESSOR_SELECT
+		{
+			unsigned int j;
+
+			for (j = 0; j < 2; j++) {
+				if (!cpudev->c_ident[j])
+					continue;
+				printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+					cpudev->c_ident[j]);
+			}
 		}
+#endif
 	}
-
 	early_identify_cpu(&boot_cpu_data);
 }
 
@@ -837,10 +841,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-#ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
-	mcheck_init(c);
-#endif
+	mcheck_cpu_init(c);
 
 	select_idle_routine(c);
 
@@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void)
 
 void __cpuinit cpu_init(void)
 {
-	struct orig_ist *orig_ist;
+	struct orig_ist *oist;
 	struct task_struct *me;
 	struct tss_struct *t;
 	unsigned long v;
@@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void)
 
 	cpu = stack_smp_processor_id();
 	t = &per_cpu(init_tss, cpu);
-	orig_ist = &per_cpu(orig_ist, cpu);
+	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void)
 	if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
 		panic("CPU#%d already initialized!\n", cpu);
 
-	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+	pr_debug("Initializing CPU#%d\n", cpu);
 
 	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
@@ -1136,19 +1138,19 @@ void __cpuinit cpu_init(void)
 	wrmsrl(MSR_KERNEL_GS_BASE, 0);
 	barrier();
 
-	check_efer();
+	x86_configure_nx();
 	if (cpu != 0)
 		enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
 	 */
-	if (!orig_ist->ist[0]) {
+	if (!oist->ist[0]) {
 		char *estacks = per_cpu(exception_stacks, cpu);
 
 		for (v = 0; v < N_EXCEPTION_STACKS; v++) {
 			estacks += exception_stack_sizes[v];
-			orig_ist->ist[v] = t->x86_tss.ist[v] =
+			oist->ist[v] = t->x86_tss.ist[v] =
 				(unsigned long)estacks;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 6de9a908e400..3624e8a0f71b 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,6 @@ struct cpu_dev {
 extern const struct cpu_dev *const __x86_cpu_dev_start[],
 			    *const __x86_cpu_dev_end[];
 
-extern void display_cacheinfo(struct cpuinfo_x86 *c);
+extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
 
 #endif
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
deleted file mode 100644
index dca325c03999..000000000000
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ /dev/null
@@ -1,688 +0,0 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
34static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
35static DEFINE_PER_CPU(int, cpu_priv_count);
36
37static DEFINE_MUTEX(cpu_debug_lock);
38
39static struct dentry *cpu_debugfs_dir;
40
41static struct cpu_debug_base cpu_base[] = {
42 { "mc", CPU_MC, 0 },
43 { "monitor", CPU_MONITOR, 0 },
44 { "time", CPU_TIME, 0 },
45 { "pmc", CPU_PMC, 1 },
46 { "platform", CPU_PLATFORM, 0 },
47 { "apic", CPU_APIC, 0 },
48 { "poweron", CPU_POWERON, 0 },
49 { "control", CPU_CONTROL, 0 },
50 { "features", CPU_FEATURES, 0 },
51 { "lastbranch", CPU_LBRANCH, 0 },
52 { "bios", CPU_BIOS, 0 },
53 { "freq", CPU_FREQ, 0 },
54 { "mtrr", CPU_MTRR, 0 },
55 { "perf", CPU_PERF, 0 },
56 { "cache", CPU_CACHE, 0 },
57 { "sysenter", CPU_SYSENTER, 0 },
58 { "therm", CPU_THERM, 0 },
59 { "misc", CPU_MISC, 0 },
60 { "debug", CPU_DEBUG, 0 },
61 { "pat", CPU_PAT, 0 },
62 { "vmx", CPU_VMX, 0 },
63 { "call", CPU_CALL, 0 },
64 { "base", CPU_BASE, 0 },
65 { "ver", CPU_VER, 0 },
66 { "conf", CPU_CONF, 0 },
67 { "smm", CPU_SMM, 0 },
68 { "svm", CPU_SVM, 0 },
69 { "osvm", CPU_OSVM, 0 },
70 { "tss", CPU_TSS, 0 },
71 { "cr", CPU_CR, 0 },
72 { "dt", CPU_DT, 0 },
73 { "registers", CPU_REG_ALL, 0 },
74};
75
76static struct cpu_file_base cpu_file[] = {
77 { "index", CPU_REG_ALL, 0 },
78 { "value", CPU_REG_ALL, 1 },
79};
80
81/* CPU Registers Range */
82static struct cpu_debug_range cpu_reg_range[] = {
83 { 0x00000000, 0x00000001, CPU_MC, },
84 { 0x00000006, 0x00000007, CPU_MONITOR, },
85 { 0x00000010, 0x00000010, CPU_TIME, },
86 { 0x00000011, 0x00000013, CPU_PMC, },
87 { 0x00000017, 0x00000017, CPU_PLATFORM, },
88 { 0x0000001B, 0x0000001B, CPU_APIC, },
89 { 0x0000002A, 0x0000002B, CPU_POWERON, },
90 { 0x0000002C, 0x0000002C, CPU_FREQ, },
91 { 0x0000003A, 0x0000003A, CPU_CONTROL, },
92 { 0x00000040, 0x00000047, CPU_LBRANCH, },
93 { 0x00000060, 0x00000067, CPU_LBRANCH, },
94 { 0x00000079, 0x00000079, CPU_BIOS, },
95 { 0x00000088, 0x0000008A, CPU_CACHE, },
96 { 0x0000008B, 0x0000008B, CPU_BIOS, },
97 { 0x0000009B, 0x0000009B, CPU_MONITOR, },
98 { 0x000000C1, 0x000000C4, CPU_PMC, },
99 { 0x000000CD, 0x000000CD, CPU_FREQ, },
100 { 0x000000E7, 0x000000E8, CPU_PERF, },
101 { 0x000000FE, 0x000000FE, CPU_MTRR, },
102
103 { 0x00000116, 0x0000011E, CPU_CACHE, },
104 { 0x00000174, 0x00000176, CPU_SYSENTER, },
105 { 0x00000179, 0x0000017B, CPU_MC, },
106 { 0x00000186, 0x00000189, CPU_PMC, },
107 { 0x00000198, 0x00000199, CPU_PERF, },
108 { 0x0000019A, 0x0000019A, CPU_TIME, },
109 { 0x0000019B, 0x0000019D, CPU_THERM, },
110 { 0x000001A0, 0x000001A0, CPU_MISC, },
111 { 0x000001C9, 0x000001C9, CPU_LBRANCH, },
112 { 0x000001D7, 0x000001D8, CPU_LBRANCH, },
113 { 0x000001D9, 0x000001D9, CPU_DEBUG, },
114 { 0x000001DA, 0x000001E0, CPU_LBRANCH, },
115
116 { 0x00000200, 0x0000020F, CPU_MTRR, },
117 { 0x00000250, 0x00000250, CPU_MTRR, },
118 { 0x00000258, 0x00000259, CPU_MTRR, },
119 { 0x00000268, 0x0000026F, CPU_MTRR, },
120 { 0x00000277, 0x00000277, CPU_PAT, },
121 { 0x000002FF, 0x000002FF, CPU_MTRR, },
122
123 { 0x00000300, 0x00000311, CPU_PMC, },
124 { 0x00000345, 0x00000345, CPU_PMC, },
125 { 0x00000360, 0x00000371, CPU_PMC, },
126 { 0x0000038D, 0x00000390, CPU_PMC, },
127 { 0x000003A0, 0x000003BE, CPU_PMC, },
128 { 0x000003C0, 0x000003CD, CPU_PMC, },
129 { 0x000003E0, 0x000003E1, CPU_PMC, },
130 { 0x000003F0, 0x000003F2, CPU_PMC, },
131
132 { 0x00000400, 0x00000417, CPU_MC, },
133 { 0x00000480, 0x0000048B, CPU_VMX, },
134
135 { 0x00000600, 0x00000600, CPU_DEBUG, },
136 { 0x00000680, 0x0000068F, CPU_LBRANCH, },
137 { 0x000006C0, 0x000006CF, CPU_LBRANCH, },
138
139 { 0x000107CC, 0x000107D3, CPU_PMC, },
140
141 { 0xC0000080, 0xC0000080, CPU_FEATURES, },
142 { 0xC0000081, 0xC0000084, CPU_CALL, },
143 { 0xC0000100, 0xC0000102, CPU_BASE, },
144 { 0xC0000103, 0xC0000103, CPU_TIME, },
145
146 { 0xC0010000, 0xC0010007, CPU_PMC, },
147 { 0xC0010010, 0xC0010010, CPU_CONF, },
148 { 0xC0010015, 0xC0010015, CPU_CONF, },
149 { 0xC0010016, 0xC001001A, CPU_MTRR, },
150 { 0xC001001D, 0xC001001D, CPU_MTRR, },
151 { 0xC001001F, 0xC001001F, CPU_CONF, },
152 { 0xC0010030, 0xC0010035, CPU_BIOS, },
153 { 0xC0010044, 0xC0010048, CPU_MC, },
154 { 0xC0010050, 0xC0010056, CPU_SMM, },
155 { 0xC0010058, 0xC0010058, CPU_CONF, },
156 { 0xC0010060, 0xC0010060, CPU_CACHE, },
157 { 0xC0010061, 0xC0010068, CPU_SMM, },
158 { 0xC0010069, 0xC001006B, CPU_SMM, },
159 { 0xC0010070, 0xC0010071, CPU_SMM, },
160 { 0xC0010111, 0xC0010113, CPU_SMM, },
161 { 0xC0010114, 0xC0010118, CPU_SVM, },
162 { 0xC0010140, 0xC0010141, CPU_OSVM, },
163 { 0xC0011022, 0xC0011023, CPU_CONF, },
164};
165
166static int is_typeflag_valid(unsigned cpu, unsigned flag)
167{
168 int i;
169
170 /* Standard Registers should be always valid */
171 if (flag >= CPU_TSS)
172 return 1;
173
174 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
175 if (cpu_reg_range[i].flag == flag)
176 return 1;
177 }
178
179 /* Invalid */
180 return 0;
181}
182
183static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
184 int index, unsigned flag)
185{
186 if (cpu_reg_range[index].flag == flag) {
187 *min = cpu_reg_range[index].min;
188 *max = cpu_reg_range[index].max;
189 } else
190 *max = 0;
191
192 return *max;
193}
194
195/* This function can also be called with seq = NULL for printk */
196static void print_cpu_data(struct seq_file *seq, unsigned type,
197 u32 low, u32 high)
198{
199 struct cpu_private *priv;
200 u64 val = high;
201
202 if (seq) {
203 priv = seq->private;
204 if (priv->file) {
205 val = (val << 32) | low;
206 seq_printf(seq, "0x%llx\n", val);
207 } else
208 seq_printf(seq, " %08x: %08x_%08x\n",
209 type, high, low);
210 } else
211 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
212}
213
214/* This function can also be called with seq = NULL for printk */
215static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
216{
217 unsigned msr, msr_min, msr_max;
218 struct cpu_private *priv;
219 u32 low, high;
220 int i;
221
222 if (seq) {
223 priv = seq->private;
224 if (priv->file) {
225 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
226 &low, &high))
227 print_cpu_data(seq, priv->reg, low, high);
228 return;
229 }
230 }
231
232 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
233 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
234 continue;
235
236 for (msr = msr_min; msr <= msr_max; msr++) {
237 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
238 continue;
239 print_cpu_data(seq, msr, low, high);
240 }
241 }
242}
243
244static void print_tss(void *arg)
245{
246 struct pt_regs *regs = task_pt_regs(current);
247 struct seq_file *seq = arg;
248 unsigned int seg;
249
250 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
251 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
252 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
253 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
254
255 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
256 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
257 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
258 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
259
260#ifdef CONFIG_X86_64
261 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
262 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
263 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
264 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
265 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
266 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
267 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
268 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
269#endif
270
271 asm("movl %%cs,%0" : "=r" (seg));
272 seq_printf(seq, " CS\t: %04x\n", seg);
273 asm("movl %%ds,%0" : "=r" (seg));
274 seq_printf(seq, " DS\t: %04x\n", seg);
275 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
276 asm("movl %%es,%0" : "=r" (seg));
277 seq_printf(seq, " ES\t: %04x\n", seg);
278 asm("movl %%fs,%0" : "=r" (seg));
279 seq_printf(seq, " FS\t: %04x\n", seg);
280 asm("movl %%gs,%0" : "=r" (seg));
281 seq_printf(seq, " GS\t: %04x\n", seg);
282
283 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
284
285 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
286}
287
288static void print_cr(void *arg)
289{
290 struct seq_file *seq = arg;
291
292 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
293 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
294 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
295 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
296#ifdef CONFIG_X86_64
297 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
298#endif
299}
300
301static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
302{
303 seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
304}
305
306static void print_dt(void *seq)
307{
308 struct desc_ptr dt;
309 unsigned long ldt;
310
311 /* IDT */
312 store_idt((struct desc_ptr *)&dt);
313 print_desc_ptr("IDT", seq, dt);
314
315 /* GDT */
316 store_gdt((struct desc_ptr *)&dt);
317 print_desc_ptr("GDT", seq, dt);
318
319 /* LDT */
320 store_ldt(ldt);
321 seq_printf(seq, " LDT\t: %016lx\n", ldt);
322
323 /* TR */
324 store_tr(ldt);
325 seq_printf(seq, " TR\t: %016lx\n", ldt);
326}
327
328static void print_dr(void *arg)
329{
330 struct seq_file *seq = arg;
331 unsigned long dr;
332 int i;
333
334 for (i = 0; i < 8; i++) {
335 /* Ignore db4, db5 */
336 if ((i == 4) || (i == 5))
337 continue;
338 get_debugreg(dr, i);
339 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
340 }
341
342 seq_printf(seq, "\n MSR\t:\n");
343}
344
345static void print_apic(void *arg)
346{
347 struct seq_file *seq = arg;
348
349#ifdef CONFIG_X86_LOCAL_APIC
350 seq_printf(seq, " LAPIC\t:\n");
351 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
352 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
353 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
354 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
355 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
356 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
357 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
358 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
359 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
360 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
361 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
362 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
363 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
364 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
365 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
366 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
367 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
368 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
369 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
370 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
371 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
372 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
373 unsigned int i, v, maxeilvt;
374
375 v = apic_read(APIC_EFEAT);
376 maxeilvt = (v >> 16) & 0xff;
377 seq_printf(seq, " EFEAT\t\t: %08x\n", v);
378 seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
379
380 for (i = 0; i < maxeilvt; i++) {
381 v = apic_read(APIC_EILVTn(i));
382 seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
383 }
384 }
385#endif /* CONFIG_X86_LOCAL_APIC */
386 seq_printf(seq, "\n MSR\t:\n");
387}
388
389static int cpu_seq_show(struct seq_file *seq, void *v)
390{
391 struct cpu_private *priv = seq->private;
392
393 if (priv == NULL)
394 return -EINVAL;
395
396 switch (cpu_base[priv->type].flag) {
397 case CPU_TSS:
398 smp_call_function_single(priv->cpu, print_tss, seq, 1);
399 break;
400 case CPU_CR:
401 smp_call_function_single(priv->cpu, print_cr, seq, 1);
402 break;
403 case CPU_DT:
404 smp_call_function_single(priv->cpu, print_dt, seq, 1);
405 break;
406 case CPU_DEBUG:
407 if (priv->file == CPU_INDEX_BIT)
408 smp_call_function_single(priv->cpu, print_dr, seq, 1);
409 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
410 break;
411 case CPU_APIC:
412 if (priv->file == CPU_INDEX_BIT)
413 smp_call_function_single(priv->cpu, print_apic, seq, 1);
414 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
415 break;
416
417 default:
418 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
419 break;
420 }
421 seq_printf(seq, "\n");
422
423 return 0;
424}
425
426static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
427{
428 if (*pos == 0) /* One time is enough ;-) */
429 return seq;
430
431 return NULL;
432}
433
434static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
435{
436 (*pos)++;
437
438 return cpu_seq_start(seq, pos);
439}
440
441static void cpu_seq_stop(struct seq_file *seq, void *v)
442{
443}
444
445static const struct seq_operations cpu_seq_ops = {
446 .start = cpu_seq_start,
447 .next = cpu_seq_next,
448 .stop = cpu_seq_stop,
449 .show = cpu_seq_show,
450};
451
452static int cpu_seq_open(struct inode *inode, struct file *file)
453{
454 struct cpu_private *priv = inode->i_private;
455 struct seq_file *seq;
456 int err;
457
458 err = seq_open(file, &cpu_seq_ops);
459 if (!err) {
460 seq = file->private_data;
461 seq->private = priv;
462 }
463
464 return err;
465}
466
467static int write_msr(struct cpu_private *priv, u64 val)
468{
469 u32 low, high;
470
471 high = (val >> 32) & 0xffffffff;
472 low = val & 0xffffffff;
473
474 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
475 return 0;
476
477 return -EPERM;
478}
479
480static int write_cpu_register(struct cpu_private *priv, const char *buf)
481{
482 int ret = -EPERM;
483 u64 val;
484
485 ret = strict_strtoull(buf, 0, &val);
486 if (ret < 0)
487 return ret;
488
489 /* Supporting only MSRs */
490 if (priv->type < CPU_TSS_BIT)
491 return write_msr(priv, val);
492
493 return ret;
494}
495
496static ssize_t cpu_write(struct file *file, const char __user *ubuf,
497 size_t count, loff_t *off)
498{
499 struct seq_file *seq = file->private_data;
500 struct cpu_private *priv = seq->private;
501 char buf[19];
502
503 if ((priv == NULL) || (count >= sizeof(buf)))
504 return -EINVAL;
505
506 if (copy_from_user(&buf, ubuf, count))
507 return -EFAULT;
508
509 buf[count] = 0;
510
511 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
512 if (!write_cpu_register(priv, buf))
513 return count;
514
515 return -EACCES;
516}
517
518static const struct file_operations cpu_fops = {
519 .owner = THIS_MODULE,
520 .open = cpu_seq_open,
521 .read = seq_read,
522 .write = cpu_write,
523 .llseek = seq_lseek,
524 .release = seq_release,
525};
526
527static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
528 unsigned file, struct dentry *dentry)
529{
530 struct cpu_private *priv = NULL;
531
532 /* Already intialized */
533 if (file == CPU_INDEX_BIT)
534 if (per_cpu(cpu_arr[type].init, cpu))
535 return 0;
536
537 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
538 if (priv == NULL)
539 return -ENOMEM;
540
541 priv->cpu = cpu;
542 priv->type = type;
543 priv->reg = reg;
544 priv->file = file;
545 mutex_lock(&cpu_debug_lock);
546 per_cpu(priv_arr[type], cpu) = priv;
547 per_cpu(cpu_priv_count, cpu)++;
548 mutex_unlock(&cpu_debug_lock);
549
550 if (file)
551 debugfs_create_file(cpu_file[file].name, S_IRUGO,
552 dentry, (void *)priv, &cpu_fops);
553 else {
554 debugfs_create_file(cpu_base[type].name, S_IRUGO,
555 per_cpu(cpu_arr[type].dentry, cpu),
556 (void *)priv, &cpu_fops);
557 mutex_lock(&cpu_debug_lock);
558 per_cpu(cpu_arr[type].init, cpu) = 1;
559 mutex_unlock(&cpu_debug_lock);
560 }
561
562 return 0;
563}
564
565static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
566 struct dentry *dentry)
567{
568 unsigned file;
569 int err = 0;
570
571 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
572 err = cpu_create_file(cpu, type, reg, file, dentry);
573 if (err)
574 return err;
575 }
576
577 return err;
578}
579
580static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
581{
582 struct dentry *cpu_dentry = NULL;
583 unsigned reg, reg_min, reg_max;
584 int i, err = 0;
585 char reg_dir[12];
586 u32 low, high;
587
588 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
589 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
590 cpu_base[type].flag))
591 continue;
592
593 for (reg = reg_min; reg <= reg_max; reg++) {
594 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
595 continue;
596
597 sprintf(reg_dir, "0x%x", reg);
598 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
599 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
600 if (err)
601 return err;
602 }
603 }
604
605 return err;
606}
607
608static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
609{
610 struct dentry *cpu_dentry = NULL;
611 unsigned type;
612 int err = 0;
613
614 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
615 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
616 continue;
617 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
618 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
619
620 if (type < CPU_TSS_BIT)
621 err = cpu_init_msr(cpu, type, cpu_dentry);
622 else
623 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
624 cpu_dentry);
625 if (err)
626 return err;
627 }
628
629 return err;
630}
631
632static int cpu_init_cpu(void)
633{
634 struct dentry *cpu_dentry = NULL;
635 struct cpuinfo_x86 *cpui;
636 char cpu_dir[12];
637 unsigned cpu;
638 int err = 0;
639
640 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
641 cpui = &cpu_data(cpu);
642 if (!cpu_has(cpui, X86_FEATURE_MSR))
643 continue;
644
645 sprintf(cpu_dir, "cpu%d", cpu);
646 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
647 err = cpu_init_allreg(cpu, cpu_dentry);
648
649 pr_info("cpu%d(%d) debug files %d\n",
650 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
651 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
652 pr_err("Register files count %d exceeds limit %d\n",
653 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
654 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
655 err = -ENFILE;
656 }
657 if (err)
658 return err;
659 }
660
661 return err;
662}
663
664static int __init cpu_debug_init(void)
665{
666 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
667
668 return cpu_init_cpu();
669}
670
671static void __exit cpu_debug_exit(void)
672{
673 int i, cpu;
674
675 if (cpu_debugfs_dir)
676 debugfs_remove_recursive(cpu_debugfs_dir);
677
678 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
679 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
680 kfree(per_cpu(priv_arr[i], cpu));
681}
682
683module_init(cpu_debug_init);
684module_exit(cpu_debug_exit);
685
686MODULE_AUTHOR("Jaswinder Singh Rajput");
687MODULE_DESCRIPTION("CPU Debug module");
688MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b9..870e6cc6ad28 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ
 
 comment "CPUFreq processor drivers"
 
+config X86_PCC_CPUFREQ
+	tristate "Processor Clocking Control interface driver"
+	depends on ACPI && ACPI_PROCESSOR
+	help
+	  This driver adds support for the PCC interface.
+
+	  For details, take a look at:
+	  <file:Documentation/cpu-freq/pcc-cpufreq.txt>.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pcc-cpufreq.
+
+	  If in doubt, say N.
+
 config X86_ACPI_CPUFREQ
 	tristate "ACPI Processor P-States driver"
 	select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294d..1840c0a5170b 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
 obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
+obj-$(CONFIG_X86_PCC_CPUFREQ)		+= pcc-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
 obj-$(CONFIG_X86_POWERNOW_K7)		+= powernow-k7.o
 obj-$(CONFIG_X86_LONGHAUL)		+= longhaul.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b581d3905cb..459168083b77 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/slab.h>
 #include <trace/events/power.h>
 
 #include <linux/acpi.h>
@@ -68,9 +69,9 @@ struct acpi_cpufreq_data {
 	unsigned int cpu_feature;
 };
 
-static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data);
 
-static DEFINE_PER_CPU(struct aperfmperf, old_perf);
+static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
 
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
@@ -190,9 +191,11 @@ static void do_drv_write(void *_cmd)
 
 static void drv_read(struct drv_cmd *cmd)
 {
+	int err;
 	cmd->val = 0;
 
-	smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1);
+	err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1);
+	WARN_ON_ONCE(err);	/* smp_call_function_any() was buggy? */
 }
 
 static void drv_write(struct drv_cmd *cmd)
@@ -214,14 +217,14 @@ static u32 get_cur_val(const struct cpumask *mask)
 	if (unlikely(cpumask_empty(mask)))
 		return 0;
 
-	switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) {
+	switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
-		perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data;
+		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
 		cmd.addr.io.port = perf->control_register.address;
 		cmd.addr.io.bit_width = perf->control_register.bit_width;
 		break;
@@ -268,8 +271,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
 		return 0;
 
-	ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
-	per_cpu(old_perf, cpu) = perf;
+	ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
+	per_cpu(acfreq_old_perf, cpu) = perf;
 
 	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
 
@@ -278,7 +281,7 @@
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu);
 	unsigned int freq;
 	unsigned int cached_freq;
 
@@ -322,7 +325,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 			       unsigned int target_freq, unsigned int relation)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 	struct acpi_processor_performance *perf;
 	struct cpufreq_freqs freqs;
 	struct drv_cmd cmd;
@@ -416,7 +419,7 @@ out:
 
 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_verify\n");
 
@@ -574,7 +577,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		return -ENOMEM;
 
 	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
-	per_cpu(drv_data, cpu) = data;
+	per_cpu(acfreq_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
@@ -725,20 +728,20 @@ err_unreg:
 	acpi_processor_unregister_performance(perf, cpu);
 err_free:
 	kfree(data);
-	per_cpu(drv_data, cpu) = NULL;
+	per_cpu(acfreq_data, cpu) = NULL;
 
 	return result;
 }
 
 static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_cpu_exit\n");
 
 	if (data) {
 		cpufreq_frequency_table_put_attr(policy->cpu);
-		per_cpu(drv_data, policy->cpu) = NULL;
+		per_cpu(acfreq_data, policy->cpu) = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
 		kfree(data);
@@ -749,7 +752,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 
 static int acpi_cpufreq_resume(struct cpufreq_policy *policy)
 {
-	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu);
 
 	dprintk("acpi_cpufreq_resume\n");
 
@@ -764,14 +767,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = {
 };
 
 static struct cpufreq_driver acpi_cpufreq_driver = {
 	.verify		= acpi_cpufreq_verify,
 	.target		= acpi_cpufreq_target,
+	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
 	.resume		= acpi_cpufreq_resume,
 	.name		= "acpi-cpufreq",
 	.owner		= THIS_MODULE,
 	.attr		= acpi_cpufreq_attr,
 };
 
 static int __init acpi_cpufreq_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5d..c587db472a75 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/cpufreq.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d5..16e3483be9e3 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #include <asm/processor-cyrix.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index cabd2fa3fc93..7e7eea4f8261 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 
 	/* Find ACPI data for processor */
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-			    ACPI_UINT32_MAX, &longhaul_walk_callback,
+			    ACPI_UINT32_MAX, &longhaul_walk_callback, NULL,
 			    NULL, (void *)&pr);
 
 	/* Check ACPI support for C3 state */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb766..e7b559d74c52 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpufreq.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 869615193720..7b8a8ba67b07 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/cpumask.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..ce7cde713e71
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,621 @@
1/*
2 * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface
3 *
4 * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
5 * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
6 * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
7 *
8 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; version 2 of the License.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
17 * INFRINGEMENT. See the GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 */
25
26#include <linux/kernel.h>
27#include <linux/module.h>
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/sched.h>
31#include <linux/cpufreq.h>
32#include <linux/compiler.h>
33#include <linux/slab.h>
34
35#include <linux/acpi.h>
36#include <linux/io.h>
37#include <linux/spinlock.h>
38#include <linux/uaccess.h>
39
40#include <acpi/processor.h>
41
42#define PCC_VERSION "1.00.00"
43#define POLL_LOOPS 300
44
45#define CMD_COMPLETE 0x1
46#define CMD_GET_FREQ 0x0
47#define CMD_SET_FREQ 0x1
48
49#define BUF_SZ 4
50
51#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
52 "pcc-cpufreq", msg)
53
54struct pcc_register_resource {
55 u8 descriptor;
56 u16 length;
57 u8 space_id;
58 u8 bit_width;
59 u8 bit_offset;
60 u8 access_size;
61 u64 address;
62} __attribute__ ((packed));
63
64struct pcc_memory_resource {
65 u8 descriptor;
66 u16 length;
67 u8 space_id;
68 u8 resource_usage;
69 u8 type_specific;
70 u64 granularity;
71 u64 minimum;
72 u64 maximum;
73 u64 translation_offset;
74 u64 address_length;
75} __attribute__ ((packed));
76
77static struct cpufreq_driver pcc_cpufreq_driver;
78
79struct pcc_header {
80 u32 signature;
81 u16 length;
82 u8 major;
83 u8 minor;
84 u32 features;
85 u16 command;
86 u16 status;
87 u32 latency;
88 u32 minimum_time;
89 u32 maximum_time;
90 u32 nominal;
91 u32 throttled_frequency;
92 u32 minimum_frequency;
93};
94
95static void __iomem *pcch_virt_addr;
96static struct pcc_header __iomem *pcch_hdr;
97
98static DEFINE_SPINLOCK(pcc_lock);
99
100static struct acpi_generic_address doorbell;
101
102static u64 doorbell_preserve;
103static u64 doorbell_write;
104
105static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f,
106 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46};
107
108struct pcc_cpu {
109 u32 input_offset;
110 u32 output_offset;
111};
112
113static struct pcc_cpu *pcc_cpu_info;
114
115static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
116{
117 cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
118 policy->cpuinfo.max_freq);
119 return 0;
120}
121
122static inline void pcc_cmd(void)
123{
124 u64 doorbell_value;
125 int i;
126
127 acpi_read(&doorbell_value, &doorbell);
128 acpi_write((doorbell_value & doorbell_preserve) | doorbell_write,
129 &doorbell);
130
131 for (i = 0; i < POLL_LOOPS; i++) {
132 if (ioread16(&pcch_hdr->status) & CMD_COMPLETE)
133 break;
134 }
135}
136
137static inline void pcc_clear_mapping(void)
138{
139 if (pcch_virt_addr)
140 iounmap(pcch_virt_addr);
141 pcch_virt_addr = NULL;
142}
143
144static unsigned int pcc_get_freq(unsigned int cpu)
145{
146 struct pcc_cpu *pcc_cpu_data;
147 unsigned int curr_freq;
148 unsigned int freq_limit;
149 u16 status;
150 u32 input_buffer;
151 u32 output_buffer;
152
153 spin_lock(&pcc_lock);
154
155 dprintk("get: get_freq for CPU %d\n", cpu);
156 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
157
158 input_buffer = 0x1;
159 iowrite32(input_buffer,
160 (pcch_virt_addr + pcc_cpu_data->input_offset));
161 iowrite16(CMD_GET_FREQ, &pcch_hdr->command);
162
163 pcc_cmd();
164
165 output_buffer =
166 ioread32(pcch_virt_addr + pcc_cpu_data->output_offset);
167
168 /* Clear the input buffer - we are done with the current command */
169 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
170
171 status = ioread16(&pcch_hdr->status);
172 if (status != CMD_COMPLETE) {
173 dprintk("get: FAILED: for CPU %d, status is %d\n",
174 cpu, status);
175 goto cmd_incomplete;
176 }
177 iowrite16(0, &pcch_hdr->status);
178 curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff))
179 / 100) * 1000);
180
181 dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is "
182 "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n",
183 cpu, (pcch_virt_addr + pcc_cpu_data->output_offset),
184 output_buffer, curr_freq);
185
186 freq_limit = (output_buffer >> 8) & 0xff;
187 if (freq_limit != 0xff) {
188 dprintk("get: frequency for cpu %d is being temporarily"
189 " capped at %d\n", cpu, curr_freq);
190 }
191
192 spin_unlock(&pcc_lock);
193 return curr_freq;
194
195cmd_incomplete:
196 iowrite16(0, &pcch_hdr->status);
197 spin_unlock(&pcc_lock);
198 return -EINVAL;
199}
200
201static int pcc_cpufreq_target(struct cpufreq_policy *policy,
202 unsigned int target_freq,
203 unsigned int relation)
204{
205 struct pcc_cpu *pcc_cpu_data;
206 struct cpufreq_freqs freqs;
207 u16 status;
208 u32 input_buffer;
209 int cpu;
210
211 spin_lock(&pcc_lock);
212 cpu = policy->cpu;
213 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
214
215 dprintk("target: CPU %d should go to target freq: %d "
216 "(virtual) input_offset is 0x%x\n",
217 cpu, target_freq,
218 (pcch_virt_addr + pcc_cpu_data->input_offset));
219
220 freqs.new = target_freq;
221 freqs.cpu = cpu;
222 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
223
224 input_buffer = 0x1 | (((target_freq * 100)
225 / (ioread32(&pcch_hdr->nominal) * 1000)) << 8);
226 iowrite32(input_buffer,
227 (pcch_virt_addr + pcc_cpu_data->input_offset));
228 iowrite16(CMD_SET_FREQ, &pcch_hdr->command);
229
230 pcc_cmd();
231
232 /* Clear the input buffer - we are done with the current command */
233 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
234
235 status = ioread16(&pcch_hdr->status);
236 if (status != CMD_COMPLETE) {
237 dprintk("target: FAILED for cpu %d, with status: 0x%x\n",
238 cpu, status);
239 goto cmd_incomplete;
240 }
241 iowrite16(0, &pcch_hdr->status);
242
243 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
244 dprintk("target: was SUCCESSFUL for cpu %d\n", cpu);
245 spin_unlock(&pcc_lock);
246
247 return 0;
248
249cmd_incomplete:
250 iowrite16(0, &pcch_hdr->status);
251 spin_unlock(&pcc_lock);
252 return -EINVAL;
253}
254
255static int pcc_get_offset(int cpu)
256{
257 acpi_status status;
258 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
259 union acpi_object *pccp, *offset;
260 struct pcc_cpu *pcc_cpu_data;
261 struct acpi_processor *pr;
262 int ret = 0;
263
264 pr = per_cpu(processors, cpu);
265 pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
266
267 status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
268 if (ACPI_FAILURE(status))
269 return -ENODEV;
270
271 pccp = buffer.pointer;
272 if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) {
273 ret = -ENODEV;
274 goto out_free;
275 }
276
277 offset = &(pccp->package.elements[0]);
278 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
279 ret = -ENODEV;
280 goto out_free;
281 }
282
283 pcc_cpu_data->input_offset = offset->integer.value;
284
285 offset = &(pccp->package.elements[1]);
286 if (!offset || offset->type != ACPI_TYPE_INTEGER) {
287 ret = -ENODEV;
288 goto out_free;
289 }
290
291 pcc_cpu_data->output_offset = offset->integer.value;
292
293 memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ);
294 memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ);
295
296 dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data "
297 "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n",
298 cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset);
299out_free:
300 kfree(buffer.pointer);
301 return ret;
302}
303
304static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
305{
306 acpi_status status;
307 struct acpi_object_list input;
308 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
309 union acpi_object in_params[4];
310 union acpi_object *out_obj;
311 u32 capabilities[2];
312 u32 errors;
313 u32 supported;
314 int ret = 0;
315
316 input.count = 4;
317 input.pointer = in_params;
320 in_params[0].type = ACPI_TYPE_BUFFER;
321 in_params[0].buffer.length = 16;
322 in_params[0].buffer.pointer = OSC_UUID;
323 in_params[1].type = ACPI_TYPE_INTEGER;
324 in_params[1].integer.value = 1;
325 in_params[2].type = ACPI_TYPE_INTEGER;
326 in_params[2].integer.value = 2;
327 in_params[3].type = ACPI_TYPE_BUFFER;
328 in_params[3].buffer.length = 8;
329 in_params[3].buffer.pointer = (u8 *)&capabilities;
330
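	/* First _OSC pass: query only -- ask whether the firmware supports the
	 * PCC capability without actually taking control of it. */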
331 capabilities[0] = OSC_QUERY_ENABLE;
332 capabilities[1] = 0x1;
333
334 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
335 if (ACPI_FAILURE(status))
336 return -ENODEV;
337
338 if (!output.length)
339 return -ENODEV;
340
341 out_obj = output.pointer;
342 if (out_obj->type != ACPI_TYPE_BUFFER) {
343 ret = -ENODEV;
344 goto out_free;
345 }
346
347 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
348 if (errors) {
349 ret = -ENODEV;
350 goto out_free;
351 }
352
353 supported = *((u32 *)(out_obj->buffer.pointer + 4));
354 if (!(supported & 0x1)) {
355 ret = -ENODEV;
356 goto out_free;
357 }
358
359 kfree(output.pointer);
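	/* Second _OSC pass: query bit cleared -- actually request control of the
	 * PCC capability. */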
360 capabilities[0] = 0x0;
361 capabilities[1] = 0x1;
362
363 status = acpi_evaluate_object(*handle, "_OSC", &input, &output);
364 if (ACPI_FAILURE(status))
365 return -ENODEV;
366
367 if (!output.length)
368 return -ENODEV;
369
370 out_obj = output.pointer;
371 if (out_obj->type != ACPI_TYPE_BUFFER) {
372 ret = -ENODEV;
373 goto out_free;
374 }
375
376 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
377 if (errors) {
378 ret = -ENODEV;
379 goto out_free;
380 }
381
382 supported = *((u32 *)(out_obj->buffer.pointer + 4));
383 if (!(supported & 0x1)) {
384 ret = -ENODEV;
385 goto out_free;
386 }
387
388out_free:
389 kfree(output.pointer);
390 return ret;
391}
392
393static int __init pcc_cpufreq_probe(void)
394{
395 acpi_status status;
396 struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
397 struct pcc_memory_resource *mem_resource;
398 struct pcc_register_resource *reg_resource;
399 union acpi_object *out_obj, *member;
400 acpi_handle handle, osc_handle;
401 int ret = 0;
402
403 status = acpi_get_handle(NULL, "\\_SB", &handle);
404 if (ACPI_FAILURE(status))
405 return -ENODEV;
406
407 status = acpi_get_handle(handle, "_OSC", &osc_handle);
408 if (ACPI_SUCCESS(status)) {
409 ret = pcc_cpufreq_do_osc(&osc_handle);
410 if (ret)
411 dprintk("probe: _OSC evaluation did not succeed\n");
412 /* Firmware's use of _OSC is optional */
413 ret = 0;
414 }
415
416 status = acpi_evaluate_object(handle, "PCCH", NULL, &output);
417 if (ACPI_FAILURE(status))
418 return -ENODEV;
419
420 out_obj = output.pointer;
421 if (out_obj->type != ACPI_TYPE_PACKAGE) {
422 ret = -ENODEV;
423 goto out_free;
424 }
425
426 member = &out_obj->package.elements[0];
427 if (member->type != ACPI_TYPE_BUFFER) {
428 ret = -ENODEV;
429 goto out_free;
430 }
431
432 mem_resource = (struct pcc_memory_resource *)member->buffer.pointer;
433
434 dprintk("probe: mem_resource descriptor: 0x%x,"
435 " length: %d, space_id: %d, resource_usage: %d,"
436 " type_specific: %d, granularity: 0x%llx,"
437 " minimum: 0x%llx, maximum: 0x%llx,"
438 " translation_offset: 0x%llx, address_length: 0x%llx\n",
439 mem_resource->descriptor, mem_resource->length,
440 mem_resource->space_id, mem_resource->resource_usage,
441 mem_resource->type_specific, mem_resource->granularity,
442 mem_resource->minimum, mem_resource->maximum,
443 mem_resource->translation_offset,
444 mem_resource->address_length);
445
446 if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
447 ret = -ENODEV;
448 goto out_free;
449 }
450
451 pcch_virt_addr = ioremap_nocache(mem_resource->minimum,
452 mem_resource->address_length);
453 if (pcch_virt_addr == NULL) {
454 dprintk("probe: could not map shared mem region\n");
		ret = -ENOMEM;
455 goto out_free;
456 }
457 pcch_hdr = pcch_virt_addr;
458
459 dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr);
460 dprintk("probe: PCCH header is at physical address: 0x%llx,"
461 " signature: 0x%x, length: %d bytes, major: %d, minor: %d,"
462 " supported features: 0x%x, command field: 0x%x,"
463 " status field: 0x%x, nominal latency: %d us\n",
464 mem_resource->minimum, ioread32(&pcch_hdr->signature),
465 ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major),
466 ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features),
467 ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status),
468 ioread32(&pcch_hdr->latency));
469
470 dprintk("probe: min time between commands: %d us,"
471 " max time between commands: %d us,"
472 " nominal CPU frequency: %d MHz,"
473 " minimum CPU frequency: %d MHz,"
474 " minimum CPU frequency without throttling: %d MHz\n",
475 ioread32(&pcch_hdr->minimum_time),
476 ioread32(&pcch_hdr->maximum_time),
477 ioread32(&pcch_hdr->nominal),
478 ioread32(&pcch_hdr->throttled_frequency),
479 ioread32(&pcch_hdr->minimum_frequency));
480
481 member = &out_obj->package.elements[1];
482 if (member->type != ACPI_TYPE_BUFFER) {
483 ret = -ENODEV;
484 goto pcch_free;
485 }
486
487 reg_resource = (struct pcc_register_resource *)member->buffer.pointer;
488
489 doorbell.space_id = reg_resource->space_id;
490 doorbell.bit_width = reg_resource->bit_width;
491 doorbell.bit_offset = reg_resource->bit_offset;
492 doorbell.access_width = 64;
493 doorbell.address = reg_resource->address;
494
495 dprintk("probe: doorbell: space_id is %d, bit_width is %d, "
496 "bit_offset is %d, access_width is %d, address is 0x%llx\n",
497 doorbell.space_id, doorbell.bit_width, doorbell.bit_offset,
498 doorbell.access_width, reg_resource->address);
499
500 member = &out_obj->package.elements[2];
501 if (member->type != ACPI_TYPE_INTEGER) {
502 ret = -ENODEV;
503 goto pcch_free;
504 }
505
506 doorbell_preserve = member->integer.value;
507
508 member = &out_obj->package.elements[3];
509 if (member->type != ACPI_TYPE_INTEGER) {
510 ret = -ENODEV;
511 goto pcch_free;
512 }
513
514 doorbell_write = member->integer.value;
515
516 dprintk("probe: doorbell_preserve: 0x%llx,"
517 " doorbell_write: 0x%llx\n",
518 doorbell_preserve, doorbell_write);
519
520 pcc_cpu_info = alloc_percpu(struct pcc_cpu);
521 if (!pcc_cpu_info) {
522 ret = -ENOMEM;
523 goto pcch_free;
524 }
525
526 printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency"
527 " limits: %d MHz, %d MHz\n", PCC_VERSION,
528 ioread32(&pcch_hdr->minimum_frequency),
529 ioread32(&pcch_hdr->nominal));
530 kfree(output.pointer);
531 return ret;
532pcch_free:
533 pcc_clear_mapping();
534out_free:
535 kfree(output.pointer);
536 return ret;
537}
538
539static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
540{
541 unsigned int cpu = policy->cpu;
542 unsigned int result = 0;
543
544 if (!pcch_virt_addr) {
545 result = -ENODEV;
546 goto pcch_null;
547 }
548
549 result = pcc_get_offset(cpu);
550 if (result) {
551 dprintk("init: PCCP evaluation failed\n");
552 goto free;
553 }
554
555 policy->max = policy->cpuinfo.max_freq =
556 ioread32(&pcch_hdr->nominal) * 1000;
557 policy->min = policy->cpuinfo.min_freq =
558 ioread32(&pcch_hdr->minimum_frequency) * 1000;
559 policy->cur = pcc_get_freq(cpu);
560
561 dprintk("init: policy->max is %d, policy->min is %d\n",
562 policy->max, policy->min);
563
564 return 0;
565free:
566 pcc_clear_mapping();
567 free_percpu(pcc_cpu_info);
568pcch_null:
569 return result;
570}
571
572static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
573{
574 return 0;
575}
576
577static struct cpufreq_driver pcc_cpufreq_driver = {
578 .flags = CPUFREQ_CONST_LOOPS,
579 .get = pcc_get_freq,
580 .verify = pcc_cpufreq_verify,
581 .target = pcc_cpufreq_target,
582 .init = pcc_cpufreq_cpu_init,
583 .exit = pcc_cpufreq_cpu_exit,
584 .name = "pcc-cpufreq",
585 .owner = THIS_MODULE,
586};
587
588static int __init pcc_cpufreq_init(void)
589{
590 int ret;
591
592 if (acpi_disabled)
593 return 0;
594
595 ret = pcc_cpufreq_probe();
596 if (ret) {
597 dprintk("pcc_cpufreq_init: PCCH evaluation failed\n");
598 return ret;
599 }
600
601 ret = cpufreq_register_driver(&pcc_cpufreq_driver);
602
603 return ret;
604}
605
606static void __exit pcc_cpufreq_exit(void)
607{
608 cpufreq_unregister_driver(&pcc_cpufreq_driver);
609
610 pcc_clear_mapping();
611
612 free_percpu(pcc_cpu_info);
613}
614
615MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar");
616MODULE_VERSION(PCC_VERSION);
617MODULE_DESCRIPTION("Processor Clocking Control interface driver");
618MODULE_LICENSE("GPL");
619
620late_initcall(pcc_cpufreq_init);
621module_exit(pcc_cpufreq_exit);
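
The PCCH interface expresses CPU frequency as a percentage of the nominal frequency advertised in the PCCH header; pcc_get_freq() and pcc_cpufreq_target() above convert between kHz and that percentage. A minimal sketch of that conversion, assuming the nominal field is in MHz as the code above treats it (the helper names here are illustrative only, not part of the driver):

/* Sketch only: mirrors the arithmetic in pcc_get_freq()/pcc_cpufreq_target(). */
static unsigned int pcc_pct_to_khz(unsigned int nominal_mhz, unsigned int pct)
{
	/* percentage of nominal, reported in kHz */
	return nominal_mhz * pct / 100 * 1000;
}

static unsigned int pcc_khz_to_pct(unsigned int nominal_mhz, unsigned int target_khz)
{
	/* percentage of nominal, as encoded into bits 8-15 of the input buffer */
	return target_khz * 100 / (nominal_mhz * 1000);
}
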
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index f10dea409f40..b3379d6a5c57 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -13,7 +13,6 @@
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
15#include <linux/ioport.h> 15#include <linux/ioport.h>
16#include <linux/slab.h>
17#include <linux/timex.h> 16#include <linux/timex.h>
18#include <linux/io.h> 17#include <linux/io.h>
19 18
@@ -164,7 +163,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
164 } 163 }
165 164
166 /* cpuinfo and default policy values */ 165 /* cpuinfo and default policy values */
167 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; 166 policy->cpuinfo.transition_latency = 200000;
168 policy->cur = busfreq * max_multiplier; 167 policy->cur = busfreq * max_multiplier;
169 168
170 result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); 169 result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index d47c775eb0ab..9a97116f89e5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = {
714}; 714};
715 715
716static struct cpufreq_driver powernow_driver = { 716static struct cpufreq_driver powernow_driver = {
717 .verify = powernow_verify, 717 .verify = powernow_verify,
718 .target = powernow_target, 718 .target = powernow_target,
719 .get = powernow_get, 719 .get = powernow_get,
720 .init = powernow_cpu_init, 720#ifdef CONFIG_X86_POWERNOW_K7_ACPI
721 .exit = powernow_cpu_exit, 721 .bios_limit = acpi_processor_get_bios_limit,
722 .name = "powernow-k7", 722#endif
723 .owner = THIS_MODULE, 723 .init = powernow_cpu_init,
724 .attr = powernow_table_attr, 724 .exit = powernow_cpu_exit,
725 .name = "powernow-k7",
726 .owner = THIS_MODULE,
727 .attr = powernow_table_attr,
725}; 728};
726 729
727static int __init powernow_init(void) 730static int __init powernow_init(void)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 3f12dabeab52..b6215b9798e2 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data)
806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, 806static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
807 unsigned int index) 807 unsigned int index)
808{ 808{
809 acpi_integer control; 809 u64 control;
810 810
811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 811 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
812 return; 812 return;
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
824{ 824{
825 struct cpufreq_frequency_table *powernow_table; 825 struct cpufreq_frequency_table *powernow_table;
826 int ret_val = -ENODEV; 826 int ret_val = -ENODEV;
827 acpi_integer control, status; 827 u64 control, status;
828 828
829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 829 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
830 dprintk("register performance failed: bad ACPI data\n"); 830 dprintk("register performance failed: bad ACPI data\n");
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
929 powernow_table[i].index = index; 929 powernow_table[i].index = index;
930 930
931 /* Frequency may be rounded for these */ 931 /* Frequency may be rounded for these */
932 if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { 932 if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
933 || boot_cpu_data.x86 == 0x11) {
933 powernow_table[i].frequency = 934 powernow_table[i].frequency =
934 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); 935 freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
935 } else 936 } else
@@ -948,7 +949,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
948 u32 fid; 949 u32 fid;
949 u32 vid; 950 u32 vid;
950 u32 freq, index; 951 u32 freq, index;
951 acpi_integer status, control; 952 u64 status, control;
952 953
953 if (data->exttype) { 954 if (data->exttype) {
954 status = data->acpi_data.states[i].status; 955 status = data->acpi_data.states[i].status;
@@ -1118,7 +1119,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data,
1118static int powernowk8_target(struct cpufreq_policy *pol, 1119static int powernowk8_target(struct cpufreq_policy *pol,
1119 unsigned targfreq, unsigned relation) 1120 unsigned targfreq, unsigned relation)
1120{ 1121{
1121 cpumask_t oldmask; 1122 cpumask_var_t oldmask;
1122 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1123 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1123 u32 checkfid; 1124 u32 checkfid;
1124 u32 checkvid; 1125 u32 checkvid;
@@ -1131,9 +1132,13 @@ static int powernowk8_target(struct cpufreq_policy *pol,
1131 checkfid = data->currfid; 1132 checkfid = data->currfid;
1132 checkvid = data->currvid; 1133 checkvid = data->currvid;
1133 1134
1134 /* only run on specific CPU from here on */ 1135 /* only run on specific CPU from here on. */
1135 oldmask = current->cpus_allowed; 1136 /* This is poor form: use a workqueue or smp_call_function_single */
1136 set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); 1137 if (!alloc_cpumask_var(&oldmask, GFP_KERNEL))
1138 return -ENOMEM;
1139
1140 cpumask_copy(oldmask, tsk_cpus_allowed(current));
1141 set_cpus_allowed_ptr(current, cpumask_of(pol->cpu));
1137 1142
1138 if (smp_processor_id() != pol->cpu) { 1143 if (smp_processor_id() != pol->cpu) {
1139 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1144 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
@@ -1193,7 +1198,8 @@ static int powernowk8_target(struct cpufreq_policy *pol,
1193 ret = 0; 1198 ret = 0;
1194 1199
1195err_out: 1200err_out:
1196 set_cpus_allowed_ptr(current, &oldmask); 1201 set_cpus_allowed_ptr(current, oldmask);
1202 free_cpumask_var(oldmask);
1197 return ret; 1203 return ret;
1198} 1204}
1199 1205
@@ -1351,6 +1357,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1351 1357
1352 kfree(data->powernow_table); 1358 kfree(data->powernow_table);
1353 kfree(data); 1359 kfree(data);
1360 per_cpu(powernow_data, pol->cpu) = NULL;
1354 1361
1355 return 0; 1362 return 0;
1356} 1363}
@@ -1370,7 +1377,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
1370 int err; 1377 int err;
1371 1378
1372 if (!data) 1379 if (!data)
1373 return -EINVAL; 1380 return 0;
1374 1381
1375 smp_call_function_single(cpu, query_values_on_cpu, &err, true); 1382 smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1376 if (err) 1383 if (err)
@@ -1393,14 +1400,15 @@ static struct freq_attr *powernow_k8_attr[] = {
1393}; 1400};
1394 1401
1395static struct cpufreq_driver cpufreq_amd64_driver = { 1402static struct cpufreq_driver cpufreq_amd64_driver = {
1396 .verify = powernowk8_verify, 1403 .verify = powernowk8_verify,
1397 .target = powernowk8_target, 1404 .target = powernowk8_target,
1398 .init = powernowk8_cpu_init, 1405 .bios_limit = acpi_processor_get_bios_limit,
1399 .exit = __devexit_p(powernowk8_cpu_exit), 1406 .init = powernowk8_cpu_init,
1400 .get = powernowk8_get, 1407 .exit = __devexit_p(powernowk8_cpu_exit),
1401 .name = "powernow-k8", 1408 .get = powernowk8_get,
1402 .owner = THIS_MODULE, 1409 .name = "powernow-k8",
1403 .attr = powernow_k8_attr, 1410 .owner = THIS_MODULE,
1411 .attr = powernow_k8_attr,
1404}; 1412};
1405 1413
1406/* driver entry point for init */ 1414/* driver entry point for init */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8d672ef162ce..9b1ff37de46a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -20,6 +20,7 @@
20#include <linux/sched.h> /* current */ 20#include <linux/sched.h> /* current */
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/compiler.h> 22#include <linux/compiler.h>
23#include <linux/gfp.h>
23 24
24#include <asm/msr.h> 25#include <asm/msr.h>
25#include <asm/processor.h> 26#include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 3ae5a7a3a500..561758e95180 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -23,7 +23,6 @@
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/cpufreq.h> 24#include <linux/cpufreq.h>
25#include <linux/pci.h> 25#include <linux/pci.h>
26#include <linux/slab.h>
27#include <linux/sched.h> 26#include <linux/sched.h>
28 27
29#include "speedstep-lib.h" 28#include "speedstep-lib.h"
@@ -39,7 +38,7 @@ static struct pci_dev *speedstep_chipset_dev;
39 38
40/* speedstep_processor 39/* speedstep_processor
41 */ 40 */
42static unsigned int speedstep_processor; 41static enum speedstep_processor speedstep_processor;
43 42
44static u32 pmbase; 43static u32 pmbase;
45 44
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index f4c290b8482f..a94ec6be69fa 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -13,7 +13,6 @@
13#include <linux/moduleparam.h> 13#include <linux/moduleparam.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/cpufreq.h> 15#include <linux/cpufreq.h>
16#include <linux/slab.h>
17 16
18#include <asm/msr.h> 17#include <asm/msr.h>
19#include <asm/tsc.h> 18#include <asm/tsc.h>
@@ -34,7 +33,7 @@ static int relaxed_check;
34 * GET PROCESSOR CORE SPEED IN KHZ * 33 * GET PROCESSOR CORE SPEED IN KHZ *
35 *********************************************************************/ 34 *********************************************************************/
36 35
37static unsigned int pentium3_get_frequency(unsigned int processor) 36static unsigned int pentium3_get_frequency(enum speedstep_processor processor)
38{ 37{
39 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ 38 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
40 struct { 39 struct {
@@ -227,7 +226,7 @@ static unsigned int pentium4_get_frequency(void)
227 226
228 227
229/* Warning: may get called from smp_call_function_single. */ 228/* Warning: may get called from smp_call_function_single. */
230unsigned int speedstep_get_frequency(unsigned int processor) 229unsigned int speedstep_get_frequency(enum speedstep_processor processor)
231{ 230{
232 switch (processor) { 231 switch (processor) {
233 case SPEEDSTEP_CPU_PCORE: 232 case SPEEDSTEP_CPU_PCORE:
@@ -380,7 +379,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor);
380 * DETECT SPEEDSTEP SPEEDS * 379 * DETECT SPEEDSTEP SPEEDS *
381 *********************************************************************/ 380 *********************************************************************/
382 381
383unsigned int speedstep_get_freqs(unsigned int processor, 382unsigned int speedstep_get_freqs(enum speedstep_processor processor,
384 unsigned int *low_speed, 383 unsigned int *low_speed,
385 unsigned int *high_speed, 384 unsigned int *high_speed,
386 unsigned int *transition_latency, 385 unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index 2b6c04e5a304..70d9cea1219d 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -11,18 +11,18 @@
11 11
12 12
13/* processors */ 13/* processors */
14 14enum speedstep_processor {
15#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ 15 SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */
16#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ 16 SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */
17#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ 17 SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */
18#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ 18 SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */
19
20/* the following processors are not speedstep-capable and are not auto-detected 19/* the following processors are not speedstep-capable and are not auto-detected
21 * in speedstep_detect_processor(). However, their speed can be detected using 20 * in speedstep_detect_processor(). However, their speed can be detected using
22 * the speedstep_get_frequency() call. */ 21 * the speedstep_get_frequency() call. */
23#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ 22 SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */
24#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ 23 SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */
25#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ 24 SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */
25};
26 26
27/* speedstep states -- only two of them */ 27/* speedstep states -- only two of them */
28 28
@@ -31,10 +31,10 @@
31 31
32 32
33/* detect a speedstep-capable processor */ 33/* detect a speedstep-capable processor */
34extern unsigned int speedstep_detect_processor (void); 34extern enum speedstep_processor speedstep_detect_processor(void);
35 35
36/* detect the current speed (in khz) of the processor */ 36/* detect the current speed (in khz) of the processor */
37extern unsigned int speedstep_get_frequency(unsigned int processor); 37extern unsigned int speedstep_get_frequency(enum speedstep_processor processor);
38 38
39 39
40/* detect the low and high speeds of the processor. The callback 40/* detect the low and high speeds of the processor. The callback
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor);
42 * SPEEDSTEP_LOW; the second argument is zero so that no 42 * SPEEDSTEP_LOW; the second argument is zero so that no
43 * cpufreq_notify_transition calls are initiated. 43 * cpufreq_notify_transition calls are initiated.
44 */ 44 */
45extern unsigned int speedstep_get_freqs(unsigned int processor, 45extern unsigned int speedstep_get_freqs(enum speedstep_processor processor,
46 unsigned int *low_speed, 46 unsigned int *low_speed,
47 unsigned int *high_speed, 47 unsigned int *high_speed,
48 unsigned int *transition_latency, 48 unsigned int *transition_latency,
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index befea088e4f5..8abd869baabf 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -17,7 +17,6 @@
17#include <linux/moduleparam.h> 17#include <linux/moduleparam.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/cpufreq.h> 19#include <linux/cpufreq.h>
20#include <linux/slab.h>
21#include <linux/delay.h> 20#include <linux/delay.h>
22#include <linux/io.h> 21#include <linux/io.h>
23#include <asm/ist.h> 22#include <asm/ist.h>
@@ -35,7 +34,7 @@ static int smi_cmd;
35static unsigned int smi_sig; 34static unsigned int smi_sig;
36 35
37/* info about the processor */ 36/* info about the processor */
38static unsigned int speedstep_processor; 37static enum speedstep_processor speedstep_processor;
39 38
40/* 39/*
41 * There are only two frequency states for each processor. Values 40 * There are only two frequency states for each processor. Values
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 19807b89f058..4fbd384fb645 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
373 /* Handle the GX (Formerly known as the GX2) */ 373 /* Handle the GX (Formerly known as the GX2) */
374 374
375 if (c->x86 == 5 && c->x86_model == 5) 375 if (c->x86 == 5 && c->x86_model == 5)
376 display_cacheinfo(c); 376 cpu_detect_cache_sizes(c);
377 else 377 else
378 init_cyrix(c); 378 init_cyrix(c);
379} 379}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 40e1835b35e8..1366c7cfd483 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
47 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 47 (c->x86 == 0x6 && c->x86_model >= 0x0e))
48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
49 49
50 /*
51 * Atom erratum AAE44/AAF40/AAG38/AAH41:
52 *
53 * A race condition between speculative fetches and invalidating
54 * a large page. This is worked around in microcode, but we
55 * need the microcode to have already been loaded... so if it is
56 * not, recommend a BIOS update and disable large pages.
57 */
58 if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
59 u32 ucode, junk;
60
61 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
62 sync_core();
63 rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
64
65 if (ucode < 0x20e) {
66 printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
67 clear_cpu_cap(c, X86_FEATURE_PSE);
68 }
69 }
70
50#ifdef CONFIG_X86_64 71#ifdef CONFIG_X86_64
51 set_cpu_cap(c, X86_FEATURE_SYSENTER32); 72 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
52#else 73#else
@@ -70,8 +91,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
70 if (c->x86_power & (1 << 8)) { 91 if (c->x86_power & (1 << 8)) {
71 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 92 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
72 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 93 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
73 set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); 94 if (!check_tsc_unstable())
74 sched_clock_stable = 1; 95 sched_clock_stable = 1;
75 } 96 }
76 97
77 /* 98 /*
@@ -263,11 +284,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
263 /* Don't do the funky fallback heuristics the AMD version employs 284 /* Don't do the funky fallback heuristics the AMD version employs
264 for now. */ 285 for now. */
265 node = apicid_to_node[apicid]; 286 node = apicid_to_node[apicid];
266 if (node == NUMA_NO_NODE || !node_online(node)) 287 if (node == NUMA_NO_NODE)
267 node = first_node(node_online_map); 288 node = first_node(node_online_map);
289 else if (!node_online(node)) {
290 /* reuse the value from init_cpu_to_node() */
291 node = cpu_to_node(cpu);
292 }
268 numa_set_node(cpu, node); 293 numa_set_node(cpu, node);
269
270 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
271#endif 294#endif
272} 295}
273 296
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3167c3d72596..94d8e475744c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/k8.h>
21#include <asm/smp.h>
21 22
22#define LVL_1_INST 1 23#define LVL_1_INST 1
23#define LVL_1_DATA 2 24#define LVL_1_DATA 2
@@ -31,6 +32,8 @@ struct _cache_table {
31 short size; 32 short size;
32}; 33};
33 34
35#define MB(x) ((x) * 1024)
36
34/* All the cache descriptor types we care about (no TLB or 37/* All the cache descriptor types we care about (no TLB or
35 trace cache entries) */ 38 trace cache entries) */
36 39
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
44 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ 47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
45 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ 48 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
46 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 49 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
47 { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 50 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
48 { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 51 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
49 { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 52 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
50 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ 53 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
51 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ 54 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
52 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 55 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] =
59 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ 62 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
60 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ 63 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
61 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ 64 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
62 { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ 65 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
63 { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ 66 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
64 { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ 67 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
65 { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ 68 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
66 { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
67 { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ 70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
68 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
69 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 72 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
70 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 73 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
71 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ 74 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
72 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 75 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
73 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 76 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
74 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 77 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -77,31 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] =
77 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ 80 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
78 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ 81 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
79 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ 82 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
80 { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ 83 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
81 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 84 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
82 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 85 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
83 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 86 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
84 { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
85 { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ 88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
86 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ 89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
87 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ 90 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
88 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ 91 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
89 { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ 92 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
90 { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ 93 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
91 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ 94 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
92 { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ 95 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
93 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ 96 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
94 { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ 97 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
95 { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ 98 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
96 { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ 99 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
97 { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ 100 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
98 { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 101 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
99 { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ 102 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
100 { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 103 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
101 { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ 104 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
102 { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ 105 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
103 { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 106 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
104 { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 107 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
108 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
109 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
110 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
105 { 0x00, 0, 0} 111 { 0x00, 0, 0}
106}; 112};
107 113
@@ -147,7 +153,8 @@ struct _cpuid4_info {
147 union _cpuid4_leaf_ebx ebx; 153 union _cpuid4_leaf_ebx ebx;
148 union _cpuid4_leaf_ecx ecx; 154 union _cpuid4_leaf_ecx ecx;
149 unsigned long size; 155 unsigned long size;
150 unsigned long can_disable; 156 bool can_disable;
157 unsigned int l3_indices;
151 DECLARE_BITMAP(shared_cpu_map, NR_CPUS); 158 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
152}; 159};
153 160
@@ -157,7 +164,8 @@ struct _cpuid4_info_regs {
157 union _cpuid4_leaf_ebx ebx; 164 union _cpuid4_leaf_ebx ebx;
158 union _cpuid4_leaf_ecx ecx; 165 union _cpuid4_leaf_ecx ecx;
159 unsigned long size; 166 unsigned long size;
160 unsigned long can_disable; 167 bool can_disable;
168 unsigned int l3_indices;
161}; 169};
162 170
163unsigned short num_cache_leaves; 171unsigned short num_cache_leaves;
@@ -287,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
287 (ebx->split.ways_of_associativity + 1) - 1; 295 (ebx->split.ways_of_associativity + 1) - 1;
288} 296}
289 297
298struct _cache_attr {
299 struct attribute attr;
300 ssize_t (*show)(struct _cpuid4_info *, char *);
301 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
302};
303
304#ifdef CONFIG_CPU_SUP_AMD
305static unsigned int __cpuinit amd_calc_l3_indices(void)
306{
307 /*
308 * We're called over smp_call_function_single() and therefore
309 * are on the correct cpu.
310 */
311 int cpu = smp_processor_id();
312 int node = cpu_to_node(cpu);
313 struct pci_dev *dev = node_to_k8_nb_misc(node);
314 unsigned int sc0, sc1, sc2, sc3;
315 u32 val = 0;
316
317 pci_read_config_dword(dev, 0x1C4, &val);
318
319 /* calculate subcache sizes */
320 sc0 = !(val & BIT(0));
321 sc1 = !(val & BIT(4));
322 sc2 = !(val & BIT(8)) + !(val & BIT(9));
323 sc3 = !(val & BIT(12)) + !(val & BIT(13));
324
325 return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
326}
327
290static void __cpuinit 328static void __cpuinit
291amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) 329amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
292{ 330{
@@ -296,13 +334,108 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
296 if (boot_cpu_data.x86 == 0x11) 334 if (boot_cpu_data.x86 == 0x11)
297 return; 335 return;
298 336
299 /* see erratum #382 */ 337 /* see errata #382 and #388 */
300 if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) 338 if ((boot_cpu_data.x86 == 0x10) &&
339 ((boot_cpu_data.x86_model < 0x8) ||
340 (boot_cpu_data.x86_mask < 0x1)))
301 return; 341 return;
302 342
303 this_leaf->can_disable = 1; 343 /* not in virtualized environments */
344 if (num_k8_northbridges == 0)
345 return;
346
347 this_leaf->can_disable = true;
348 this_leaf->l3_indices = amd_calc_l3_indices();
349}
350
351static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
352 unsigned int index)
353{
354 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
355 int node = amd_get_nb_id(cpu);
356 struct pci_dev *dev = node_to_k8_nb_misc(node);
357 unsigned int reg = 0;
358
359 if (!this_leaf->can_disable)
360 return -EINVAL;
361
362 if (!dev)
363 return -EINVAL;
364
365 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
366 return sprintf(buf, "0x%08x\n", reg);
304} 367}
305 368
369#define SHOW_CACHE_DISABLE(index) \
370static ssize_t \
371show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
372{ \
373 return show_cache_disable(this_leaf, buf, index); \
374}
375SHOW_CACHE_DISABLE(0)
376SHOW_CACHE_DISABLE(1)
377
378static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
379 const char *buf, size_t count, unsigned int index)
380{
381 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
382 int node = amd_get_nb_id(cpu);
383 struct pci_dev *dev = node_to_k8_nb_misc(node);
384 unsigned long val = 0;
385
386#define SUBCACHE_MASK (3UL << 20)
387#define SUBCACHE_INDEX 0xfff
388
389 if (!this_leaf->can_disable)
390 return -EINVAL;
391
392 if (!capable(CAP_SYS_ADMIN))
393 return -EPERM;
394
395 if (!dev)
396 return -EINVAL;
397
398 if (strict_strtoul(buf, 10, &val) < 0)
399 return -EINVAL;
400
401 /* do not allow writes outside of allowed bits */
402 if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
403 ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
404 return -EINVAL;
405
406 val |= BIT(30);
407 pci_write_config_dword(dev, 0x1BC + index * 4, val);
408 /*
409 * We need to WBINVD on a core on the node containing the L3 cache whose
410 * indices we disable; therefore a simple wbinvd() is not sufficient.
411 */
412 wbinvd_on_cpu(cpu);
413 pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
414 return count;
415}
416
417#define STORE_CACHE_DISABLE(index) \
418static ssize_t \
419store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
420 const char *buf, size_t count) \
421{ \
422 return store_cache_disable(this_leaf, buf, count, index); \
423}
424STORE_CACHE_DISABLE(0)
425STORE_CACHE_DISABLE(1)
426
427static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
428 show_cache_disable_0, store_cache_disable_0);
429static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
430 show_cache_disable_1, store_cache_disable_1);
431
432#else /* CONFIG_CPU_SUP_AMD */
433static void __cpuinit
434amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
435{
436};
437#endif /* CONFIG_CPU_SUP_AMD */
438
306static int 439static int
307__cpuinit cpuid4_cache_lookup_regs(int index, 440__cpuinit cpuid4_cache_lookup_regs(int index,
308 struct _cpuid4_info_regs *this_leaf) 441 struct _cpuid4_info_regs *this_leaf)
@@ -488,22 +621,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
488#endif 621#endif
489 } 622 }
490 623
491 if (trace)
492 printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
493 else if (l1i)
494 printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
495
496 if (l1d)
497 printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
498 else
499 printk(KERN_CONT "\n");
500
501 if (l2)
502 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
503
504 if (l3)
505 printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
506
507 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); 624 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
508 625
509 return l2; 626 return l2;
@@ -512,8 +629,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
512#ifdef CONFIG_SYSFS 629#ifdef CONFIG_SYSFS
513 630
514/* pointer to _cpuid4_info array (for each cache leaf) */ 631/* pointer to _cpuid4_info array (for each cache leaf) */
515static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 632static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
516#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 633#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
517 634
518/* returns CPUs that share the index cache with cpu */ 635/* returns CPUs that share the index cache with cpu */
519int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) 636int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
@@ -537,18 +654,19 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
537{ 654{
538 struct _cpuid4_info *this_leaf, *sibling_leaf; 655 struct _cpuid4_info *this_leaf, *sibling_leaf;
539 unsigned long num_threads_sharing; 656 unsigned long num_threads_sharing;
540 int index_msb, i; 657 int index_msb, i, sibling;
541 struct cpuinfo_x86 *c = &cpu_data(cpu); 658 struct cpuinfo_x86 *c = &cpu_data(cpu);
542 659
543 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { 660 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
544 struct cpuinfo_x86 *d; 661 for_each_cpu(i, c->llc_shared_map) {
545 for_each_online_cpu(i) { 662 if (!per_cpu(ici_cpuid4_info, i))
546 if (!per_cpu(cpuid4_info, i))
547 continue; 663 continue;
548 d = &cpu_data(i);
549 this_leaf = CPUID4_INFO_IDX(i, index); 664 this_leaf = CPUID4_INFO_IDX(i, index);
550 cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), 665 for_each_cpu(sibling, c->llc_shared_map) {
551 d->llc_shared_map); 666 if (!cpu_online(sibling))
667 continue;
668 set_bit(sibling, this_leaf->shared_cpu_map);
669 }
552 } 670 }
553 return; 671 return;
554 } 672 }
@@ -565,7 +683,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
565 c->apicid >> index_msb) { 683 c->apicid >> index_msb) {
566 cpumask_set_cpu(i, 684 cpumask_set_cpu(i,
567 to_cpumask(this_leaf->shared_cpu_map)); 685 to_cpumask(this_leaf->shared_cpu_map));
568 if (i != cpu && per_cpu(cpuid4_info, i)) { 686 if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
569 sibling_leaf = 687 sibling_leaf =
570 CPUID4_INFO_IDX(i, index); 688 CPUID4_INFO_IDX(i, index);
571 cpumask_set_cpu(cpu, to_cpumask( 689 cpumask_set_cpu(cpu, to_cpumask(
@@ -604,8 +722,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
604 for (i = 0; i < num_cache_leaves; i++) 722 for (i = 0; i < num_cache_leaves; i++)
605 cache_remove_shared_cpu_map(cpu, i); 723 cache_remove_shared_cpu_map(cpu, i);
606 724
607 kfree(per_cpu(cpuid4_info, cpu)); 725 kfree(per_cpu(ici_cpuid4_info, cpu));
608 per_cpu(cpuid4_info, cpu) = NULL; 726 per_cpu(ici_cpuid4_info, cpu) = NULL;
609} 727}
610 728
611static int 729static int
@@ -644,15 +762,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
644 if (num_cache_leaves == 0) 762 if (num_cache_leaves == 0)
645 return -ENOENT; 763 return -ENOENT;
646 764
647 per_cpu(cpuid4_info, cpu) = kzalloc( 765 per_cpu(ici_cpuid4_info, cpu) = kzalloc(
648 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); 766 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
649 if (per_cpu(cpuid4_info, cpu) == NULL) 767 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
650 return -ENOMEM; 768 return -ENOMEM;
651 769
652 smp_call_function_single(cpu, get_cpu_leaves, &retval, true); 770 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
653 if (retval) { 771 if (retval) {
654 kfree(per_cpu(cpuid4_info, cpu)); 772 kfree(per_cpu(ici_cpuid4_info, cpu));
655 per_cpu(cpuid4_info, cpu) = NULL; 773 per_cpu(ici_cpuid4_info, cpu) = NULL;
656 } 774 }
657 775
658 return retval; 776 return retval;
@@ -664,7 +782,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
664extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ 782extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
665 783
666/* pointer to kobject for cpuX/cache */ 784/* pointer to kobject for cpuX/cache */
667static DEFINE_PER_CPU(struct kobject *, cache_kobject); 785static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
668 786
669struct _index_kobject { 787struct _index_kobject {
670 struct kobject kobj; 788 struct kobject kobj;
@@ -673,8 +791,8 @@ struct _index_kobject {
673}; 791};
674 792
675/* pointer to array of kobjects for cpuX/cache/indexY */ 793/* pointer to array of kobjects for cpuX/cache/indexY */
676static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); 794static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
677#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) 795#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
678 796
679#define show_one_plus(file_name, object, val) \ 797#define show_one_plus(file_name, object, val) \
680static ssize_t show_##file_name \ 798static ssize_t show_##file_name \
@@ -740,82 +858,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
740#define to_object(k) container_of(k, struct _index_kobject, kobj) 858#define to_object(k) container_of(k, struct _index_kobject, kobj)
741#define to_attr(a) container_of(a, struct _cache_attr, attr) 859#define to_attr(a) container_of(a, struct _cache_attr, attr)
742 860
743static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
744 unsigned int index)
745{
746 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
747 int node = cpu_to_node(cpu);
748 struct pci_dev *dev = node_to_k8_nb_misc(node);
749 unsigned int reg = 0;
750
751 if (!this_leaf->can_disable)
752 return -EINVAL;
753
754 if (!dev)
755 return -EINVAL;
756
757 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
758 return sprintf(buf, "%x\n", reg);
759}
760
761#define SHOW_CACHE_DISABLE(index) \
762static ssize_t \
763show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
764{ \
765 return show_cache_disable(this_leaf, buf, index); \
766}
767SHOW_CACHE_DISABLE(0)
768SHOW_CACHE_DISABLE(1)
769
770static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
771 const char *buf, size_t count, unsigned int index)
772{
773 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
774 int node = cpu_to_node(cpu);
775 struct pci_dev *dev = node_to_k8_nb_misc(node);
776 unsigned long val = 0;
777 unsigned int scrubber = 0;
778
779 if (!this_leaf->can_disable)
780 return -EINVAL;
781
782 if (!capable(CAP_SYS_ADMIN))
783 return -EPERM;
784
785 if (!dev)
786 return -EINVAL;
787
788 if (strict_strtoul(buf, 10, &val) < 0)
789 return -EINVAL;
790
791 val |= 0xc0000000;
792
793 pci_read_config_dword(dev, 0x58, &scrubber);
794 scrubber &= ~0x1f000000;
795 pci_write_config_dword(dev, 0x58, scrubber);
796
797 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
798 wbinvd();
799 pci_write_config_dword(dev, 0x1BC + index * 4, val);
800 return count;
801}
802
803#define STORE_CACHE_DISABLE(index) \
804static ssize_t \
805store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
806 const char *buf, size_t count) \
807{ \
808 return store_cache_disable(this_leaf, buf, count, index); \
809}
810STORE_CACHE_DISABLE(0)
811STORE_CACHE_DISABLE(1)
812
813struct _cache_attr {
814 struct attribute attr;
815 ssize_t (*show)(struct _cpuid4_info *, char *);
816 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
817};
818
819#define define_one_ro(_name) \ 861#define define_one_ro(_name) \
820static struct _cache_attr _name = \ 862static struct _cache_attr _name = \
821 __ATTR(_name, 0444, show_##_name, NULL) 863 __ATTR(_name, 0444, show_##_name, NULL)
@@ -830,23 +872,28 @@ define_one_ro(size);
830define_one_ro(shared_cpu_map); 872define_one_ro(shared_cpu_map);
831define_one_ro(shared_cpu_list); 873define_one_ro(shared_cpu_list);
832 874
833static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, 875#define DEFAULT_SYSFS_CACHE_ATTRS \
834 show_cache_disable_0, store_cache_disable_0); 876 &type.attr, \
835static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 877 &level.attr, \
836 show_cache_disable_1, store_cache_disable_1); 878 &coherency_line_size.attr, \
879 &physical_line_partition.attr, \
880 &ways_of_associativity.attr, \
881 &number_of_sets.attr, \
882 &size.attr, \
883 &shared_cpu_map.attr, \
884 &shared_cpu_list.attr
837 885
838static struct attribute *default_attrs[] = { 886static struct attribute *default_attrs[] = {
839 &type.attr, 887 DEFAULT_SYSFS_CACHE_ATTRS,
840 &level.attr, 888 NULL
841 &coherency_line_size.attr, 889};
842 &physical_line_partition.attr, 890
843 &ways_of_associativity.attr, 891static struct attribute *default_l3_attrs[] = {
844 &number_of_sets.attr, 892 DEFAULT_SYSFS_CACHE_ATTRS,
845 &size.attr, 893#ifdef CONFIG_CPU_SUP_AMD
846 &shared_cpu_map.attr,
847 &shared_cpu_list.attr,
848 &cache_disable_0.attr, 894 &cache_disable_0.attr,
849 &cache_disable_1.attr, 895 &cache_disable_1.attr,
896#endif
850 NULL 897 NULL
851}; 898};
852 899
@@ -877,7 +924,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
877 return ret; 924 return ret;
878} 925}
879 926
880static struct sysfs_ops sysfs_ops = { 927static const struct sysfs_ops sysfs_ops = {
881 .show = show, 928 .show = show,
882 .store = store, 929 .store = store,
883}; 930};
@@ -893,10 +940,10 @@ static struct kobj_type ktype_percpu_entry = {
893 940
894static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) 941static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
895{ 942{
896 kfree(per_cpu(cache_kobject, cpu)); 943 kfree(per_cpu(ici_cache_kobject, cpu));
897 kfree(per_cpu(index_kobject, cpu)); 944 kfree(per_cpu(ici_index_kobject, cpu));
898 per_cpu(cache_kobject, cpu) = NULL; 945 per_cpu(ici_cache_kobject, cpu) = NULL;
899 per_cpu(index_kobject, cpu) = NULL; 946 per_cpu(ici_index_kobject, cpu) = NULL;
900 free_cache_attributes(cpu); 947 free_cache_attributes(cpu);
901} 948}
902 949
@@ -912,14 +959,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
912 return err; 959 return err;
913 960
914 /* Allocate all required memory */ 961 /* Allocate all required memory */
915 per_cpu(cache_kobject, cpu) = 962 per_cpu(ici_cache_kobject, cpu) =
916 kzalloc(sizeof(struct kobject), GFP_KERNEL); 963 kzalloc(sizeof(struct kobject), GFP_KERNEL);
917 if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) 964 if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
918 goto err_out; 965 goto err_out;
919 966
920 per_cpu(index_kobject, cpu) = kzalloc( 967 per_cpu(ici_index_kobject, cpu) = kzalloc(
921 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); 968 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
922 if (unlikely(per_cpu(index_kobject, cpu) == NULL)) 969 if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
923 goto err_out; 970 goto err_out;
924 971
925 return 0; 972 return 0;
@@ -937,13 +984,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
937 unsigned int cpu = sys_dev->id; 984 unsigned int cpu = sys_dev->id;
938 unsigned long i, j; 985 unsigned long i, j;
939 struct _index_kobject *this_object; 986 struct _index_kobject *this_object;
987 struct _cpuid4_info *this_leaf;
940 int retval; 988 int retval;
941 989
942 retval = cpuid4_cache_sysfs_init(cpu); 990 retval = cpuid4_cache_sysfs_init(cpu);
943 if (unlikely(retval < 0)) 991 if (unlikely(retval < 0))
944 return retval; 992 return retval;
945 993
946 retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), 994 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
947 &ktype_percpu_entry, 995 &ktype_percpu_entry,
948 &sys_dev->kobj, "%s", "cache"); 996 &sys_dev->kobj, "%s", "cache");
949 if (retval < 0) { 997 if (retval < 0) {
@@ -955,14 +1003,22 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
955 this_object = INDEX_KOBJECT_PTR(cpu, i); 1003 this_object = INDEX_KOBJECT_PTR(cpu, i);
956 this_object->cpu = cpu; 1004 this_object->cpu = cpu;
957 this_object->index = i; 1005 this_object->index = i;
1006
1007 this_leaf = CPUID4_INFO_IDX(cpu, i);
1008
1009 if (this_leaf->can_disable)
1010 ktype_cache.default_attrs = default_l3_attrs;
1011 else
1012 ktype_cache.default_attrs = default_attrs;
1013
958 retval = kobject_init_and_add(&(this_object->kobj), 1014 retval = kobject_init_and_add(&(this_object->kobj),
959 &ktype_cache, 1015 &ktype_cache,
960 per_cpu(cache_kobject, cpu), 1016 per_cpu(ici_cache_kobject, cpu),
961 "index%1lu", i); 1017 "index%1lu", i);
962 if (unlikely(retval)) { 1018 if (unlikely(retval)) {
963 for (j = 0; j < i; j++) 1019 for (j = 0; j < i; j++)
964 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); 1020 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
965 kobject_put(per_cpu(cache_kobject, cpu)); 1021 kobject_put(per_cpu(ici_cache_kobject, cpu));
966 cpuid4_cache_sysfs_exit(cpu); 1022 cpuid4_cache_sysfs_exit(cpu);
967 return retval; 1023 return retval;
968 } 1024 }
@@ -970,7 +1026,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
970 } 1026 }
971 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); 1027 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
972 1028
973 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 1029 kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
974 return 0; 1030 return 0;
975} 1031}
976 1032
@@ -979,7 +1035,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
979 unsigned int cpu = sys_dev->id; 1035 unsigned int cpu = sys_dev->id;
980 unsigned long i; 1036 unsigned long i;
981 1037
982 if (per_cpu(cpuid4_info, cpu) == NULL) 1038 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
983 return; 1039 return;
984 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) 1040 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
985 return; 1041 return;
@@ -987,7 +1043,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
987 1043
988 for (i = 0; i < num_cache_leaves; i++) 1044 for (i = 0; i < num_cache_leaves; i++)
989 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); 1045 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
990 kobject_put(per_cpu(cache_kobject, cpu)); 1046 kobject_put(per_cpu(ici_cache_kobject, cpu));
991 cpuid4_cache_sysfs_exit(cpu); 1047 cpuid4_cache_sysfs_exit(cpu);
992} 1048}
993 1049
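The hunk above uses DEFAULT_SYSFS_CACHE_ATTRS without showing its definition, which is presumably introduced earlier in this file's diff. A plausible reconstruction, assuming the macro simply factors out the attribute pointers common to every cache leaf, would be:

#define DEFAULT_SYSFS_CACHE_ATTRS	\
	&type.attr,			\
	&level.attr,			\
	&coherency_line_size.attr,	\
	&physical_line_partition.attr,	\
	&ways_of_associativity.attr,	\
	&number_of_sets.attr,		\
	&size.attr,			\
	&shared_cpu_map.attr,		\
	&shared_cpu_list.attr

With that factoring, default_attrs keeps only the common files while default_l3_attrs adds cache_disable_0/cache_disable_1 under CONFIG_CPU_SUP_AMD, and cache_add_dev() switches ktype_cache.default_attrs per leaf based on this_leaf->can_disable before registering each index kobject.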
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 472763d92098..e7dbde7bfedb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -22,6 +22,7 @@
22#include <linux/kdebug.h> 22#include <linux/kdebug.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/sched.h> 24#include <linux/sched.h>
25#include <linux/gfp.h>
25#include <asm/mce.h> 26#include <asm/mce.h>
26#include <asm/apic.h> 27#include <asm/apic.h>
27 28
@@ -74,7 +75,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
74 m->finished = 0; 75 m->finished = 0;
75} 76}
76 77
77static cpumask_t mce_inject_cpumask; 78static cpumask_var_t mce_inject_cpumask;
78 79
79static int mce_raise_notify(struct notifier_block *self, 80static int mce_raise_notify(struct notifier_block *self,
80 unsigned long val, void *data) 81 unsigned long val, void *data)
@@ -82,9 +83,9 @@ static int mce_raise_notify(struct notifier_block *self,
82 struct die_args *args = (struct die_args *)data; 83 struct die_args *args = (struct die_args *)data;
83 int cpu = smp_processor_id(); 84 int cpu = smp_processor_id();
84 struct mce *m = &__get_cpu_var(injectm); 85 struct mce *m = &__get_cpu_var(injectm);
85 if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) 86 if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
86 return NOTIFY_DONE; 87 return NOTIFY_DONE;
87 cpu_clear(cpu, mce_inject_cpumask); 88 cpumask_clear_cpu(cpu, mce_inject_cpumask);
88 if (m->inject_flags & MCJ_EXCEPTION) 89 if (m->inject_flags & MCJ_EXCEPTION)
89 raise_exception(m, args->regs); 90 raise_exception(m, args->regs);
90 else if (m->status) 91 else if (m->status)
@@ -148,22 +149,22 @@ static void raise_mce(struct mce *m)
148 unsigned long start; 149 unsigned long start;
149 int cpu; 150 int cpu;
150 get_online_cpus(); 151 get_online_cpus();
151 mce_inject_cpumask = cpu_online_map; 152 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
152 cpu_clear(get_cpu(), mce_inject_cpumask); 153 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
153 for_each_online_cpu(cpu) { 154 for_each_online_cpu(cpu) {
154 struct mce *mcpu = &per_cpu(injectm, cpu); 155 struct mce *mcpu = &per_cpu(injectm, cpu);
155 if (!mcpu->finished || 156 if (!mcpu->finished ||
156 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 157 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
157 cpu_clear(cpu, mce_inject_cpumask); 158 cpumask_clear_cpu(cpu, mce_inject_cpumask);
158 } 159 }
159 if (!cpus_empty(mce_inject_cpumask)) 160 if (!cpumask_empty(mce_inject_cpumask))
160 apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); 161 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
161 start = jiffies; 162 start = jiffies;
162 while (!cpus_empty(mce_inject_cpumask)) { 163 while (!cpumask_empty(mce_inject_cpumask)) {
163 if (!time_before(jiffies, start + 2*HZ)) { 164 if (!time_before(jiffies, start + 2*HZ)) {
164 printk(KERN_ERR 165 printk(KERN_ERR
165 "Timeout waiting for mce inject NMI %lx\n", 166 "Timeout waiting for mce inject NMI %lx\n",
166 *cpus_addr(mce_inject_cpumask)); 167 *cpumask_bits(mce_inject_cpumask));
167 break; 168 break;
168 } 169 }
169 cpu_relax(); 170 cpu_relax();
@@ -210,6 +211,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
210 211
211static int inject_init(void) 212static int inject_init(void)
212{ 213{
214 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
215 return -ENOMEM;
213 printk(KERN_INFO "Machine check injector initialized\n"); 216 printk(KERN_INFO "Machine check injector initialized\n");
214 mce_chrdev_ops.write = mce_write; 217 mce_chrdev_ops.write = mce_write;
215 register_die_notifier(&mce_raise_nb); 218 register_die_notifier(&mce_raise_nb);
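The mce-inject conversion above replaces a fixed cpumask_t with a cpumask_var_t, which is allocated from the heap when CONFIG_CPUMASK_OFFSTACK is enabled; that is why inject_init() gains an -ENOMEM path. A minimal sketch of the same pattern, with illustrative names not taken from the driver:

#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/smp.h>

static cpumask_var_t pending_cpus;

static int __init pending_init(void)
{
	/* With CONFIG_CPUMASK_OFFSTACK this really allocates; otherwise it is a no-op. */
	if (!alloc_cpumask_var(&pending_cpus, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(pending_cpus, cpu_online_mask);
	cpumask_clear_cpu(get_cpu(), pending_cpus);	/* exclude ourselves */
	put_cpu();

	pr_info("%u other CPUs online\n", cpumask_weight(pending_cpus));
	return 0;
}

static void __exit pending_exit(void)
{
	free_cpumask_var(pending_cpus);
}

module_init(pending_init);
module_exit(pending_exit);
MODULE_LICENSE("GPL");

Passing the mask to an API such as apic->send_IPI_mask() then takes the plain pointer (cpumask_var_t decays to struct cpumask *), which is why the '&' in front of mce_inject_cpumask disappears in raise_mce().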
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 721a77ca8115..8a6f0afa767e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -26,6 +26,7 @@
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/sysfs.h> 27#include <linux/sysfs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
29#include <linux/init.h> 30#include <linux/init.h>
30#include <linux/kmod.h> 31#include <linux/kmod.h>
31#include <linux/poll.h> 32#include <linux/poll.h>
@@ -46,6 +47,16 @@
46 47
47#include "mce-internal.h" 48#include "mce-internal.h"
48 49
50static DEFINE_MUTEX(mce_read_mutex);
51
52#define rcu_dereference_check_mce(p) \
53 rcu_dereference_check((p), \
54 rcu_read_lock_sched_held() || \
55 lockdep_is_held(&mce_read_mutex))
56
57#define CREATE_TRACE_POINTS
58#include <trace/events/mce.h>
59
49int mce_disabled __read_mostly; 60int mce_disabled __read_mostly;
50 61
51#define MISC_MCELOG_MINOR 227 62#define MISC_MCELOG_MINOR 227
@@ -85,18 +96,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
85static DEFINE_PER_CPU(struct mce, mces_seen); 96static DEFINE_PER_CPU(struct mce, mces_seen);
86static int cpu_missing; 97static int cpu_missing;
87 98
88static void default_decode_mce(struct mce *m) 99/*
100 * CPU/chipset specific EDAC code can register a notifier call here to print
101 * MCE errors in a human-readable form.
102 */
103ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
104EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
105
106static int default_decode_mce(struct notifier_block *nb, unsigned long val,
107 void *data)
89{ 108{
90 pr_emerg("No human readable MCE decoding support on this CPU type.\n"); 109 pr_emerg("No human readable MCE decoding support on this CPU type.\n");
91 pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); 110 pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
111
112 return NOTIFY_STOP;
92} 113}
93 114
94/* 115static struct notifier_block mce_dec_nb = {
95 * CPU/chipset specific EDAC code can register a callback here to print 116 .notifier_call = default_decode_mce,
96 * MCE errors in a human-readable form: 117 .priority = -1,
97 */ 118};
98void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
99EXPORT_SYMBOL(x86_mce_decode_callback);
100 119
101/* MCA banks polled by the period polling timer for corrected events */ 120/* MCA banks polled by the period polling timer for corrected events */
102DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 121DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -141,10 +160,13 @@ void mce_log(struct mce *mce)
141{ 160{
142 unsigned next, entry; 161 unsigned next, entry;
143 162
163 /* Emit the trace record: */
164 trace_mce_record(mce);
165
144 mce->finished = 0; 166 mce->finished = 0;
145 wmb(); 167 wmb();
146 for (;;) { 168 for (;;) {
147 entry = rcu_dereference(mcelog.next); 169 entry = rcu_dereference_check_mce(mcelog.next);
148 for (;;) { 170 for (;;) {
149 /* 171 /*
150 * When the buffer fills up discard new entries. 172 * When the buffer fills up discard new entries.
@@ -204,9 +226,9 @@ static void print_mce(struct mce *m)
204 226
205 /* 227 /*
206 * Print out human-readable details about the MCE error, 228 * Print out human-readable details about the MCE error,
207 * (if the CPU has an implementation for that): 229 * (if the CPU has an implementation for that)
208 */ 230 */
209 x86_mce_decode_callback(m); 231 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
210} 232}
211 233
212static void print_mce_head(void) 234static void print_mce_head(void)
@@ -1122,7 +1144,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
1122static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ 1144static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
1123static DEFINE_PER_CPU(struct timer_list, mce_timer); 1145static DEFINE_PER_CPU(struct timer_list, mce_timer);
1124 1146
1125static void mcheck_timer(unsigned long data) 1147static void mce_start_timer(unsigned long data)
1126{ 1148{
1127 struct timer_list *t = &per_cpu(mce_timer, data); 1149 struct timer_list *t = &per_cpu(mce_timer, data);
1128 int *n; 1150 int *n;
@@ -1187,7 +1209,7 @@ int mce_notify_irq(void)
1187} 1209}
1188EXPORT_SYMBOL_GPL(mce_notify_irq); 1210EXPORT_SYMBOL_GPL(mce_notify_irq);
1189 1211
1190static int mce_banks_init(void) 1212static int __cpuinit __mcheck_cpu_mce_banks_init(void)
1191{ 1213{
1192 int i; 1214 int i;
1193 1215
@@ -1206,7 +1228,7 @@ static int mce_banks_init(void)
1206/* 1228/*
1207 * Initialize Machine Checks for a CPU. 1229 * Initialize Machine Checks for a CPU.
1208 */ 1230 */
1209static int __cpuinit mce_cap_init(void) 1231static int __cpuinit __mcheck_cpu_cap_init(void)
1210{ 1232{
1211 unsigned b; 1233 unsigned b;
1212 u64 cap; 1234 u64 cap;
@@ -1228,7 +1250,7 @@ static int __cpuinit mce_cap_init(void)
1228 WARN_ON(banks != 0 && b != banks); 1250 WARN_ON(banks != 0 && b != banks);
1229 banks = b; 1251 banks = b;
1230 if (!mce_banks) { 1252 if (!mce_banks) {
1231 int err = mce_banks_init(); 1253 int err = __mcheck_cpu_mce_banks_init();
1232 1254
1233 if (err) 1255 if (err)
1234 return err; 1256 return err;
@@ -1244,7 +1266,7 @@ static int __cpuinit mce_cap_init(void)
1244 return 0; 1266 return 0;
1245} 1267}
1246 1268
1247static void mce_init(void) 1269static void __mcheck_cpu_init_generic(void)
1248{ 1270{
1249 mce_banks_t all_banks; 1271 mce_banks_t all_banks;
1250 u64 cap; 1272 u64 cap;
@@ -1273,7 +1295,7 @@ static void mce_init(void)
1273} 1295}
1274 1296
1275/* Add per CPU specific workarounds here */ 1297/* Add per CPU specific workarounds here */
1276static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) 1298static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1277{ 1299{
1278 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1300 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1279 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1301 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1341,7 +1363,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1341 return 0; 1363 return 0;
1342} 1364}
1343 1365
1344static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) 1366static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
1345{ 1367{
1346 if (c->x86 != 5) 1368 if (c->x86 != 5)
1347 return; 1369 return;
@@ -1355,7 +1377,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
1355 } 1377 }
1356} 1378}
1357 1379
1358static void mce_cpu_features(struct cpuinfo_x86 *c) 1380static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1359{ 1381{
1360 switch (c->x86_vendor) { 1382 switch (c->x86_vendor) {
1361 case X86_VENDOR_INTEL: 1383 case X86_VENDOR_INTEL:
@@ -1369,18 +1391,19 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
1369 } 1391 }
1370} 1392}
1371 1393
1372static void mce_init_timer(void) 1394static void __mcheck_cpu_init_timer(void)
1373{ 1395{
1374 struct timer_list *t = &__get_cpu_var(mce_timer); 1396 struct timer_list *t = &__get_cpu_var(mce_timer);
1375 int *n = &__get_cpu_var(mce_next_interval); 1397 int *n = &__get_cpu_var(mce_next_interval);
1376 1398
1399 setup_timer(t, mce_start_timer, smp_processor_id());
1400
1377 if (mce_ignore_ce) 1401 if (mce_ignore_ce)
1378 return; 1402 return;
1379 1403
1380 *n = check_interval * HZ; 1404 *n = check_interval * HZ;
1381 if (!*n) 1405 if (!*n)
1382 return; 1406 return;
1383 setup_timer(t, mcheck_timer, smp_processor_id());
1384 t->expires = round_jiffies(jiffies + *n); 1407 t->expires = round_jiffies(jiffies + *n);
1385 add_timer_on(t, smp_processor_id()); 1408 add_timer_on(t, smp_processor_id());
1386} 1409}
@@ -1400,27 +1423,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
1400 * Called for each booted CPU to set up machine checks. 1423 * Called for each booted CPU to set up machine checks.
1401 * Must be called with preempt off: 1424 * Must be called with preempt off:
1402 */ 1425 */
1403void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 1426void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
1404{ 1427{
1405 if (mce_disabled) 1428 if (mce_disabled)
1406 return; 1429 return;
1407 1430
1408 mce_ancient_init(c); 1431 __mcheck_cpu_ancient_init(c);
1409 1432
1410 if (!mce_available(c)) 1433 if (!mce_available(c))
1411 return; 1434 return;
1412 1435
1413 if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { 1436 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
1414 mce_disabled = 1; 1437 mce_disabled = 1;
1415 return; 1438 return;
1416 } 1439 }
1417 1440
1418 machine_check_vector = do_machine_check; 1441 machine_check_vector = do_machine_check;
1419 1442
1420 mce_init(); 1443 __mcheck_cpu_init_generic();
1421 mce_cpu_features(c); 1444 __mcheck_cpu_init_vendor(c);
1422 mce_init_timer(); 1445 __mcheck_cpu_init_timer();
1423 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); 1446 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
1447
1424} 1448}
1425 1449
1426/* 1450/*
@@ -1469,8 +1493,6 @@ static void collect_tscs(void *data)
1469 rdtscll(cpu_tsc[smp_processor_id()]); 1493 rdtscll(cpu_tsc[smp_processor_id()]);
1470} 1494}
1471 1495
1472static DEFINE_MUTEX(mce_read_mutex);
1473
1474static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 1496static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1475 loff_t *off) 1497 loff_t *off)
1476{ 1498{
@@ -1484,7 +1506,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1484 return -ENOMEM; 1506 return -ENOMEM;
1485 1507
1486 mutex_lock(&mce_read_mutex); 1508 mutex_lock(&mce_read_mutex);
1487 next = rcu_dereference(mcelog.next); 1509 next = rcu_dereference_check_mce(mcelog.next);
1488 1510
1489 /* Only supports full reads right now */ 1511 /* Only supports full reads right now */
1490 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 1512 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@ -1549,7 +1571,7 @@ timeout:
1549static unsigned int mce_poll(struct file *file, poll_table *wait) 1571static unsigned int mce_poll(struct file *file, poll_table *wait)
1550{ 1572{
1551 poll_wait(file, &mce_wait, wait); 1573 poll_wait(file, &mce_wait, wait);
1552 if (rcu_dereference(mcelog.next)) 1574 if (rcu_dereference_check_mce(mcelog.next))
1553 return POLLIN | POLLRDNORM; 1575 return POLLIN | POLLRDNORM;
1554 return 0; 1576 return 0;
1555} 1577}
@@ -1640,6 +1662,15 @@ static int __init mcheck_enable(char *str)
1640} 1662}
1641__setup("mce", mcheck_enable); 1663__setup("mce", mcheck_enable);
1642 1664
1665int __init mcheck_init(void)
1666{
1667 atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
1668
1669 mcheck_intel_therm_init();
1670
1671 return 0;
1672}
1673
1643/* 1674/*
1644 * Sysfs support 1675 * Sysfs support
1645 */ 1676 */
@@ -1648,7 +1679,7 @@ __setup("mce", mcheck_enable);
1648 * Disable machine checks on suspend and shutdown. We can't really handle 1679 * Disable machine checks on suspend and shutdown. We can't really handle
1649 * them later. 1680 * them later.
1650 */ 1681 */
1651static int mce_disable(void) 1682static int mce_disable_error_reporting(void)
1652{ 1683{
1653 int i; 1684 int i;
1654 1685
@@ -1663,12 +1694,12 @@ static int mce_disable(void)
1663 1694
1664static int mce_suspend(struct sys_device *dev, pm_message_t state) 1695static int mce_suspend(struct sys_device *dev, pm_message_t state)
1665{ 1696{
1666 return mce_disable(); 1697 return mce_disable_error_reporting();
1667} 1698}
1668 1699
1669static int mce_shutdown(struct sys_device *dev) 1700static int mce_shutdown(struct sys_device *dev)
1670{ 1701{
1671 return mce_disable(); 1702 return mce_disable_error_reporting();
1672} 1703}
1673 1704
1674/* 1705/*
@@ -1678,8 +1709,8 @@ static int mce_shutdown(struct sys_device *dev)
1678 */ 1709 */
1679static int mce_resume(struct sys_device *dev) 1710static int mce_resume(struct sys_device *dev)
1680{ 1711{
1681 mce_init(); 1712 __mcheck_cpu_init_generic();
1682 mce_cpu_features(&current_cpu_data); 1713 __mcheck_cpu_init_vendor(&current_cpu_data);
1683 1714
1684 return 0; 1715 return 0;
1685} 1716}
@@ -1689,8 +1720,8 @@ static void mce_cpu_restart(void *data)
1689 del_timer_sync(&__get_cpu_var(mce_timer)); 1720 del_timer_sync(&__get_cpu_var(mce_timer));
1690 if (!mce_available(&current_cpu_data)) 1721 if (!mce_available(&current_cpu_data))
1691 return; 1722 return;
1692 mce_init(); 1723 __mcheck_cpu_init_generic();
1693 mce_init_timer(); 1724 __mcheck_cpu_init_timer();
1694} 1725}
1695 1726
1696/* Reinit MCEs after user configuration changes */ 1727/* Reinit MCEs after user configuration changes */
@@ -1716,7 +1747,7 @@ static void mce_enable_ce(void *all)
1716 cmci_reenable(); 1747 cmci_reenable();
1717 cmci_recheck(); 1748 cmci_recheck();
1718 if (all) 1749 if (all)
1719 mce_init_timer(); 1750 __mcheck_cpu_init_timer();
1720} 1751}
1721 1752
1722static struct sysdev_class mce_sysclass = { 1753static struct sysdev_class mce_sysclass = {
@@ -1904,7 +1935,7 @@ error2:
1904 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); 1935 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1905error: 1936error:
1906 while (--i >= 0) 1937 while (--i >= 0)
1907 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); 1938 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1908 1939
1909 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1940 sysdev_unregister(&per_cpu(mce_dev, cpu));
1910 1941
@@ -1929,13 +1960,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1929} 1960}
1930 1961
1931/* Make sure there are no machine checks on offlined CPUs. */ 1962/* Make sure there are no machine checks on offlined CPUs. */
1932static void mce_disable_cpu(void *h) 1963static void __cpuinit mce_disable_cpu(void *h)
1933{ 1964{
1934 unsigned long action = *(unsigned long *)h; 1965 unsigned long action = *(unsigned long *)h;
1935 int i; 1966 int i;
1936 1967
1937 if (!mce_available(&current_cpu_data)) 1968 if (!mce_available(&current_cpu_data))
1938 return; 1969 return;
1970
1939 if (!(action & CPU_TASKS_FROZEN)) 1971 if (!(action & CPU_TASKS_FROZEN))
1940 cmci_clear(); 1972 cmci_clear();
1941 for (i = 0; i < banks; i++) { 1973 for (i = 0; i < banks; i++) {
@@ -1946,7 +1978,7 @@ static void mce_disable_cpu(void *h)
1946 } 1978 }
1947} 1979}
1948 1980
1949static void mce_reenable_cpu(void *h) 1981static void __cpuinit mce_reenable_cpu(void *h)
1950{ 1982{
1951 unsigned long action = *(unsigned long *)h; 1983 unsigned long action = *(unsigned long *)h;
1952 int i; 1984 int i;
@@ -1991,9 +2023,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
1991 break; 2023 break;
1992 case CPU_DOWN_FAILED: 2024 case CPU_DOWN_FAILED:
1993 case CPU_DOWN_FAILED_FROZEN: 2025 case CPU_DOWN_FAILED_FROZEN:
1994 t->expires = round_jiffies(jiffies + 2026 if (!mce_ignore_ce && check_interval) {
2027 t->expires = round_jiffies(jiffies +
1995 __get_cpu_var(mce_next_interval)); 2028 __get_cpu_var(mce_next_interval));
1996 add_timer_on(t, cpu); 2029 add_timer_on(t, cpu);
2030 }
1997 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2031 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1998 break; 2032 break;
1999 case CPU_POST_DEAD: 2033 case CPU_POST_DEAD:
@@ -2016,6 +2050,7 @@ static __init void mce_init_banks(void)
2016 struct mce_bank *b = &mce_banks[i]; 2050 struct mce_bank *b = &mce_banks[i];
2017 struct sysdev_attribute *a = &b->attr; 2051 struct sysdev_attribute *a = &b->attr;
2018 2052
2053 sysfs_attr_init(&a->attr);
2019 a->attr.name = b->attrname; 2054 a->attr.name = b->attrname;
2020 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2055 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2021 2056
@@ -2025,7 +2060,7 @@ static __init void mce_init_banks(void)
2025 } 2060 }
2026} 2061}
2027 2062
2028static __init int mce_init_device(void) 2063static __init int mcheck_init_device(void)
2029{ 2064{
2030 int err; 2065 int err;
2031 int i = 0; 2066 int i = 0;
@@ -2053,7 +2088,7 @@ static __init int mce_init_device(void)
2053 return err; 2088 return err;
2054} 2089}
2055 2090
2056device_initcall(mce_init_device); 2091device_initcall(mcheck_init_device);
2057 2092
2058/* 2093/*
2059 * Old style boot options parsing. Only for compatibility. 2094 * Old style boot options parsing. Only for compatibility.
@@ -2101,7 +2136,7 @@ static int fake_panic_set(void *data, u64 val)
2101DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, 2136DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
2102 fake_panic_set, "%llu\n"); 2137 fake_panic_set, "%llu\n");
2103 2138
2104static int __init mce_debugfs_init(void) 2139static int __init mcheck_debugfs_init(void)
2105{ 2140{
2106 struct dentry *dmce, *ffake_panic; 2141 struct dentry *dmce, *ffake_panic;
2107 2142
@@ -2115,5 +2150,5 @@ static int __init mce_debugfs_init(void)
2115 2150
2116 return 0; 2151 return 0;
2117} 2152}
2118late_initcall(mce_debugfs_init); 2153late_initcall(mcheck_debugfs_init);
2119#endif 2154#endif
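With the change above, the single x86_mce_decode_callback function pointer becomes an atomic notifier chain: the generic "run it through mcelog --ascii" hint is now just the lowest-priority (-1) entry, registered from the new mcheck_init(), and CPU- or chipset-specific decoders can hook in alongside it. A sketch of how an out-of-tree decoder module might attach to the chain; the extern declaration stands in for whatever header actually exposes the symbol, and the printed fields are only examples:

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/mce.h>

extern struct atomic_notifier_head x86_mce_decoder_chain;

static int demo_decode_mce(struct notifier_block *nb, unsigned long val,
			   void *data)
{
	struct mce *m = data;

	pr_emerg("demo decoder: bank %d, status 0x%016llx\n",
		 m->bank, m->status);

	/* NOTIFY_STOP keeps the default fallback message quiet. */
	return NOTIFY_STOP;
}

static struct notifier_block demo_mce_dec = {
	.notifier_call	= demo_decode_mce,
	/* default priority 0 runs before the -1 fallback in mce.c */
};

static int __init demo_init(void)
{
	atomic_notifier_chain_register(&x86_mce_decoder_chain, &demo_mce_dec);
	return 0;
}

static void __exit demo_exit(void)
{
	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &demo_mce_dec);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");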
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 83a3d1f4efca..224392d8fe8c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -21,6 +21,7 @@
21#include <linux/errno.h> 21#include <linux/errno.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/sysfs.h> 23#include <linux/sysfs.h>
24#include <linux/slab.h>
24#include <linux/init.h> 25#include <linux/init.h>
25#include <linux/cpu.h> 26#include <linux/cpu.h>
26#include <linux/smp.h> 27#include <linux/smp.h>
@@ -388,7 +389,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
388 return ret; 389 return ret;
389} 390}
390 391
391static struct sysfs_ops threshold_ops = { 392static const struct sysfs_ops threshold_ops = {
392 .show = show, 393 .show = show,
393 .store = store, 394 .store = store,
394}; 395};
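The threshold_ops constification here (and the sysfs_ops constification in intel_cacheinfo.c above) works because struct kobj_type takes the ops table through a const pointer in this kernel series, so read-only ops structures can live in rodata. A minimal sketch of the wiring, with illustrative names:

#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t demo_show(struct kobject *kobj, struct attribute *attr,
			 char *buf)
{
	return 0;		/* a real driver formats attribute data into buf */
}

static ssize_t demo_store(struct kobject *kobj, struct attribute *attr,
			  const char *buf, size_t count)
{
	return count;
}

/* Never modified at runtime, so it can be const. */
static const struct sysfs_ops demo_sysfs_ops = {
	.show	= demo_show,
	.store	= demo_store,
};

static struct kobj_type demo_ktype = {
	.sysfs_ops	= &demo_sysfs_ops,
};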
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 7c785634af2b..62b48e40920a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -5,6 +5,7 @@
5 * Author: Andi Kleen 5 * Author: Andi Kleen
6 */ 6 */
7 7
8#include <linux/gfp.h>
8#include <linux/init.h> 9#include <linux/init.h>
9#include <linux/interrupt.h> 10#include <linux/interrupt.h>
10#include <linux/percpu.h> 11#include <linux/percpu.h>
@@ -95,7 +96,7 @@ static void cmci_discover(int banks, int boot)
95 96
96 /* Already owned by someone else? */ 97 /* Already owned by someone else? */
97 if (val & CMCI_EN) { 98 if (val & CMCI_EN) {
98 if (test_and_clear_bit(i, owned) || boot) 99 if (test_and_clear_bit(i, owned) && !boot)
99 print_update("SHD", &hdr, i); 100 print_update("SHD", &hdr, i);
100 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 101 __clear_bit(i, __get_cpu_var(mce_poll_banks));
101 continue; 102 continue;
@@ -107,7 +108,7 @@ static void cmci_discover(int banks, int boot)
107 108
108 /* Did the enable bit stick? -- the bank supports CMCI */ 109 /* Did the enable bit stick? -- the bank supports CMCI */
109 if (val & CMCI_EN) { 110 if (val & CMCI_EN) {
110 if (!test_and_set_bit(i, owned) || boot) 111 if (!test_and_set_bit(i, owned) && !boot)
111 print_update("CMCI", &hdr, i); 112 print_update("CMCI", &hdr, i);
112 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 113 __clear_bit(i, __get_cpu_var(mce_poll_banks));
113 } else { 114 } else {
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index b3a1dba75330..81c499eceb21 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
49 49
50static atomic_t therm_throt_en = ATOMIC_INIT(0); 50static atomic_t therm_throt_en = ATOMIC_INIT(0);
51 51
52static u32 lvtthmr_init __read_mostly;
53
52#ifdef CONFIG_SYSFS 54#ifdef CONFIG_SYSFS
53#define define_therm_throt_sysdev_one_ro(_name) \ 55#define define_therm_throt_sysdev_one_ro(_name) \
54 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) 56 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@@ -254,14 +256,34 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
254 ack_APIC_irq(); 256 ack_APIC_irq();
255} 257}
256 258
259/* Thermal monitoring depends on APIC, ACPI and clock modulation */
260static int intel_thermal_supported(struct cpuinfo_x86 *c)
261{
262 if (!cpu_has_apic)
263 return 0;
264 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
265 return 0;
266 return 1;
267}
268
269void __init mcheck_intel_therm_init(void)
270{
271 /*
 272 * This function is only called on the boot CPU. Save the initial
 273 * thermal LVT value on the BSP and use it later to restore the
 274 * thermal LVT entry that the BIOS programmed on the APs.
275 */
276 if (intel_thermal_supported(&boot_cpu_data))
277 lvtthmr_init = apic_read(APIC_LVTTHMR);
278}
279
257void intel_init_thermal(struct cpuinfo_x86 *c) 280void intel_init_thermal(struct cpuinfo_x86 *c)
258{ 281{
259 unsigned int cpu = smp_processor_id(); 282 unsigned int cpu = smp_processor_id();
260 int tm2 = 0; 283 int tm2 = 0;
261 u32 l, h; 284 u32 l, h;
262 285
263 /* Thermal monitoring depends on ACPI and clock modulation*/ 286 if (!intel_thermal_supported(c))
264 if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
265 return; 287 return;
266 288
267 /* 289 /*
@@ -270,7 +292,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
270 * since it might be delivered via SMI already: 292 * since it might be delivered via SMI already:
271 */ 293 */
272 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 294 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
273 h = apic_read(APIC_LVTTHMR); 295
296 /*
 297 * The initial value of the thermal LVT entry on every AP always reads
 298 * 0x10000, because the BSP wakes the APs with an INIT-SIPI-SIPI
 299 * sequence and the INIT IPI resets the LVT registers to 0 except for
 300 * the mask bits, which are set to 1.
 301 * Always restore the value the BIOS programmed, using the copy saved
 302 * on the BSP, since the BIOS sets the same value for all
 303 * threads/cores.
304 */
305 apic_write(APIC_LVTTHMR, lvtthmr_init);
306
307 h = lvtthmr_init;
308
274 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { 309 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
275 printk(KERN_DEBUG 310 printk(KERN_DEBUG
276 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 311 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
@@ -312,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
312 l = apic_read(APIC_LVTTHMR); 347 l = apic_read(APIC_LVTTHMR);
313 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 348 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
314 349
315 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 350 printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
316 cpu, tm2 ? "TM2" : "TM1"); 351 tm2 ? "TM2" : "TM1");
317 352
318 /* enable thermal throttle processing */ 353 /* enable thermal throttle processing */
319 atomic_set(&therm_throt_en, 1); 354 atomic_set(&therm_throt_en, 1);
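The underlying bug here is one of ordering: intel_init_thermal() used to read APIC_LVTTHMR on the CPU it was running on, but on an AP that register reads 0x10000 after the INIT IPI (masked, delivery-mode bits clear), so the old check could miss that the BIOS had routed thermal events to SMI and the kernel would take the LVT over on those CPUs. The new code snapshots the BSP value in mcheck_intel_therm_init() and applies it everywhere. The three states the code distinguishes can be summarized in a small helper; this is illustrative only and not part of the patch:

#include <linux/types.h>
#include <asm/apicdef.h>

/* Classify who currently owns a thermal LVT entry, given its raw value. */
static const char *lvtthmr_owner(u32 lvt)
{
	if (lvt & APIC_DM_SMI)
		return "BIOS (SMI delivery)";	/* leave it alone */
	if (lvt & APIC_LVT_MASKED)
		return "nobody (masked)";	/* e.g. 0x10000 right after INIT */
	return "kernel";			/* thermal interrupt already set up */
}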
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b56f8e9..ad9e5ed81181 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
1obj-y := main.o if.o generic.o state.o cleanup.o 1obj-y := main.o if.o generic.o cleanup.o
2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o 2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
3 3
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 33af14110dfd..92ba9cd31c9a 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
108 return 0; 108 return 0;
109} 109}
110 110
111static struct mtrr_ops amd_mtrr_ops = { 111static const struct mtrr_ops amd_mtrr_ops = {
112 .vendor = X86_VENDOR_AMD, 112 .vendor = X86_VENDOR_AMD,
113 .set = amd_set_mtrr, 113 .set = amd_set_mtrr,
114 .get = amd_get_mtrr, 114 .get = amd_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index de89f14eff3a..316fe3e60a97 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
110 return 0; 110 return 0;
111} 111}
112 112
113static struct mtrr_ops centaur_mtrr_ops = { 113static const struct mtrr_ops centaur_mtrr_ops = {
114 .vendor = X86_VENDOR_CENTAUR, 114 .vendor = X86_VENDOR_CENTAUR,
115 .set = centaur_set_mcr, 115 .set = centaur_set_mcr,
116 .get = centaur_get_mcr, 116 .get = centaur_get_mcr,
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 73c86db5acbe..06130b52f012 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -22,10 +22,10 @@
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/sort.h>
26#include <linux/mutex.h> 25#include <linux/mutex.h>
27#include <linux/uaccess.h> 26#include <linux/uaccess.h>
28#include <linux/kvm_para.h> 27#include <linux/kvm_para.h>
28#include <linux/range.h>
29 29
30#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/e820.h> 31#include <asm/e820.h>
@@ -34,11 +34,6 @@
34 34
35#include "mtrr.h" 35#include "mtrr.h"
36 36
37struct res_range {
38 unsigned long start;
39 unsigned long end;
40};
41
42struct var_mtrr_range_state { 37struct var_mtrr_range_state {
43 unsigned long base_pfn; 38 unsigned long base_pfn;
44 unsigned long size_pfn; 39 unsigned long size_pfn;
@@ -56,7 +51,7 @@ struct var_mtrr_state {
56/* Should be related to MTRR_VAR_RANGES nums */ 51/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256 52#define RANGE_NUM 256
58 53
59static struct res_range __initdata range[RANGE_NUM]; 54static struct range __initdata range[RANGE_NUM];
60static int __initdata nr_range; 55static int __initdata nr_range;
61 56
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; 57static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
@@ -64,117 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
64static int __initdata debug_print; 59static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) 60#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66 61
67
68static int __init
69add_range(struct res_range *range, int nr_range,
70 unsigned long start, unsigned long end)
71{
72 /* Out of slots: */
73 if (nr_range >= RANGE_NUM)
74 return nr_range;
75
76 range[nr_range].start = start;
77 range[nr_range].end = end;
78
79 nr_range++;
80
81 return nr_range;
82}
83
84static int __init
85add_range_with_merge(struct res_range *range, int nr_range,
86 unsigned long start, unsigned long end)
87{
88 int i;
89
90 /* Try to merge it with old one: */
91 for (i = 0; i < nr_range; i++) {
92 unsigned long final_start, final_end;
93 unsigned long common_start, common_end;
94
95 if (!range[i].end)
96 continue;
97
98 common_start = max(range[i].start, start);
99 common_end = min(range[i].end, end);
100 if (common_start > common_end + 1)
101 continue;
102
103 final_start = min(range[i].start, start);
104 final_end = max(range[i].end, end);
105
106 range[i].start = final_start;
107 range[i].end = final_end;
108 return nr_range;
109 }
110
111 /* Need to add it: */
112 return add_range(range, nr_range, start, end);
113}
114
115static void __init
116subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117{
118 int i, j;
119
120 for (j = 0; j < RANGE_NUM; j++) {
121 if (!range[j].end)
122 continue;
123
124 if (start <= range[j].start && end >= range[j].end) {
125 range[j].start = 0;
126 range[j].end = 0;
127 continue;
128 }
129
130 if (start <= range[j].start && end < range[j].end &&
131 range[j].start < end + 1) {
132 range[j].start = end + 1;
133 continue;
134 }
135
136
137 if (start > range[j].start && end >= range[j].end &&
138 range[j].end > start - 1) {
139 range[j].end = start - 1;
140 continue;
141 }
142
143 if (start > range[j].start && end < range[j].end) {
144 /* Find the new spare: */
145 for (i = 0; i < RANGE_NUM; i++) {
146 if (range[i].end == 0)
147 break;
148 }
149 if (i < RANGE_NUM) {
150 range[i].end = range[j].end;
151 range[i].start = end + 1;
152 } else {
153 printk(KERN_ERR "run of slot in ranges\n");
154 }
155 range[j].end = start - 1;
156 continue;
157 }
158 }
159}
160
161static int __init cmp_range(const void *x1, const void *x2)
162{
163 const struct res_range *r1 = x1;
164 const struct res_range *r2 = x2;
165 long start1, start2;
166
167 start1 = r1->start;
168 start2 = r2->start;
169
170 return start1 - start2;
171}
172
173#define BIOS_BUG_MSG KERN_WARNING \ 62#define BIOS_BUG_MSG KERN_WARNING \
174 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" 63 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
175 64
176static int __init 65static int __init
177x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 66x86_get_mtrr_mem_range(struct range *range, int nr_range,
178 unsigned long extra_remove_base, 67 unsigned long extra_remove_base,
179 unsigned long extra_remove_size) 68 unsigned long extra_remove_size)
180{ 69{
@@ -188,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
188 continue; 77 continue;
189 base = range_state[i].base_pfn; 78 base = range_state[i].base_pfn;
190 size = range_state[i].size_pfn; 79 size = range_state[i].size_pfn;
191 nr_range = add_range_with_merge(range, nr_range, base, 80 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
192 base + size - 1); 81 base, base + size);
193 } 82 }
194 if (debug_print) { 83 if (debug_print) {
195 printk(KERN_DEBUG "After WB checking\n"); 84 printk(KERN_DEBUG "After WB checking\n");
196 for (i = 0; i < nr_range; i++) 85 for (i = 0; i < nr_range; i++)
197 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 86 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
198 range[i].start, range[i].end + 1); 87 range[i].start, range[i].end);
199 } 88 }
200 89
201 /* Take out UC ranges: */ 90 /* Take out UC ranges: */
@@ -217,51 +106,43 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
217 size -= (1<<(20-PAGE_SHIFT)) - base; 106 size -= (1<<(20-PAGE_SHIFT)) - base;
218 base = 1<<(20-PAGE_SHIFT); 107 base = 1<<(20-PAGE_SHIFT);
219 } 108 }
220 subtract_range(range, base, base + size - 1); 109 subtract_range(range, RANGE_NUM, base, base + size);
221 } 110 }
222 if (extra_remove_size) 111 if (extra_remove_size)
223 subtract_range(range, extra_remove_base, 112 subtract_range(range, RANGE_NUM, extra_remove_base,
224 extra_remove_base + extra_remove_size - 1); 113 extra_remove_base + extra_remove_size);
225 114
226 /* get new range num */
227 nr_range = 0;
228 for (i = 0; i < RANGE_NUM; i++) {
229 if (!range[i].end)
230 continue;
231 nr_range++;
232 }
233 if (debug_print) { 115 if (debug_print) {
234 printk(KERN_DEBUG "After UC checking\n"); 116 printk(KERN_DEBUG "After UC checking\n");
235 for (i = 0; i < nr_range; i++) 117 for (i = 0; i < RANGE_NUM; i++) {
236 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 118 if (!range[i].end)
237 range[i].start, range[i].end + 1); 119 continue;
120 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
121 range[i].start, range[i].end);
122 }
238 } 123 }
239 124
240 /* sort the ranges */ 125 /* sort the ranges */
241 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 126 nr_range = clean_sort_range(range, RANGE_NUM);
242 if (debug_print) { 127 if (debug_print) {
243 printk(KERN_DEBUG "After sorting\n"); 128 printk(KERN_DEBUG "After sorting\n");
244 for (i = 0; i < nr_range; i++) 129 for (i = 0; i < nr_range; i++)
245 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", 130 printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
246 range[i].start, range[i].end + 1); 131 range[i].start, range[i].end);
247 } 132 }
248 133
249 /* clear those is not used */
250 for (i = nr_range; i < RANGE_NUM; i++)
251 memset(&range[i], 0, sizeof(range[i]));
252
253 return nr_range; 134 return nr_range;
254} 135}
255 136
256#ifdef CONFIG_MTRR_SANITIZER 137#ifdef CONFIG_MTRR_SANITIZER
257 138
258static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 139static unsigned long __init sum_ranges(struct range *range, int nr_range)
259{ 140{
260 unsigned long sum = 0; 141 unsigned long sum = 0;
261 int i; 142 int i;
262 143
263 for (i = 0; i < nr_range; i++) 144 for (i = 0; i < nr_range; i++)
264 sum += range[i].end + 1 - range[i].start; 145 sum += range[i].end - range[i].start;
265 146
266 return sum; 147 return sum;
267} 148}
@@ -590,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 471early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
591 472
592static int __init 473static int __init
593x86_setup_var_mtrrs(struct res_range *range, int nr_range, 474x86_setup_var_mtrrs(struct range *range, int nr_range,
594 u64 chunk_size, u64 gran_size) 475 u64 chunk_size, u64 gran_size)
595{ 476{
596 struct var_mtrr_state var_state; 477 struct var_mtrr_state var_state;
@@ -608,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
608 /* Write the range: */ 489 /* Write the range: */
609 for (i = 0; i < nr_range; i++) { 490 for (i = 0; i < nr_range; i++) {
610 set_var_mtrr_range(&var_state, range[i].start, 491 set_var_mtrr_range(&var_state, range[i].start,
611 range[i].end - range[i].start + 1); 492 range[i].end - range[i].start);
612 } 493 }
613 494
614 /* Write the last range: */ 495 /* Write the last range: */
@@ -689,8 +570,6 @@ static int __init mtrr_need_cleanup(void)
689 continue; 570 continue;
690 if (!size) 571 if (!size)
691 type = MTRR_NUM_TYPES; 572 type = MTRR_NUM_TYPES;
692 if (type == MTRR_TYPE_WRPROT)
693 type = MTRR_TYPE_UNCACHABLE;
694 num[type]++; 573 num[type]++;
695 } 574 }
696 575
@@ -713,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
713 unsigned long x_remove_base, 592 unsigned long x_remove_base,
714 unsigned long x_remove_size, int i) 593 unsigned long x_remove_size, int i)
715{ 594{
716 static struct res_range range_new[RANGE_NUM]; 595 static struct range range_new[RANGE_NUM];
717 unsigned long range_sums_new; 596 unsigned long range_sums_new;
718 static int nr_range_new; 597 static int nr_range_new;
719 int num_reg; 598 int num_reg;
@@ -840,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
840 * [0, 1M) should always be covered by var mtrr with WB 719 * [0, 1M) should always be covered by var mtrr with WB
841 * and fixed mtrrs should take effect before var mtrr for it: 720 * and fixed mtrrs should take effect before var mtrr for it:
842 */ 721 */
843 nr_range = add_range_with_merge(range, nr_range, 0, 722 nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
844 (1ULL<<(20 - PAGE_SHIFT)) - 1); 723 1ULL<<(20 - PAGE_SHIFT));
845 /* Sort the ranges: */ 724 /* Sort the ranges: */
846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 725 sort_range(range, nr_range);
847 726
848 range_sums = sum_ranges(range, nr_range); 727 range_sums = sum_ranges(range, nr_range);
849 printk(KERN_INFO "total RAM covered: %ldM\n", 728 printk(KERN_INFO "total RAM covered: %ldM\n",
@@ -1060,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1060 nr_range = 0; 939 nr_range = 0;
1061 if (mtrr_tom2) { 940 if (mtrr_tom2) {
1062 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); 941 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1063 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; 942 range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
1064 if (highest_pfn < range[nr_range].end + 1) 943 if (highest_pfn < range[nr_range].end)
1065 highest_pfn = range[nr_range].end + 1; 944 highest_pfn = range[nr_range].end;
1066 nr_range++; 945 nr_range++;
1067 } 946 }
1068 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 947 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
@@ -1074,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1074 953
1075 /* Check the holes: */ 954 /* Check the holes: */
1076 for (i = 0; i < nr_range - 1; i++) { 955 for (i = 0; i < nr_range - 1; i++) {
1077 if (range[i].end + 1 < range[i+1].start) 956 if (range[i].end < range[i+1].start)
1078 total_trim_size += real_trim_memory(range[i].end + 1, 957 total_trim_size += real_trim_memory(range[i].end,
1079 range[i+1].start); 958 range[i+1].start);
1080 } 959 }
1081 960
1082 /* Check the top: */ 961 /* Check the top: */
1083 i = nr_range - 1; 962 i = nr_range - 1;
1084 if (range[i].end + 1 < end_pfn) 963 if (range[i].end < end_pfn)
1085 total_trim_size += real_trim_memory(range[i].end + 1, 964 total_trim_size += real_trim_memory(range[i].end,
1086 end_pfn); 965 end_pfn);
1087 966
1088 if (total_trim_size) { 967 if (total_trim_size) {
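Besides dropping the local helpers in favour of the generic ones that now back struct range, the cleanup.c conversion changes the end convention: the old res_range stored inclusive ends (hence the scattered "+ 1"/"- 1" adjustments being removed), whereas struct range is used here with exclusive ends, so a span's length is simply end - start. A tiny sketch of the arithmetic under that assumption, with illustrative names:

#include <linux/types.h>
#include <linux/range.h>

/* Page frames [4, 10): start = 4, end = 10, i.e. 6 pages. */
static struct range demo_range = { .start = 4, .end = 10 };

static u64 demo_range_pages(void)
{
	/* Exclusive end: no '+ 1', unlike the old inclusive res_range code. */
	return demo_range.end - demo_range.start;
}

The add_range_with_merge()/subtract_range() calls gain a RANGE_NUM capacity argument for the same reason: the helpers are now shared library code and can no longer assume the caller's array size.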
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 228d982ce09c..68a3343e5798 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -265,7 +265,7 @@ static void cyrix_set_all(void)
265 post_set(); 265 post_set();
266} 266}
267 267
268static struct mtrr_ops cyrix_mtrr_ops = { 268static const struct mtrr_ops cyrix_mtrr_ops = {
269 .vendor = X86_VENDOR_CYRIX, 269 .vendor = X86_VENDOR_CYRIX,
270 .set_all = cyrix_set_all, 270 .set_all = cyrix_set_all,
271 .set = cyrix_set_arr, 271 .set = cyrix_set_arr,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55da0c5f68dd..fd31a441c61c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -6,7 +6,6 @@
6 6
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/slab.h>
10#include <linux/io.h> 9#include <linux/io.h>
11#include <linux/mm.h> 10#include <linux/mm.h>
12 11
@@ -464,7 +463,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
464 tmp |= ~((1<<(hi - 1)) - 1); 463 tmp |= ~((1<<(hi - 1)) - 1);
465 464
466 if (tmp != mask_lo) { 465 if (tmp != mask_lo) {
467 WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); 466 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
468 mask_lo = tmp; 467 mask_lo = tmp;
469 } 468 }
470 } 469 }
@@ -570,7 +569,7 @@ static unsigned long set_mtrr_state(void)
570 569
571 570
572static unsigned long cr4; 571static unsigned long cr4;
573static DEFINE_SPINLOCK(set_atomicity_lock); 572static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
574 573
575/* 574/*
576 * Since we are disabling the cache don't allow any interrupts, 575 * Since we are disabling the cache don't allow any interrupts,
@@ -590,7 +589,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
590 * changes to the way the kernel boots 589 * changes to the way the kernel boots
591 */ 590 */
592 591
593 spin_lock(&set_atomicity_lock); 592 raw_spin_lock(&set_atomicity_lock);
594 593
595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 594 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
596 cr0 = read_cr0() | X86_CR0_CD; 595 cr0 = read_cr0() | X86_CR0_CD;
@@ -627,7 +626,7 @@ static void post_set(void) __releases(set_atomicity_lock)
627 /* Restore value of CR4 */ 626 /* Restore value of CR4 */
628 if (cpu_has_pge) 627 if (cpu_has_pge)
629 write_cr4(cr4); 628 write_cr4(cr4);
630 spin_unlock(&set_atomicity_lock); 629 raw_spin_unlock(&set_atomicity_lock);
631} 630}
632 631
633static void generic_set_all(void) 632static void generic_set_all(void)
@@ -752,7 +751,7 @@ int positive_have_wrcomb(void)
752/* 751/*
753 * Generic structure... 752 * Generic structure...
754 */ 753 */
755struct mtrr_ops generic_mtrr_ops = { 754const struct mtrr_ops generic_mtrr_ops = {
756 .use_intel_if = 1, 755 .use_intel_if = 1,
757 .set_all = generic_set_all, 756 .set_all = generic_set_all,
758 .get = generic_get_mtrr, 757 .get = generic_get_mtrr,
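The set_atomicity_lock conversion makes it a raw spinlock: prepare_set()/post_set() hold it with interrupts off across cache-disable, wbinvd and MTRR MSR writes, a region that must never sleep, and on PREEMPT_RT ordinary spinlocks become sleeping locks while raw spinlocks keep busy-waiting. The pattern in isolation, with an illustrative lock name:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hw_reg_lock);

static void touch_hw_regs(void)
{
	/* Always busy-waits, even on PREEMPT_RT where plain spinlocks can sleep. */
	raw_spin_lock(&hw_reg_lock);
	/* ... program hardware state that must not be interleaved ... */
	raw_spin_unlock(&hw_reg_lock);
}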
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 3c1b12d461d1..79289632cb27 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -4,6 +4,8 @@
4#include <linux/proc_fs.h> 4#include <linux/proc_fs.h>
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/ctype.h> 6#include <linux/ctype.h>
7#include <linux/string.h>
8#include <linux/slab.h>
7#include <linux/init.h> 9#include <linux/init.h>
8 10
9#define LINE_SIZE 80 11#define LINE_SIZE 80
@@ -133,8 +135,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
133 return -EINVAL; 135 return -EINVAL;
134 136
135 base = simple_strtoull(line + 5, &ptr, 0); 137 base = simple_strtoull(line + 5, &ptr, 0);
136 while (isspace(*ptr)) 138 ptr = skip_spaces(ptr);
137 ptr++;
138 139
139 if (strncmp(ptr, "size=", 5)) 140 if (strncmp(ptr, "size=", 5))
140 return -EINVAL; 141 return -EINVAL;
@@ -142,14 +143,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
142 size = simple_strtoull(ptr + 5, &ptr, 0); 143 size = simple_strtoull(ptr + 5, &ptr, 0);
143 if ((base & 0xfff) || (size & 0xfff)) 144 if ((base & 0xfff) || (size & 0xfff))
144 return -EINVAL; 145 return -EINVAL;
145 while (isspace(*ptr)) 146 ptr = skip_spaces(ptr);
146 ptr++;
147 147
148 if (strncmp(ptr, "type=", 5)) 148 if (strncmp(ptr, "type=", 5))
149 return -EINVAL; 149 return -EINVAL;
150 ptr += 5; 150 ptr = skip_spaces(ptr + 5);
151 while (isspace(*ptr))
152 ptr++;
153 151
154 for (i = 0; i < MTRR_NUM_TYPES; ++i) { 152 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
155 if (strcmp(ptr, mtrr_strings[i])) 153 if (strcmp(ptr, mtrr_strings[i]))
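The mtrr_write() parser above swaps three open-coded while (isspace(*ptr)) ptr++; loops for skip_spaces() from lib/string.c, which returns a pointer to the first non-whitespace character; the new <linux/string.h> include supplies its declaration. The equivalence, side by side (helper names are illustrative):

#include <linux/ctype.h>
#include <linux/string.h>

/* Before: advance past whitespace by hand. */
static char *advance_open_coded(char *ptr)
{
	while (isspace(*ptr))
		ptr++;
	return ptr;
}

/* After: one library call does the same thing. */
static char *advance_with_helper(char *ptr)
{
	return skip_spaces(ptr);
}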
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 84e83de54575..79556bd9b602 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex);
60u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init; 61static bool mtrr_aps_delayed_init;
62 62
63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 63static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
64 64
65struct mtrr_ops *mtrr_if; 65const struct mtrr_ops *mtrr_if;
66 66
67static void set_mtrr(unsigned int reg, unsigned long base, 67static void set_mtrr(unsigned int reg, unsigned long base,
68 unsigned long size, mtrr_type type); 68 unsigned long size, mtrr_type type);
69 69
70void set_mtrr_ops(struct mtrr_ops *ops) 70void set_mtrr_ops(const struct mtrr_ops *ops)
71{ 71{
72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
73 mtrr_ops[ops->vendor] = ops; 73 mtrr_ops[ops->vendor] = ops;
@@ -145,6 +145,7 @@ struct set_mtrr_data {
145 145
146/** 146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data
148 * 149 *
149 * Returns nothing. 150 * Returns nothing.
150 */ 151 */
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a501dee9a87a..df5e41f31a27 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size,
32extern int generic_validate_add_page(unsigned long base, unsigned long size, 32extern int generic_validate_add_page(unsigned long base, unsigned long size,
33 unsigned int type); 33 unsigned int type);
34 34
35extern struct mtrr_ops generic_mtrr_ops; 35extern const struct mtrr_ops generic_mtrr_ops;
36 36
37extern int positive_have_wrcomb(void); 37extern int positive_have_wrcomb(void);
38 38
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
54void get_mtrr_state(void); 54void get_mtrr_state(void);
55 55
56extern void set_mtrr_ops(struct mtrr_ops *ops); 56extern void set_mtrr_ops(const struct mtrr_ops *ops);
57 57
58extern u64 size_or_mask, size_and_mask; 58extern u64 size_or_mask, size_and_mask;
59extern struct mtrr_ops *mtrr_if; 59extern const struct mtrr_ops *mtrr_if;
60 60
61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
deleted file mode 100644
index dfc80b4e6b0d..000000000000
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,94 +0,0 @@
1#include <linux/init.h>
2#include <linux/io.h>
3#include <linux/mm.h>
4
5#include <asm/processor-cyrix.h>
6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
10#include "mtrr.h"
11
12/* Put the processor into a state where MTRRs can be safely set */
13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
14{
15 unsigned int cr0;
16
17 /* Disable interrupts locally */
18 local_irq_save(ctxt->flags);
19
20 if (use_intel() || is_cpu(CYRIX)) {
21
22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
23 if (cpu_has_pge) {
24 ctxt->cr4val = read_cr4();
25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
26 }
27
28 /*
29 * Disable and flush caches. Note that wbinvd flushes the TLBs
30 * as a side-effect
31 */
32 cr0 = read_cr0() | X86_CR0_CD;
33 wbinvd();
34 write_cr0(cr0);
35 wbinvd();
36
37 if (use_intel()) {
38 /* Save MTRR state */
39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
40 } else {
41 /*
42 * Cyrix ARRs -
43 * everything else were excluded at the top
44 */
45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
47 }
48}
49
50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
51{
52 if (use_intel()) {
53 /* Disable MTRRs, and set the default type to uncached */
54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
55 ctxt->deftype_hi);
56 } else {
57 if (is_cpu(CYRIX)) {
58 /* Cyrix ARRs - everything else were excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
62}
63
64/* Restore the processor after a set_mtrr_prepare */
65void set_mtrr_done(struct set_mtrr_context *ctxt)
66{
67 if (use_intel() || is_cpu(CYRIX)) {
68
69 /* Flush caches and TLBs */
70 wbinvd();
71
72 /* Restore MTRRdefType */
73 if (use_intel()) {
74 /* Intel (P6) standard MTRRs */
75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
76 ctxt->deftype_hi);
77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
84
85 /* Enable caches */
86 write_cr0(read_cr0() & 0xbfffffff);
87
88 /* Restore value of CR4 */
89 if (cpu_has_pge)
90 write_cr4(ctxt->cr4val);
91 }
92 /* Re-enable interrupts locally (if enabled previously) */
93 local_irq_restore(ctxt->flags);
94}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b5801c311846..db5bdc8addf8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter 7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> 9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
10 * 11 *
11 * For licencing details see kernel-base/COPYING 12 * For licencing details see kernel-base/COPYING
12 */ 13 */
@@ -20,12 +21,15 @@
20#include <linux/kdebug.h> 21#include <linux/kdebug.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/slab.h>
23#include <linux/highmem.h> 25#include <linux/highmem.h>
24#include <linux/cpu.h> 26#include <linux/cpu.h>
27#include <linux/bitops.h>
25 28
26#include <asm/apic.h> 29#include <asm/apic.h>
27#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
28#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h>
29 33
30static u64 perf_event_mask __read_mostly; 34static u64 perf_event_mask __read_mostly;
31 35
@@ -68,15 +72,60 @@ struct debug_store {
68 u64 pebs_event_reset[MAX_PEBS_EVENTS]; 72 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69}; 73};
70 74
75struct event_constraint {
76 union {
77 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
78 u64 idxmsk64;
79 };
80 u64 code;
81 u64 cmask;
82 int weight;
83};
84
85struct amd_nb {
86 int nb_id; /* NorthBridge id */
87 int refcnt; /* reference count */
88 struct perf_event *owners[X86_PMC_IDX_MAX];
89 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
90};
91
71struct cpu_hw_events { 92struct cpu_hw_events {
72 struct perf_event *events[X86_PMC_IDX_MAX]; 93 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 94 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts; 95 unsigned long interrupts;
76 int enabled; 96 int enabled;
77 struct debug_store *ds; 97 struct debug_store *ds;
98
99 int n_events;
100 int n_added;
101 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
102 u64 tags[X86_PMC_IDX_MAX];
103 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
104 struct amd_nb *amd_nb;
78}; 105};
79 106
107#define __EVENT_CONSTRAINT(c, n, m, w) {\
108 { .idxmsk64 = (n) }, \
109 .code = (c), \
110 .cmask = (m), \
111 .weight = (w), \
112}
113
114#define EVENT_CONSTRAINT(c, n, m) \
115 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
116
117#define INTEL_EVENT_CONSTRAINT(c, n) \
118 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
119
120#define FIXED_EVENT_CONSTRAINT(c, n) \
121 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
122
123#define EVENT_CONSTRAINT_END \
124 EVENT_CONSTRAINT(0, 0, 0)
125
126#define for_each_event_constraint(e, c) \
127 for ((e) = (c); (e)->cmask; (e)++)
128
80/* 129/*
81 * struct x86_pmu - generic x86 pmu 130 * struct x86_pmu - generic x86 pmu
82 */ 131 */
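The new struct event_constraint records, for a given event selector value (code), which selector bits to compare (cmask), the set of counters that may host the event (idxmsk/idxmsk64) and a precomputed popcount of that set (weight), so the scheduler can place the most tightly constrained events first. Using the macros defined in the hunk above, a constraint table would be declared and walked roughly as follows; the event code 0xc0 and the counter masks are illustrative, not a claim about any particular CPU model's table:

/* An INST_RETIRED-style event limited to general-purpose counters 0 and 1 ... */
static struct event_constraint demo_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0xc0, 0x3),
	/* ... or, alternatively, hosted on fixed counter 0 (PMC index 32). */
	FIXED_EVENT_CONSTRAINT(0xc0, 0),
	EVENT_CONSTRAINT_END		/* cmask == 0 terminates the walk */
};

static void demo_dump_constraints(void)
{
	struct event_constraint *c;

	for_each_event_constraint(c, demo_constraints)
		printk(KERN_DEBUG "code %#llx cmask %#llx weight %d\n",
		       c->code, c->cmask, c->weight);
}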
@@ -86,8 +135,8 @@ struct x86_pmu {
86 int (*handle_irq)(struct pt_regs *); 135 int (*handle_irq)(struct pt_regs *);
87 void (*disable_all)(void); 136 void (*disable_all)(void);
88 void (*enable_all)(void); 137 void (*enable_all)(void);
89 void (*enable)(struct hw_perf_event *, int); 138 void (*enable)(struct perf_event *);
90 void (*disable)(struct hw_perf_event *, int); 139 void (*disable)(struct perf_event *);
91 unsigned eventsel; 140 unsigned eventsel;
92 unsigned perfctr; 141 unsigned perfctr;
93 u64 (*event_map)(int); 142 u64 (*event_map)(int);
@@ -102,78 +151,28 @@ struct x86_pmu {
102 u64 intel_ctrl; 151 u64 intel_ctrl;
103 void (*enable_bts)(u64 config); 152 void (*enable_bts)(u64 config);
104 void (*disable_bts)(void); 153 void (*disable_bts)(void);
105};
106 154
107static struct x86_pmu x86_pmu __read_mostly; 155 struct event_constraint *
156 (*get_event_constraints)(struct cpu_hw_events *cpuc,
157 struct perf_event *event);
108 158
109static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 159 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
110 .enabled = 1, 160 struct perf_event *event);
111}; 161 struct event_constraint *event_constraints;
112 162
113/* 163 int (*cpu_prepare)(int cpu);
114 * Not sure about some of these 164 void (*cpu_starting)(int cpu);
115 */ 165 void (*cpu_dying)(int cpu);
116static const u64 p6_perfmon_event_map[] = 166 void (*cpu_dead)(int cpu);
117{
118 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
119 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
120 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
121 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
122 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
123 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
125}; 167};
126 168
127static u64 p6_pmu_event_map(int hw_event) 169static struct x86_pmu x86_pmu __read_mostly;
128{
129 return p6_perfmon_event_map[hw_event];
130}
131
132/*
133 * Event setting that is specified not to count anything.
134 * We use this to effectively disable a counter.
135 *
136 * L2_RQSTS with 0 MESI unit mask.
137 */
138#define P6_NOP_EVENT 0x0000002EULL
139
140static u64 p6_pmu_raw_event(u64 hw_event)
141{
142#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
143#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
144#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
145#define P6_EVNTSEL_INV_MASK 0x00800000ULL
146#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
147
148#define P6_EVNTSEL_MASK \
149 (P6_EVNTSEL_EVENT_MASK | \
150 P6_EVNTSEL_UNIT_MASK | \
151 P6_EVNTSEL_EDGE_MASK | \
152 P6_EVNTSEL_INV_MASK | \
153 P6_EVNTSEL_REG_MASK)
154
155 return hw_event & P6_EVNTSEL_MASK;
156}
157
158 170
159/* 171static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
160 * Intel PerfMon v3. Used on Core2 and later. 172 .enabled = 1,
161 */
162static const u64 intel_perfmon_event_map[] =
163{
164 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
165 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
166 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
167 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
168 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
169 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
170 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
171}; 173};
172 174
173static u64 intel_pmu_event_map(int hw_event) 175static int x86_perf_event_set_period(struct perf_event *event);
174{
175 return intel_perfmon_event_map[hw_event];
176}
177 176
178/* 177/*
179 * Generalized hw caching related hw_event table, filled 178 * Generalized hw caching related hw_event table, filled
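
struct x86_pmu continues to act as a per-vendor vtable, and this hunk widens it with constraint hooks and CPU hotplug callbacks that the generic code reaches only through function pointers. A minimal illustration of that dispatch pattern, with invented names (pmu_ops, intel_ops, pmu_init); the real structure carries far more fields:

#include <stdio.h>

struct pmu_ops {
        const char *name;
        void (*enable_all)(void);
        void (*disable_all)(void);
};

static void intel_enable_all(void)  { puts("enable all counters");  }
static void intel_disable_all(void) { puts("disable all counters"); }

static const struct pmu_ops intel_ops = {
        .name        = "Intel",
        .enable_all  = intel_enable_all,
        .disable_all = intel_disable_all,
};

static struct pmu_ops pmu;              /* the one active backend */

static void pmu_init(void)
{
        pmu = intel_ops;                /* pick the backend at init time */
}

int main(void)
{
        pmu_init();
        printf("%s PMU driver\n", pmu.name);
        pmu.disable_all();
        pmu.enable_all();
        return 0;
}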
@@ -190,435 +189,18 @@ static u64 __read_mostly hw_cache_event_ids
190 [PERF_COUNT_HW_CACHE_OP_MAX] 189 [PERF_COUNT_HW_CACHE_OP_MAX]
191 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 190 [PERF_COUNT_HW_CACHE_RESULT_MAX];
192 191
193static const u64 nehalem_hw_cache_event_ids
194 [PERF_COUNT_HW_CACHE_MAX]
195 [PERF_COUNT_HW_CACHE_OP_MAX]
196 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
197{
198 [ C(L1D) ] = {
199 [ C(OP_READ) ] = {
200 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
201 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
202 },
203 [ C(OP_WRITE) ] = {
204 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
205 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
206 },
207 [ C(OP_PREFETCH) ] = {
208 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
209 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
210 },
211 },
212 [ C(L1I ) ] = {
213 [ C(OP_READ) ] = {
214 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
215 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
216 },
217 [ C(OP_WRITE) ] = {
218 [ C(RESULT_ACCESS) ] = -1,
219 [ C(RESULT_MISS) ] = -1,
220 },
221 [ C(OP_PREFETCH) ] = {
222 [ C(RESULT_ACCESS) ] = 0x0,
223 [ C(RESULT_MISS) ] = 0x0,
224 },
225 },
226 [ C(LL ) ] = {
227 [ C(OP_READ) ] = {
228 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
229 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
230 },
231 [ C(OP_WRITE) ] = {
232 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
233 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
234 },
235 [ C(OP_PREFETCH) ] = {
236 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
237 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
238 },
239 },
240 [ C(DTLB) ] = {
241 [ C(OP_READ) ] = {
242 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
243 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
244 },
245 [ C(OP_WRITE) ] = {
246 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
247 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
248 },
249 [ C(OP_PREFETCH) ] = {
250 [ C(RESULT_ACCESS) ] = 0x0,
251 [ C(RESULT_MISS) ] = 0x0,
252 },
253 },
254 [ C(ITLB) ] = {
255 [ C(OP_READ) ] = {
256 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
257 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
258 },
259 [ C(OP_WRITE) ] = {
260 [ C(RESULT_ACCESS) ] = -1,
261 [ C(RESULT_MISS) ] = -1,
262 },
263 [ C(OP_PREFETCH) ] = {
264 [ C(RESULT_ACCESS) ] = -1,
265 [ C(RESULT_MISS) ] = -1,
266 },
267 },
268 [ C(BPU ) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
271 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = -1,
275 [ C(RESULT_MISS) ] = -1,
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = -1,
279 [ C(RESULT_MISS) ] = -1,
280 },
281 },
282};
283
284static const u64 core2_hw_cache_event_ids
285 [PERF_COUNT_HW_CACHE_MAX]
286 [PERF_COUNT_HW_CACHE_OP_MAX]
287 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
288{
289 [ C(L1D) ] = {
290 [ C(OP_READ) ] = {
291 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
292 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
293 },
294 [ C(OP_WRITE) ] = {
295 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
296 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
297 },
298 [ C(OP_PREFETCH) ] = {
299 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
300 [ C(RESULT_MISS) ] = 0,
301 },
302 },
303 [ C(L1I ) ] = {
304 [ C(OP_READ) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
306 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
307 },
308 [ C(OP_WRITE) ] = {
309 [ C(RESULT_ACCESS) ] = -1,
310 [ C(RESULT_MISS) ] = -1,
311 },
312 [ C(OP_PREFETCH) ] = {
313 [ C(RESULT_ACCESS) ] = 0,
314 [ C(RESULT_MISS) ] = 0,
315 },
316 },
317 [ C(LL ) ] = {
318 [ C(OP_READ) ] = {
319 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
320 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
321 },
322 [ C(OP_WRITE) ] = {
323 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
324 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
325 },
326 [ C(OP_PREFETCH) ] = {
327 [ C(RESULT_ACCESS) ] = 0,
328 [ C(RESULT_MISS) ] = 0,
329 },
330 },
331 [ C(DTLB) ] = {
332 [ C(OP_READ) ] = {
333 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
334 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
335 },
336 [ C(OP_WRITE) ] = {
337 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
338 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
339 },
340 [ C(OP_PREFETCH) ] = {
341 [ C(RESULT_ACCESS) ] = 0,
342 [ C(RESULT_MISS) ] = 0,
343 },
344 },
345 [ C(ITLB) ] = {
346 [ C(OP_READ) ] = {
347 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
348 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
349 },
350 [ C(OP_WRITE) ] = {
351 [ C(RESULT_ACCESS) ] = -1,
352 [ C(RESULT_MISS) ] = -1,
353 },
354 [ C(OP_PREFETCH) ] = {
355 [ C(RESULT_ACCESS) ] = -1,
356 [ C(RESULT_MISS) ] = -1,
357 },
358 },
359 [ C(BPU ) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
362 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = -1,
366 [ C(RESULT_MISS) ] = -1,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = -1,
370 [ C(RESULT_MISS) ] = -1,
371 },
372 },
373};
374
375static const u64 atom_hw_cache_event_ids
376 [PERF_COUNT_HW_CACHE_MAX]
377 [PERF_COUNT_HW_CACHE_OP_MAX]
378 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
379{
380 [ C(L1D) ] = {
381 [ C(OP_READ) ] = {
382 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
383 [ C(RESULT_MISS) ] = 0,
384 },
385 [ C(OP_WRITE) ] = {
386 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
387 [ C(RESULT_MISS) ] = 0,
388 },
389 [ C(OP_PREFETCH) ] = {
390 [ C(RESULT_ACCESS) ] = 0x0,
391 [ C(RESULT_MISS) ] = 0,
392 },
393 },
394 [ C(L1I ) ] = {
395 [ C(OP_READ) ] = {
396 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
397 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
398 },
399 [ C(OP_WRITE) ] = {
400 [ C(RESULT_ACCESS) ] = -1,
401 [ C(RESULT_MISS) ] = -1,
402 },
403 [ C(OP_PREFETCH) ] = {
404 [ C(RESULT_ACCESS) ] = 0,
405 [ C(RESULT_MISS) ] = 0,
406 },
407 },
408 [ C(LL ) ] = {
409 [ C(OP_READ) ] = {
410 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
411 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
412 },
413 [ C(OP_WRITE) ] = {
414 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
415 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
416 },
417 [ C(OP_PREFETCH) ] = {
418 [ C(RESULT_ACCESS) ] = 0,
419 [ C(RESULT_MISS) ] = 0,
420 },
421 },
422 [ C(DTLB) ] = {
423 [ C(OP_READ) ] = {
424 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
425 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
426 },
427 [ C(OP_WRITE) ] = {
428 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
429 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
430 },
431 [ C(OP_PREFETCH) ] = {
432 [ C(RESULT_ACCESS) ] = 0,
433 [ C(RESULT_MISS) ] = 0,
434 },
435 },
436 [ C(ITLB) ] = {
437 [ C(OP_READ) ] = {
438 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
439 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
440 },
441 [ C(OP_WRITE) ] = {
442 [ C(RESULT_ACCESS) ] = -1,
443 [ C(RESULT_MISS) ] = -1,
444 },
445 [ C(OP_PREFETCH) ] = {
446 [ C(RESULT_ACCESS) ] = -1,
447 [ C(RESULT_MISS) ] = -1,
448 },
449 },
450 [ C(BPU ) ] = {
451 [ C(OP_READ) ] = {
452 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
453 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
454 },
455 [ C(OP_WRITE) ] = {
456 [ C(RESULT_ACCESS) ] = -1,
457 [ C(RESULT_MISS) ] = -1,
458 },
459 [ C(OP_PREFETCH) ] = {
460 [ C(RESULT_ACCESS) ] = -1,
461 [ C(RESULT_MISS) ] = -1,
462 },
463 },
464};
465
466static u64 intel_pmu_raw_event(u64 hw_event)
467{
468#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
469#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
470#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
471#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
472#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
473
474#define CORE_EVNTSEL_MASK \
475 (CORE_EVNTSEL_EVENT_MASK | \
476 CORE_EVNTSEL_UNIT_MASK | \
477 CORE_EVNTSEL_EDGE_MASK | \
478 CORE_EVNTSEL_INV_MASK | \
479 CORE_EVNTSEL_REG_MASK)
480
481 return hw_event & CORE_EVNTSEL_MASK;
482}
483
484static const u64 amd_hw_cache_event_ids
485 [PERF_COUNT_HW_CACHE_MAX]
486 [PERF_COUNT_HW_CACHE_OP_MAX]
487 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
488{
489 [ C(L1D) ] = {
490 [ C(OP_READ) ] = {
491 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
492 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
493 },
494 [ C(OP_WRITE) ] = {
495 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
496 [ C(RESULT_MISS) ] = 0,
497 },
498 [ C(OP_PREFETCH) ] = {
499 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
500 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
501 },
502 },
503 [ C(L1I ) ] = {
504 [ C(OP_READ) ] = {
505 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
506 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
507 },
508 [ C(OP_WRITE) ] = {
509 [ C(RESULT_ACCESS) ] = -1,
510 [ C(RESULT_MISS) ] = -1,
511 },
512 [ C(OP_PREFETCH) ] = {
513 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
514 [ C(RESULT_MISS) ] = 0,
515 },
516 },
517 [ C(LL ) ] = {
518 [ C(OP_READ) ] = {
519 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
520 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
521 },
522 [ C(OP_WRITE) ] = {
523 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
524 [ C(RESULT_MISS) ] = 0,
525 },
526 [ C(OP_PREFETCH) ] = {
527 [ C(RESULT_ACCESS) ] = 0,
528 [ C(RESULT_MISS) ] = 0,
529 },
530 },
531 [ C(DTLB) ] = {
532 [ C(OP_READ) ] = {
533 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
534 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
535 },
536 [ C(OP_WRITE) ] = {
537 [ C(RESULT_ACCESS) ] = 0,
538 [ C(RESULT_MISS) ] = 0,
539 },
540 [ C(OP_PREFETCH) ] = {
541 [ C(RESULT_ACCESS) ] = 0,
542 [ C(RESULT_MISS) ] = 0,
543 },
544 },
545 [ C(ITLB) ] = {
546 [ C(OP_READ) ] = {
547 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
548 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
549 },
550 [ C(OP_WRITE) ] = {
551 [ C(RESULT_ACCESS) ] = -1,
552 [ C(RESULT_MISS) ] = -1,
553 },
554 [ C(OP_PREFETCH) ] = {
555 [ C(RESULT_ACCESS) ] = -1,
556 [ C(RESULT_MISS) ] = -1,
557 },
558 },
559 [ C(BPU ) ] = {
560 [ C(OP_READ) ] = {
561 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
562 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
563 },
564 [ C(OP_WRITE) ] = {
565 [ C(RESULT_ACCESS) ] = -1,
566 [ C(RESULT_MISS) ] = -1,
567 },
568 [ C(OP_PREFETCH) ] = {
569 [ C(RESULT_ACCESS) ] = -1,
570 [ C(RESULT_MISS) ] = -1,
571 },
572 },
573};
574
575/*
576 * AMD Performance Monitor K7 and later.
577 */
578static const u64 amd_perfmon_event_map[] =
579{
580 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
581 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
582 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
583 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
584 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
585 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
586};
587
588static u64 amd_pmu_event_map(int hw_event)
589{
590 return amd_perfmon_event_map[hw_event];
591}
592
593static u64 amd_pmu_raw_event(u64 hw_event)
594{
595#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
596#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
597#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
598#define K7_EVNTSEL_INV_MASK 0x000800000ULL
599#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
600
601#define K7_EVNTSEL_MASK \
602 (K7_EVNTSEL_EVENT_MASK | \
603 K7_EVNTSEL_UNIT_MASK | \
604 K7_EVNTSEL_EDGE_MASK | \
605 K7_EVNTSEL_INV_MASK | \
606 K7_EVNTSEL_REG_MASK)
607
608 return hw_event & K7_EVNTSEL_MASK;
609}
610
611/* 192/*
612 * Propagate event elapsed time into the generic event. 193 * Propagate event elapsed time into the generic event.
613 * Can only be executed on the CPU where the event is active. 194 * Can only be executed on the CPU where the event is active.
614 * Returns the delta events processed. 195 * Returns the delta events processed.
615 */ 196 */
616static u64 197static u64
617x86_perf_event_update(struct perf_event *event, 198x86_perf_event_update(struct perf_event *event)
618 struct hw_perf_event *hwc, int idx)
619{ 199{
200 struct hw_perf_event *hwc = &event->hw;
620 int shift = 64 - x86_pmu.event_bits; 201 int shift = 64 - x86_pmu.event_bits;
621 u64 prev_raw_count, new_raw_count; 202 u64 prev_raw_count, new_raw_count;
203 int idx = hwc->idx;
622 s64 delta; 204 s64 delta;
623 205
624 if (idx == X86_PMC_IDX_FIXED_BTS) 206 if (idx == X86_PMC_IDX_FIXED_BTS)
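
x86_perf_event_update() now takes just the event; the counter index and hw_perf_event come from event->hw. The delta arithmetic it builds on is worth spelling out: both raw reads are shifted up to the top of a 64-bit word so the subtraction wraps exactly like the hardware counter, then shifted back down. A standalone sketch, assuming a 48-bit counter; counter_delta() is an invented name, not a kernel function:

#include <stdint.h>
#include <stdio.h>

/* Shifting both samples up to bit 63 makes the subtraction wrap the same
 * way the width-limited hardware counter does; shifting the result back
 * down (arithmetic, on a signed value) yields the true delta even when
 * the counter overflowed between the two reads. */
static int64_t counter_delta(uint64_t prev, uint64_t now, int event_bits)
{
        int shift = 64 - event_bits;
        uint64_t diff = (now << shift) - (prev << shift);

        return (int64_t)diff >> shift;
}

int main(void)
{
        /* a 48-bit counter that wrapped between the two reads */
        uint64_t prev = 0xffffffffff00ULL;
        uint64_t now  = 0x42ULL;

        printf("delta = %lld\n", (long long)counter_delta(prev, now, 48));
        return 0;
}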
@@ -718,7 +300,7 @@ static inline bool bts_available(void)
718 return x86_pmu.enable_bts != NULL; 300 return x86_pmu.enable_bts != NULL;
719} 301}
720 302
721static inline void init_debug_store_on_cpu(int cpu) 303static void init_debug_store_on_cpu(int cpu)
722{ 304{
723 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 305 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
724 306
@@ -730,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu)
730 (u32)((u64)(unsigned long)ds >> 32)); 312 (u32)((u64)(unsigned long)ds >> 32));
731} 313}
732 314
733static inline void fini_debug_store_on_cpu(int cpu) 315static void fini_debug_store_on_cpu(int cpu)
734{ 316{
735 if (!per_cpu(cpu_hw_events, cpu).ds) 317 if (!per_cpu(cpu_hw_events, cpu).ds)
736 return; 318 return;
@@ -859,42 +441,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
859 return 0; 441 return 0;
860} 442}
861 443
862static void intel_pmu_enable_bts(u64 config)
863{
864 unsigned long debugctlmsr;
865
866 debugctlmsr = get_debugctlmsr();
867
868 debugctlmsr |= X86_DEBUGCTL_TR;
869 debugctlmsr |= X86_DEBUGCTL_BTS;
870 debugctlmsr |= X86_DEBUGCTL_BTINT;
871
872 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
873 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
874
875 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
876 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
877
878 update_debugctlmsr(debugctlmsr);
879}
880
881static void intel_pmu_disable_bts(void)
882{
883 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
884 unsigned long debugctlmsr;
885
886 if (!cpuc->ds)
887 return;
888
889 debugctlmsr = get_debugctlmsr();
890
891 debugctlmsr &=
892 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
893 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
894
895 update_debugctlmsr(debugctlmsr);
896}
897
898/* 444/*
899 * Setup the hardware configuration for a given attr_type 445 * Setup the hardware configuration for a given attr_type
900 */ 446 */
@@ -932,6 +478,10 @@ static int __hw_perf_event_init(struct perf_event *event)
932 */ 478 */
933 hwc->config = ARCH_PERFMON_EVENTSEL_INT; 479 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
934 480
481 hwc->idx = -1;
482 hwc->last_cpu = -1;
483 hwc->last_tag = ~0ULL;
484
935 /* 485 /*
936 * Count user and OS events unless requested not to. 486 * Count user and OS events unless requested not to.
937 */ 487 */
@@ -960,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event)
960 */ 510 */
961 if (attr->type == PERF_TYPE_RAW) { 511 if (attr->type == PERF_TYPE_RAW) {
962 hwc->config |= x86_pmu.raw_event(attr->config); 512 hwc->config |= x86_pmu.raw_event(attr->config);
513 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
514 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
515 return -EACCES;
963 return 0; 516 return 0;
964 } 517 }
965 518
@@ -999,216 +552,314 @@ static int __hw_perf_event_init(struct perf_event *event)
999 return 0; 552 return 0;
1000} 553}
1001 554
1002static void p6_pmu_disable_all(void) 555static void x86_pmu_disable_all(void)
1003{ 556{
1004 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 557 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1005 u64 val; 558 int idx;
1006
1007 if (!cpuc->enabled)
1008 return;
1009 559
1010 cpuc->enabled = 0; 560 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1011 barrier(); 561 u64 val;
1012 562
1013 /* p6 only has one enable register */ 563 if (!test_bit(idx, cpuc->active_mask))
1014 rdmsrl(MSR_P6_EVNTSEL0, val); 564 continue;
1015 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 565 rdmsrl(x86_pmu.eventsel + idx, val);
1016 wrmsrl(MSR_P6_EVNTSEL0, val); 566 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
567 continue;
568 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
569 wrmsrl(x86_pmu.eventsel + idx, val);
570 }
1017} 571}
1018 572
1019static void intel_pmu_disable_all(void) 573void hw_perf_disable(void)
1020{ 574{
1021 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 575 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1022 576
577 if (!x86_pmu_initialized())
578 return;
579
1023 if (!cpuc->enabled) 580 if (!cpuc->enabled)
1024 return; 581 return;
1025 582
583 cpuc->n_added = 0;
1026 cpuc->enabled = 0; 584 cpuc->enabled = 0;
1027 barrier(); 585 barrier();
1028 586
1029 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 587 x86_pmu.disable_all();
1030
1031 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1032 intel_pmu_disable_bts();
1033} 588}
1034 589
1035static void amd_pmu_disable_all(void) 590static void x86_pmu_enable_all(void)
1036{ 591{
1037 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 592 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1038 int idx; 593 int idx;
1039 594
1040 if (!cpuc->enabled)
1041 return;
1042
1043 cpuc->enabled = 0;
1044 /*
1045 * ensure we write the disable before we start disabling the
1046 * events proper, so that amd_pmu_enable_event() does the
1047 * right thing.
1048 */
1049 barrier();
1050
1051 for (idx = 0; idx < x86_pmu.num_events; idx++) { 595 for (idx = 0; idx < x86_pmu.num_events; idx++) {
596 struct perf_event *event = cpuc->events[idx];
1052 u64 val; 597 u64 val;
1053 598
1054 if (!test_bit(idx, cpuc->active_mask)) 599 if (!test_bit(idx, cpuc->active_mask))
1055 continue; 600 continue;
1056 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 601
1057 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 602 val = event->hw.config;
1058 continue; 603 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
1059 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 604 wrmsrl(x86_pmu.eventsel + idx, val);
1060 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1061 } 605 }
1062} 606}
1063 607
1064void hw_perf_disable(void) 608static const struct pmu pmu;
609
610static inline int is_x86_event(struct perf_event *event)
1065{ 611{
1066 if (!x86_pmu_initialized()) 612 return event->pmu == &pmu;
1067 return;
1068 return x86_pmu.disable_all();
1069} 613}
1070 614
1071static void p6_pmu_enable_all(void) 615static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1072{ 616{
1073 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 617 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1074 unsigned long val; 618 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
619 int i, j, w, wmax, num = 0;
620 struct hw_perf_event *hwc;
1075 621
1076 if (cpuc->enabled) 622 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1077 return;
1078 623
1079 cpuc->enabled = 1; 624 for (i = 0; i < n; i++) {
1080 barrier(); 625 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
626 constraints[i] = c;
627 }
1081 628
1082 /* p6 only has one enable register */ 629 /*
1083 rdmsrl(MSR_P6_EVNTSEL0, val); 630 * fastpath, try to reuse previous register
1084 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 631 */
1085 wrmsrl(MSR_P6_EVNTSEL0, val); 632 for (i = 0; i < n; i++) {
1086} 633 hwc = &cpuc->event_list[i]->hw;
634 c = constraints[i];
1087 635
1088static void intel_pmu_enable_all(void) 636 /* never assigned */
1089{ 637 if (hwc->idx == -1)
1090 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 638 break;
1091 639
1092 if (cpuc->enabled) 640 /* constraint still honored */
1093 return; 641 if (!test_bit(hwc->idx, c->idxmsk))
642 break;
1094 643
1095 cpuc->enabled = 1; 644 /* not already used */
1096 barrier(); 645 if (test_bit(hwc->idx, used_mask))
646 break;
1097 647
1098 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 648 __set_bit(hwc->idx, used_mask);
649 if (assign)
650 assign[i] = hwc->idx;
651 }
652 if (i == n)
653 goto done;
1099 654
1100 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 655 /*
1101 struct perf_event *event = 656 * begin slow path
1102 cpuc->events[X86_PMC_IDX_FIXED_BTS]; 657 */
1103 658
1104 if (WARN_ON_ONCE(!event)) 659 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1105 return;
1106 660
1107 intel_pmu_enable_bts(event->hw.config); 661 /*
1108 } 662 * weight = number of possible counters
1109} 663 *
664 * 1 = most constrained, only works on one counter
665 * wmax = least constrained, works on any counter
666 *
667 * assign events to counters starting with most
668 * constrained events.
669 */
670 wmax = x86_pmu.num_events;
1110 671
1111static void amd_pmu_enable_all(void) 672 /*
1112{ 673 * when fixed event counters are present,
1113 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 674 * wmax is incremented by 1 to account
1114 int idx; 675 * for one more choice
676 */
677 if (x86_pmu.num_events_fixed)
678 wmax++;
1115 679
1116 if (cpuc->enabled) 680 for (w = 1, num = n; num && w <= wmax; w++) {
1117 return; 681 /* for each event */
682 for (i = 0; num && i < n; i++) {
683 c = constraints[i];
684 hwc = &cpuc->event_list[i]->hw;
1118 685
1119 cpuc->enabled = 1; 686 if (c->weight != w)
1120 barrier(); 687 continue;
1121 688
1122 for (idx = 0; idx < x86_pmu.num_events; idx++) { 689 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
1123 struct perf_event *event = cpuc->events[idx]; 690 if (!test_bit(j, used_mask))
1124 u64 val; 691 break;
692 }
1125 693
1126 if (!test_bit(idx, cpuc->active_mask)) 694 if (j == X86_PMC_IDX_MAX)
1127 continue; 695 break;
1128 696
1129 val = event->hw.config; 697 __set_bit(j, used_mask);
1130 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 698
1131 wrmsrl(MSR_K7_EVNTSEL0 + idx, val); 699 if (assign)
700 assign[i] = j;
701 num--;
702 }
1132 } 703 }
704done:
705 /*
706 * scheduling failed or is just a simulation,
707 * free resources if necessary
708 */
709 if (!assign || num) {
710 for (i = 0; i < n; i++) {
711 if (x86_pmu.put_event_constraints)
712 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
713 }
714 }
715 return num ? -ENOSPC : 0;
1133} 716}
1134 717
1135void hw_perf_enable(void) 718/*
719 * dogrp: true if must collect siblings events (group)
720 * returns total number of events and error code
721 */
722static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1136{ 723{
1137 if (!x86_pmu_initialized()) 724 struct perf_event *event;
1138 return; 725 int n, max_count;
1139 x86_pmu.enable_all();
1140}
1141 726
1142static inline u64 intel_pmu_get_status(void) 727 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1143{
1144 u64 status;
1145 728
1146 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 729 /* current number of events already accepted */
730 n = cpuc->n_events;
1147 731
1148 return status; 732 if (is_x86_event(leader)) {
1149} 733 if (n >= max_count)
734 return -ENOSPC;
735 cpuc->event_list[n] = leader;
736 n++;
737 }
738 if (!dogrp)
739 return n;
1150 740
1151static inline void intel_pmu_ack_status(u64 ack) 741 list_for_each_entry(event, &leader->sibling_list, group_entry) {
1152{ 742 if (!is_x86_event(event) ||
1153 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 743 event->state <= PERF_EVENT_STATE_OFF)
1154} 744 continue;
1155 745
1156static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 746 if (n >= max_count)
1157{ 747 return -ENOSPC;
1158 (void)checking_wrmsrl(hwc->config_base + idx,
1159 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1160}
1161 748
1162static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 749 cpuc->event_list[n] = event;
1163{ 750 n++;
1164 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 751 }
752 return n;
1165} 753}
1166 754
1167static inline void 755static inline void x86_assign_hw_event(struct perf_event *event,
1168intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 756 struct cpu_hw_events *cpuc, int i)
1169{ 757{
1170 int idx = __idx - X86_PMC_IDX_FIXED; 758 struct hw_perf_event *hwc = &event->hw;
1171 u64 ctrl_val, mask;
1172 759
1173 mask = 0xfULL << (idx * 4); 760 hwc->idx = cpuc->assign[i];
761 hwc->last_cpu = smp_processor_id();
762 hwc->last_tag = ++cpuc->tags[i];
1174 763
1175 rdmsrl(hwc->config_base, ctrl_val); 764 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
1176 ctrl_val &= ~mask; 765 hwc->config_base = 0;
1177 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 766 hwc->event_base = 0;
767 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
768 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
769 /*
770 * We set it so that event_base + idx in wrmsr/rdmsr maps to
771 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
772 */
773 hwc->event_base =
774 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
775 } else {
776 hwc->config_base = x86_pmu.eventsel;
777 hwc->event_base = x86_pmu.perfctr;
778 }
1178} 779}
1179 780
1180static inline void 781static inline int match_prev_assignment(struct hw_perf_event *hwc,
1181p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 782 struct cpu_hw_events *cpuc,
783 int i)
1182{ 784{
1183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 785 return hwc->idx == cpuc->assign[i] &&
1184 u64 val = P6_NOP_EVENT; 786 hwc->last_cpu == smp_processor_id() &&
1185 787 hwc->last_tag == cpuc->tags[i];
1186 if (cpuc->enabled)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1188
1189 (void)checking_wrmsrl(hwc->config_base + idx, val);
1190} 788}
1191 789
1192static inline void 790static int x86_pmu_start(struct perf_event *event);
1193intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 791static void x86_pmu_stop(struct perf_event *event);
792
793void hw_perf_enable(void)
1194{ 794{
1195 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 795 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1196 intel_pmu_disable_bts(); 796 struct perf_event *event;
797 struct hw_perf_event *hwc;
798 int i;
799
800 if (!x86_pmu_initialized())
1197 return; 801 return;
1198 }
1199 802
1200 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 803 if (cpuc->enabled)
1201 intel_pmu_disable_fixed(hwc, idx);
1202 return; 804 return;
805
806 if (cpuc->n_added) {
807 int n_running = cpuc->n_events - cpuc->n_added;
808 /*
809 * apply assignment obtained either from
810 * hw_perf_group_sched_in() or x86_pmu_enable()
811 *
812 * step1: save events moving to new counters
813 * step2: reprogram moved events into new counters
814 */
815 for (i = 0; i < n_running; i++) {
816 event = cpuc->event_list[i];
817 hwc = &event->hw;
818
819 /*
820 * we can avoid reprogramming counter if:
821 * - assigned same counter as last time
822 * - running on same CPU as last time
823 * - no other event has used the counter since
824 */
825 if (hwc->idx == -1 ||
826 match_prev_assignment(hwc, cpuc, i))
827 continue;
828
829 x86_pmu_stop(event);
830 }
831
832 for (i = 0; i < cpuc->n_events; i++) {
833 event = cpuc->event_list[i];
834 hwc = &event->hw;
835
836 if (!match_prev_assignment(hwc, cpuc, i))
837 x86_assign_hw_event(event, cpuc, i);
838 else if (i < n_running)
839 continue;
840
841 x86_pmu_start(event);
842 }
843 cpuc->n_added = 0;
844 perf_events_lapic_init();
1203 } 845 }
1204 846
1205 x86_pmu_disable_event(hwc, idx); 847 cpuc->enabled = 1;
848 barrier();
849
850 x86_pmu.enable_all();
1206} 851}
1207 852
1208static inline void 853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
1209amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1210{ 854{
1211 x86_pmu_disable_event(hwc, idx); 855 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
857}
858
859static inline void x86_pmu_disable_event(struct perf_event *event)
860{
861 struct hw_perf_event *hwc = &event->hw;
862 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
1212} 863}
1213 864
1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 865static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
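
x86_schedule_events() above assigns events to counters in order of increasing weight: an event that may only use one counter is placed before events that may use several, which is what lets a single greedy pass succeed or fail honestly. A compressed user-space sketch of that idea, assuming four generic counters and using __builtin_popcountll()/__builtin_ctzll() where the kernel uses its bitmap helpers; schedule() and struct ev are invented names:

#include <stdint.h>
#include <stdio.h>

#define NCTRS 4                         /* assumed number of counters */
#define NEVTS 3

struct ev {
        uint64_t idxmsk;                /* counters this event may use */
};

/* place most-constrained events (lowest popcount) first */
static int schedule(const struct ev *ev, int n, int *assign)
{
        uint64_t used = 0;
        int num = n;

        for (int w = 1; num && w <= NCTRS; w++) {
                for (int i = 0; num && i < n; i++) {
                        uint64_t free = ev[i].idxmsk & ~used;
                        int j;

                        if (__builtin_popcountll(ev[i].idxmsk) != w)
                                continue;
                        if (!free)
                                return -1;      /* over-committed */
                        j = __builtin_ctzll(free);
                        used |= 1ULL << j;
                        assign[i] = j;
                        num--;
                }
        }
        return num ? -1 : 0;
}

int main(void)
{
        /* event 0 is unconstrained, 1 must use counter 2, 2 may use 0 or 1 */
        struct ev ev[NEVTS] = { { 0xf }, { 0x4 }, { 0x3 } };
        int assign[NEVTS];

        if (schedule(ev, NEVTS, assign) == 0)
                for (int i = 0; i < NEVTS; i++)
                        printf("event %d -> counter %d\n", i, assign[i]);
        return 0;
}

The kernel version adds a fast path that reuses last time's assignment when every constraint is still honoured, and releases vendor constraint state when scheduling fails, but the weight ordering above is the core of it.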
@@ -1218,18 +869,18 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1218 * To be called with the event disabled in hw: 869 * To be called with the event disabled in hw:
1219 */ 870 */
1220static int 871static int
1221x86_perf_event_set_period(struct perf_event *event, 872x86_perf_event_set_period(struct perf_event *event)
1222 struct hw_perf_event *hwc, int idx)
1223{ 873{
874 struct hw_perf_event *hwc = &event->hw;
1224 s64 left = atomic64_read(&hwc->period_left); 875 s64 left = atomic64_read(&hwc->period_left);
1225 s64 period = hwc->sample_period; 876 s64 period = hwc->sample_period;
1226 int err, ret = 0; 877 int err, ret = 0, idx = hwc->idx;
1227 878
1228 if (idx == X86_PMC_IDX_FIXED_BTS) 879 if (idx == X86_PMC_IDX_FIXED_BTS)
1229 return 0; 880 return 0;
1230 881
1231 /* 882 /*
1232 * If we are way outside a reasoable range then just skip forward: 883 * If we are way outside a reasonable range then just skip forward:
1233 */ 884 */
1234 if (unlikely(left <= -period)) { 885 if (unlikely(left <= -period)) {
1235 left = period; 886 left = period;
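
x86_perf_event_set_period() likewise derives idx from the event now; the arming itself, further down and outside this hunk, writes the negated remaining period truncated to the counter width, so the counter overflows after exactly that many increments. A small sketch of just that arithmetic, assuming a 48-bit counter; arm_counter() is an invented name:

#include <stdint.h>
#include <stdio.h>

/* Arm a width-limited up-counter to overflow after 'period' ticks:
 * write the two's complement of the period, truncated to the width. */
static uint64_t arm_counter(int64_t period, int event_bits)
{
        uint64_t event_mask = (1ULL << event_bits) - 1;

        return (uint64_t)(-period) & event_mask;
}

int main(void)
{
        /* 48-bit counter, interrupt after 1000 events */
        printf("programmed value: %#llx\n",
               (unsigned long long)arm_counter(1000, 48));
        return 0;
}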
@@ -1269,157 +920,63 @@ x86_perf_event_set_period(struct perf_event *event,
1269 return ret; 920 return ret;
1270} 921}
1271 922
1272static inline void 923static void x86_pmu_enable_event(struct perf_event *event)
1273intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1274{
1275 int idx = __idx - X86_PMC_IDX_FIXED;
1276 u64 ctrl_val, bits, mask;
1277 int err;
1278
1279 /*
1280 * Enable IRQ generation (0x8),
1281 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1282 * if requested:
1283 */
1284 bits = 0x8ULL;
1285 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1286 bits |= 0x2;
1287 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1288 bits |= 0x1;
1289 bits <<= (idx * 4);
1290 mask = 0xfULL << (idx * 4);
1291
1292 rdmsrl(hwc->config_base, ctrl_val);
1293 ctrl_val &= ~mask;
1294 ctrl_val |= bits;
1295 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1296}
1297
1298static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1299{ 924{
1300 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 925 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1301 u64 val;
1302
1303 val = hwc->config;
1304 if (cpuc->enabled) 926 if (cpuc->enabled)
1305 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 927 __x86_pmu_enable_event(&event->hw);
1306
1307 (void)checking_wrmsrl(hwc->config_base + idx, val);
1308} 928}
1309 929
1310 930/*
1311static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 931 * activate a single event
1312{ 932 *
1313 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 933 * The event is added to the group of enabled events
1314 if (!__get_cpu_var(cpu_hw_events).enabled) 934 * but only if it can be scheduled with existing events.
1315 return; 935 *
1316 936 * Called with PMU disabled. If successful and return value 1,
1317 intel_pmu_enable_bts(hwc->config); 937 * then guaranteed to call perf_enable() and hw_perf_enable()
1318 return; 938 */
1319 } 939static int x86_pmu_enable(struct perf_event *event)
1320
1321 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1322 intel_pmu_enable_fixed(hwc, idx);
1323 return;
1324 }
1325
1326 x86_pmu_enable_event(hwc, idx);
1327}
1328
1329static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330{ 940{
1331 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 941 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
942 struct hw_perf_event *hwc;
943 int assign[X86_PMC_IDX_MAX];
944 int n, n0, ret;
1332 945
1333 if (cpuc->enabled) 946 hwc = &event->hw;
1334 x86_pmu_enable_event(hwc, idx);
1335}
1336
1337static int
1338fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
1339{
1340 unsigned int hw_event;
1341
1342 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1343 947
1344 if (unlikely((hw_event == 948 n0 = cpuc->n_events;
1345 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 949 n = collect_events(cpuc, event, false);
1346 (hwc->sample_period == 1))) 950 if (n < 0)
1347 return X86_PMC_IDX_FIXED_BTS; 951 return n;
1348 952
1349 if (!x86_pmu.num_events_fixed) 953 ret = x86_schedule_events(cpuc, n, assign);
1350 return -1; 954 if (ret)
955 return ret;
956 /*
957 * copy new assignment, now we know it is possible
958 * will be used by hw_perf_enable()
959 */
960 memcpy(cpuc->assign, assign, n*sizeof(int));
1351 961
1352 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 962 cpuc->n_events = n;
1353 return X86_PMC_IDX_FIXED_INSTRUCTIONS; 963 cpuc->n_added += n - n0;
1354 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1355 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1356 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1357 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1358 964
1359 return -1; 965 return 0;
1360} 966}
1361 967
1362/* 968static int x86_pmu_start(struct perf_event *event)
1363 * Find a PMC slot for the freshly enabled / scheduled in event:
1364 */
1365static int x86_pmu_enable(struct perf_event *event)
1366{ 969{
1367 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 970 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1368 struct hw_perf_event *hwc = &event->hw; 971 int idx = event->hw.idx;
1369 int idx;
1370 972
1371 idx = fixed_mode_idx(event, hwc); 973 if (idx == -1)
1372 if (idx == X86_PMC_IDX_FIXED_BTS) { 974 return -EAGAIN;
1373 /* BTS is already occupied. */
1374 if (test_and_set_bit(idx, cpuc->used_mask))
1375 return -EAGAIN;
1376
1377 hwc->config_base = 0;
1378 hwc->event_base = 0;
1379 hwc->idx = idx;
1380 } else if (idx >= 0) {
1381 /*
1382 * Try to get the fixed event, if that is already taken
1383 * then try to get a generic event:
1384 */
1385 if (test_and_set_bit(idx, cpuc->used_mask))
1386 goto try_generic;
1387
1388 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1389 /*
1390 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1391 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1392 */
1393 hwc->event_base =
1394 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1395 hwc->idx = idx;
1396 } else {
1397 idx = hwc->idx;
1398 /* Try to get the previous generic event again */
1399 if (test_and_set_bit(idx, cpuc->used_mask)) {
1400try_generic:
1401 idx = find_first_zero_bit(cpuc->used_mask,
1402 x86_pmu.num_events);
1403 if (idx == x86_pmu.num_events)
1404 return -EAGAIN;
1405
1406 set_bit(idx, cpuc->used_mask);
1407 hwc->idx = idx;
1408 }
1409 hwc->config_base = x86_pmu.eventsel;
1410 hwc->event_base = x86_pmu.perfctr;
1411 }
1412
1413 perf_events_lapic_init();
1414
1415 x86_pmu.disable(hwc, idx);
1416 975
976 x86_perf_event_set_period(event);
1417 cpuc->events[idx] = event; 977 cpuc->events[idx] = event;
1418 set_bit(idx, cpuc->active_mask); 978 __set_bit(idx, cpuc->active_mask);
1419 979 x86_pmu.enable(event);
1420 x86_perf_event_set_period(event, hwc, idx);
1421 x86_pmu.enable(hwc, idx);
1422
1423 perf_event_update_userpage(event); 980 perf_event_update_userpage(event);
1424 981
1425 return 0; 982 return 0;
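
x86_pmu_enable() above validates before it commits: the candidate event is collected into the event list, the scheduler is run against a scratch assignment, and only on success is that assignment copied into cpu_hw_events; the hardware itself is reprogrammed later, from hw_perf_enable(). A toy illustration of that validate-then-commit shape; every name is invented and the real scheduling pass is stubbed out:

#include <stdio.h>
#include <string.h>

#define MAXEV 4

struct state {
        int n_events;
        int n_added;                    /* accepted but not yet programmed */
        int assign[MAXEV];
};

/* stand-in for the constraint scheduler: succeed while there is room */
static int try_schedule(int n, int *assign)
{
        if (n > MAXEV)
                return -1;
        for (int i = 0; i < n; i++)
                assign[i] = i;
        return 0;
}

static int add_event(struct state *s)
{
        int assign[MAXEV];
        int n = s->n_events + 1;

        if (try_schedule(n, assign))
                return -1;              /* reject; live state untouched */

        memcpy(s->assign, assign, n * sizeof(int));
        s->n_added += n - s->n_events;  /* programmed lazily later */
        s->n_events = n;
        return 0;
}

int main(void)
{
        struct state s = { 0 };

        while (add_event(&s) == 0)
                ;
        printf("accepted %d events, %d awaiting reprogram\n",
               s.n_events, s.n_added);
        return 0;
}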
@@ -1427,14 +984,8 @@ try_generic:
1427 984
1428static void x86_pmu_unthrottle(struct perf_event *event) 985static void x86_pmu_unthrottle(struct perf_event *event)
1429{ 986{
1430 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 987 int ret = x86_pmu_start(event);
1431 struct hw_perf_event *hwc = &event->hw; 988 WARN_ON_ONCE(ret);
1432
1433 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1434 cpuc->events[hwc->idx] != event))
1435 return;
1436
1437 x86_pmu.enable(hwc, hwc->idx);
1438} 989}
1439 990
1440void perf_event_print_debug(void) 991void perf_event_print_debug(void)
@@ -1464,7 +1015,7 @@ void perf_event_print_debug(void)
1464 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1015 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1465 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1016 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1466 } 1017 }
1467 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1018 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1468 1019
1469 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1020 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1021 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1488,254 +1039,50 @@ void perf_event_print_debug(void)
1488 local_irq_restore(flags); 1039 local_irq_restore(flags);
1489} 1040}
1490 1041
1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) 1042static void x86_pmu_stop(struct perf_event *event)
1492{
1493 struct debug_store *ds = cpuc->ds;
1494 struct bts_record {
1495 u64 from;
1496 u64 to;
1497 u64 flags;
1498 };
1499 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1500 struct bts_record *at, *top;
1501 struct perf_output_handle handle;
1502 struct perf_event_header header;
1503 struct perf_sample_data data;
1504 struct pt_regs regs;
1505
1506 if (!event)
1507 return;
1508
1509 if (!ds)
1510 return;
1511
1512 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1513 top = (struct bts_record *)(unsigned long)ds->bts_index;
1514
1515 if (top <= at)
1516 return;
1517
1518 ds->bts_index = ds->bts_buffer_base;
1519
1520
1521 data.period = event->hw.last_period;
1522 data.addr = 0;
1523 regs.ip = 0;
1524
1525 /*
1526 * Prepare a generic sample, i.e. fill in the invariant fields.
1527 * We will overwrite the from and to address before we output
1528 * the sample.
1529 */
1530 perf_prepare_sample(&header, &data, event, &regs);
1531
1532 if (perf_output_begin(&handle, event,
1533 header.size * (top - at), 1, 1))
1534 return;
1535
1536 for (; at < top; at++) {
1537 data.ip = at->from;
1538 data.addr = at->to;
1539
1540 perf_output_sample(&handle, &header, &data, event);
1541 }
1542
1543 perf_output_end(&handle);
1544
1545 /* There's new data available. */
1546 event->hw.interrupts++;
1547 event->pending_kill = POLL_IN;
1548}
1549
1550static void x86_pmu_disable(struct perf_event *event)
1551{ 1043{
1552 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1044 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1553 struct hw_perf_event *hwc = &event->hw; 1045 struct hw_perf_event *hwc = &event->hw;
1554 int idx = hwc->idx; 1046 int idx = hwc->idx;
1555 1047
1556 /* 1048 if (!__test_and_clear_bit(idx, cpuc->active_mask))
1557 * Must be done before we disable, otherwise the nmi handler 1049 return;
1558 * could reenable again:
1559 */
1560 clear_bit(idx, cpuc->active_mask);
1561 x86_pmu.disable(hwc, idx);
1562 1050
1563 /* 1051 x86_pmu.disable(event);
1564 * Make sure the cleared pointer becomes visible before we
1565 * (potentially) free the event:
1566 */
1567 barrier();
1568 1052
1569 /* 1053 /*
1570 * Drain the remaining delta count out of a event 1054 * Drain the remaining delta count out of a event
1571 * that we are disabling: 1055 * that we are disabling:
1572 */ 1056 */
1573 x86_perf_event_update(event, hwc, idx); 1057 x86_perf_event_update(event);
1574
1575 /* Drain the remaining BTS records. */
1576 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1577 intel_pmu_drain_bts_buffer(cpuc);
1578 1058
1579 cpuc->events[idx] = NULL; 1059 cpuc->events[idx] = NULL;
1580 clear_bit(idx, cpuc->used_mask);
1581
1582 perf_event_update_userpage(event);
1583}
1584
1585/*
1586 * Save and restart an expired event. Called by NMI contexts,
1587 * so it has to be careful about preempting normal event ops:
1588 */
1589static int intel_pmu_save_and_restart(struct perf_event *event)
1590{
1591 struct hw_perf_event *hwc = &event->hw;
1592 int idx = hwc->idx;
1593 int ret;
1594
1595 x86_perf_event_update(event, hwc, idx);
1596 ret = x86_perf_event_set_period(event, hwc, idx);
1597
1598 if (event->state == PERF_EVENT_STATE_ACTIVE)
1599 intel_pmu_enable_event(hwc, idx);
1600
1601 return ret;
1602} 1060}
1603 1061
1604static void intel_pmu_reset(void) 1062static void x86_pmu_disable(struct perf_event *event)
1605{
1606 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1607 unsigned long flags;
1608 int idx;
1609
1610 if (!x86_pmu.num_events)
1611 return;
1612
1613 local_irq_save(flags);
1614
1615 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1616
1617 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1618 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1619 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1620 }
1621 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1622 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1623 }
1624 if (ds)
1625 ds->bts_index = ds->bts_buffer_base;
1626
1627 local_irq_restore(flags);
1628}
1629
1630static int p6_pmu_handle_irq(struct pt_regs *regs)
1631{
1632 struct perf_sample_data data;
1633 struct cpu_hw_events *cpuc;
1634 struct perf_event *event;
1635 struct hw_perf_event *hwc;
1636 int idx, handled = 0;
1637 u64 val;
1638
1639 data.addr = 0;
1640
1641 cpuc = &__get_cpu_var(cpu_hw_events);
1642
1643 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1644 if (!test_bit(idx, cpuc->active_mask))
1645 continue;
1646
1647 event = cpuc->events[idx];
1648 hwc = &event->hw;
1649
1650 val = x86_perf_event_update(event, hwc, idx);
1651 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1652 continue;
1653
1654 /*
1655 * event overflow
1656 */
1657 handled = 1;
1658 data.period = event->hw.last_period;
1659
1660 if (!x86_perf_event_set_period(event, hwc, idx))
1661 continue;
1662
1663 if (perf_event_overflow(event, 1, &data, regs))
1664 p6_pmu_disable_event(hwc, idx);
1665 }
1666
1667 if (handled)
1668 inc_irq_stat(apic_perf_irqs);
1669
1670 return handled;
1671}
1672
1673/*
1674 * This handler is triggered by the local APIC, so the APIC IRQ handling
1675 * rules apply:
1676 */
1677static int intel_pmu_handle_irq(struct pt_regs *regs)
1678{ 1063{
1679 struct perf_sample_data data; 1064 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1680 struct cpu_hw_events *cpuc; 1065 int i;
1681 int bit, loops;
1682 u64 ack, status;
1683
1684 data.addr = 0;
1685
1686 cpuc = &__get_cpu_var(cpu_hw_events);
1687
1688 perf_disable();
1689 intel_pmu_drain_bts_buffer(cpuc);
1690 status = intel_pmu_get_status();
1691 if (!status) {
1692 perf_enable();
1693 return 0;
1694 }
1695
1696 loops = 0;
1697again:
1698 if (++loops > 100) {
1699 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1700 perf_event_print_debug();
1701 intel_pmu_reset();
1702 perf_enable();
1703 return 1;
1704 }
1705 1066
1706 inc_irq_stat(apic_perf_irqs); 1067 x86_pmu_stop(event);
1707 ack = status;
1708 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1709 struct perf_event *event = cpuc->events[bit];
1710 1068
1711 clear_bit(bit, (unsigned long *) &status); 1069 for (i = 0; i < cpuc->n_events; i++) {
1712 if (!test_bit(bit, cpuc->active_mask)) 1070 if (event == cpuc->event_list[i]) {
1713 continue;
1714 1071
1715 if (!intel_pmu_save_and_restart(event)) 1072 if (x86_pmu.put_event_constraints)
1716 continue; 1073 x86_pmu.put_event_constraints(cpuc, event);
1717 1074
1718 data.period = event->hw.last_period; 1075 while (++i < cpuc->n_events)
1076 cpuc->event_list[i-1] = cpuc->event_list[i];
1719 1077
1720 if (perf_event_overflow(event, 1, &data, regs)) 1078 --cpuc->n_events;
1721 intel_pmu_disable_event(&event->hw, bit); 1079 break;
1080 }
1722 } 1081 }
1723 1082 perf_event_update_userpage(event);
1724 intel_pmu_ack_status(ack);
1725
1726 /*
1727 * Repeat if there is more work to be done:
1728 */
1729 status = intel_pmu_get_status();
1730 if (status)
1731 goto again;
1732
1733 perf_enable();
1734
1735 return 1;
1736} 1083}
1737 1084
1738static int amd_pmu_handle_irq(struct pt_regs *regs) 1085static int x86_pmu_handle_irq(struct pt_regs *regs)
1739{ 1086{
1740 struct perf_sample_data data; 1087 struct perf_sample_data data;
1741 struct cpu_hw_events *cpuc; 1088 struct cpu_hw_events *cpuc;
@@ -1744,7 +1091,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1744 int idx, handled = 0; 1091 int idx, handled = 0;
1745 u64 val; 1092 u64 val;
1746 1093
1747 data.addr = 0; 1094 perf_sample_data_init(&data, 0);
1748 1095
1749 cpuc = &__get_cpu_var(cpu_hw_events); 1096 cpuc = &__get_cpu_var(cpu_hw_events);
1750 1097
@@ -1755,7 +1102,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1755 event = cpuc->events[idx]; 1102 event = cpuc->events[idx];
1756 hwc = &event->hw; 1103 hwc = &event->hw;
1757 1104
1758 val = x86_perf_event_update(event, hwc, idx); 1105 val = x86_perf_event_update(event);
1759 if (val & (1ULL << (x86_pmu.event_bits - 1))) 1106 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1760 continue; 1107 continue;
1761 1108
@@ -1765,11 +1112,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1765 handled = 1; 1112 handled = 1;
1766 data.period = event->hw.last_period; 1113 data.period = event->hw.last_period;
1767 1114
1768 if (!x86_perf_event_set_period(event, hwc, idx)) 1115 if (!x86_perf_event_set_period(event))
1769 continue; 1116 continue;
1770 1117
1771 if (perf_event_overflow(event, 1, &data, regs)) 1118 if (perf_event_overflow(event, 1, &data, regs))
1772 amd_pmu_disable_event(hwc, idx); 1119 x86_pmu_stop(event);
1773 } 1120 }
1774 1121
1775 if (handled) 1122 if (handled)
@@ -1852,196 +1199,186 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1852 .priority = 1 1199 .priority = 1
1853}; 1200};
1854 1201
1855static struct x86_pmu p6_pmu = { 1202static struct event_constraint unconstrained;
1856 .name = "p6", 1203static struct event_constraint emptyconstraint;
1857 .handle_irq = p6_pmu_handle_irq,
1858 .disable_all = p6_pmu_disable_all,
1859 .enable_all = p6_pmu_enable_all,
1860 .enable = p6_pmu_enable_event,
1861 .disable = p6_pmu_disable_event,
1862 .eventsel = MSR_P6_EVNTSEL0,
1863 .perfctr = MSR_P6_PERFCTR0,
1864 .event_map = p6_pmu_event_map,
1865 .raw_event = p6_pmu_raw_event,
1866 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1867 .apic = 1,
1868 .max_period = (1ULL << 31) - 1,
1869 .version = 0,
1870 .num_events = 2,
1871 /*
1872 * Events have 40 bits implemented. However they are designed such
1873 * that bits [32-39] are sign extensions of bit 31. As such the
1874 * effective width of a event for P6-like PMU is 32 bits only.
1875 *
1876 * See IA-32 Intel Architecture Software developer manual Vol 3B
1877 */
1878 .event_bits = 32,
1879 .event_mask = (1ULL << 32) - 1,
1880};
1881 1204
1882static struct x86_pmu intel_pmu = { 1205static struct event_constraint *
1883 .name = "Intel", 1206x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1884 .handle_irq = intel_pmu_handle_irq, 1207{
1885 .disable_all = intel_pmu_disable_all, 1208 struct event_constraint *c;
1886 .enable_all = intel_pmu_enable_all,
1887 .enable = intel_pmu_enable_event,
1888 .disable = intel_pmu_disable_event,
1889 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1890 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1891 .event_map = intel_pmu_event_map,
1892 .raw_event = intel_pmu_raw_event,
1893 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1894 .apic = 1,
1895 /*
1896 * Intel PMCs cannot be accessed sanely above 32 bit width,
1897 * so we install an artificial 1<<31 period regardless of
1898 * the generic event period:
1899 */
1900 .max_period = (1ULL << 31) - 1,
1901 .enable_bts = intel_pmu_enable_bts,
1902 .disable_bts = intel_pmu_disable_bts,
1903};
1904 1209
1905static struct x86_pmu amd_pmu = { 1210 if (x86_pmu.event_constraints) {
1906 .name = "AMD", 1211 for_each_event_constraint(c, x86_pmu.event_constraints) {
1907 .handle_irq = amd_pmu_handle_irq, 1212 if ((event->hw.config & c->cmask) == c->code)
1908 .disable_all = amd_pmu_disable_all, 1213 return c;
1909 .enable_all = amd_pmu_enable_all, 1214 }
1910 .enable = amd_pmu_enable_event, 1215 }
1911 .disable = amd_pmu_disable_event, 1216
1912 .eventsel = MSR_K7_EVNTSEL0, 1217 return &unconstrained;
1913 .perfctr = MSR_K7_PERFCTR0, 1218}
1914 .event_map = amd_pmu_event_map,
1915 .raw_event = amd_pmu_raw_event,
1916 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1917 .num_events = 4,
1918 .event_bits = 48,
1919 .event_mask = (1ULL << 48) - 1,
1920 .apic = 1,
1921 /* use highest bit to detect overflow */
1922 .max_period = (1ULL << 47) - 1,
1923};
1924 1219
1925static int p6_pmu_init(void) 1220static int x86_event_sched_in(struct perf_event *event,
1221 struct perf_cpu_context *cpuctx)
1926{ 1222{
1927 switch (boot_cpu_data.x86_model) { 1223 int ret = 0;
1928 case 1:
1929 case 3: /* Pentium Pro */
1930 case 5:
1931 case 6: /* Pentium II */
1932 case 7:
1933 case 8:
1934 case 11: /* Pentium III */
1935 break;
1936 case 9:
1937 case 13:
1938 /* Pentium M */
1939 break;
1940 default:
1941 pr_cont("unsupported p6 CPU model %d ",
1942 boot_cpu_data.x86_model);
1943 return -ENODEV;
1944 }
1945 1224
1946 x86_pmu = p6_pmu; 1225 event->state = PERF_EVENT_STATE_ACTIVE;
1226 event->oncpu = smp_processor_id();
1227 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
1947 1228
1948 if (!cpu_has_apic) { 1229 if (!is_x86_event(event))
1949 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); 1230 ret = event->pmu->enable(event);
1950 pr_info("no hardware sampling interrupt available.\n");
1951 x86_pmu.apic = 0;
1952 }
1953 1231
1954 return 0; 1232 if (!ret && !is_software_event(event))
1233 cpuctx->active_oncpu++;
1234
1235 if (!ret && event->attr.exclusive)
1236 cpuctx->exclusive = 1;
1237
1238 return ret;
1955} 1239}
1956 1240
1957static int intel_pmu_init(void) 1241static void x86_event_sched_out(struct perf_event *event,
1242 struct perf_cpu_context *cpuctx)
1958{ 1243{
1959 union cpuid10_edx edx; 1244 event->state = PERF_EVENT_STATE_INACTIVE;
1960 union cpuid10_eax eax; 1245 event->oncpu = -1;
1961 unsigned int unused;
1962 unsigned int ebx;
1963 int version;
1964
1965 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1966 /* check for P6 processor family */
1967 if (boot_cpu_data.x86 == 6) {
1968 return p6_pmu_init();
1969 } else {
1970 return -ENODEV;
1971 }
1972 }
1973 1246
1974 /* 1247 if (!is_x86_event(event))
1975 * Check whether the Architectural PerfMon supports 1248 event->pmu->disable(event);
1976 * Branch Misses Retired hw_event or not.
1977 */
1978 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1979 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1980 return -ENODEV;
1981 1249
1982 version = eax.split.version_id; 1250 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
1983 if (version < 2) 1251
1984 return -ENODEV; 1252 if (!is_software_event(event))
1253 cpuctx->active_oncpu--;
1985 1254
1986 x86_pmu = intel_pmu; 1255 if (event->attr.exclusive || !cpuctx->active_oncpu)
1987 x86_pmu.version = version; 1256 cpuctx->exclusive = 0;
1988 x86_pmu.num_events = eax.split.num_events; 1257}
1989 x86_pmu.event_bits = eax.split.bit_width;
1990 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
1991 1258
1259/*
1260 * Called to enable a whole group of events.
1261 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1262 * Assumes the caller has disabled interrupts and has
1263 * frozen the PMU with hw_perf_save_disable.
1264 *
1265 * called with PMU disabled. If successful and return value 1,
1266 * then guaranteed to call perf_enable() and hw_perf_enable()
1267 */
1268int hw_perf_group_sched_in(struct perf_event *leader,
1269 struct perf_cpu_context *cpuctx,
1270 struct perf_event_context *ctx)
1271{
1272 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1273 struct perf_event *sub;
1274 int assign[X86_PMC_IDX_MAX];
1275 int n0, n1, ret;
1276
1277 /* n0 = total number of events */
1278 n0 = collect_events(cpuc, leader, true);
1279 if (n0 < 0)
1280 return n0;
1281
1282 ret = x86_schedule_events(cpuc, n0, assign);
1283 if (ret)
1284 return ret;
1285
1286 ret = x86_event_sched_in(leader, cpuctx);
1287 if (ret)
1288 return ret;
1289
1290 n1 = 1;
1291 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1292 if (sub->state > PERF_EVENT_STATE_OFF) {
1293 ret = x86_event_sched_in(sub, cpuctx);
1294 if (ret)
1295 goto undo;
1296 ++n1;
1297 }
1298 }
1992 /* 1299 /*
1993 * Quirk: v2 perfmon does not report fixed-purpose events, so 1300 * copy new assignment, now we know it is possible
1994 * assume at least 3 events: 1301 * will be used by hw_perf_enable()
1995 */ 1302 */
1996 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); 1303 memcpy(cpuc->assign, assign, n0*sizeof(int));
1304
1305 cpuc->n_events = n0;
1306 cpuc->n_added += n1;
1307 ctx->nr_active += n1;
1997 1308
1998 /* 1309 /*
1999 * Install the hw-cache-events table: 1310 * 1 means successful and events are active
1311 * This is not quite true because we defer
1312 * actual activation until hw_perf_enable() but
1313 * this way we ensure the caller won't try to enable
1314 * individual events
2000 */ 1315 */
2001 switch (boot_cpu_data.x86_model) { 1316 return 1;
2002 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1317undo:
2003 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1318 x86_event_sched_out(leader, cpuctx);
2004 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1319 n0 = 1;
2005 case 29: /* six-core 45 nm xeon "Dunnington" */ 1320 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
2006 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 1321 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
2007 sizeof(hw_cache_event_ids)); 1322 x86_event_sched_out(sub, cpuctx);
2008 1323 if (++n0 == n1)
2009 pr_cont("Core2 events, "); 1324 break;
1325 }
1326 }
1327 return ret;
1328}
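
hw_perf_group_sched_in() above follows a dry-run-then-commit pattern: collect the group, compute a counter assignment without touching hardware, and only then copy the assignment into cpuc->assign for hw_perf_enable() to apply, rolling back with x86_event_sched_out() if a sibling fails. The sketch below illustrates only the assignment step with a deliberately simplified greedy scheduler; the structure names and the most-constrained-first strategy are illustrative assumptions, not the actual x86_schedule_events() algorithm.

#include <stdio.h>
#include <stdint.h>

#define NUM_COUNTERS 4

struct fake_event {
	const char *name;
	uint64_t idxmsk;	/* bitmask of counters this event may use */
};

/* Greedy assignment: most-constrained events first, lowest free counter. */
static int schedule_events(struct fake_event *ev, int n, int *assign)
{
	uint64_t used = 0;
	int i, j;

	/* order by constraint weight (popcount); selection sort for brevity */
	for (i = 0; i < n; i++) {
		int best = i;
		for (j = i + 1; j < n; j++)
			if (__builtin_popcountll(ev[j].idxmsk) <
			    __builtin_popcountll(ev[best].idxmsk))
				best = j;
		struct fake_event tmp = ev[i]; ev[i] = ev[best]; ev[best] = tmp;
	}

	for (i = 0; i < n; i++) {
		uint64_t free_mask = ev[i].idxmsk & ~used;
		if (!free_mask)
			return -1;		/* group cannot be scheduled */
		assign[i] = __builtin_ctzll(free_mask);
		used |= 1ULL << assign[i];
	}
	return 0;
}

int main(void)
{
	struct fake_event group[] = {
		{ "cycles",       0xf },	/* any counter */
		{ "fp_assist",    0x2 },	/* counter 1 only */
		{ "instructions", 0xf },
	};
	int assign[NUM_COUNTERS];

	if (!schedule_events(group, 3, assign))
		for (int i = 0; i < 3; i++)
			printf("%s -> counter %d\n", group[i].name, assign[i]);
	return 0;
}
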
1329
1330#include "perf_event_amd.c"
1331#include "perf_event_p6.c"
1332#include "perf_event_intel.c"
1333
1334static int __cpuinit
1335x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1336{
1337 unsigned int cpu = (long)hcpu;
1338 int ret = NOTIFY_OK;
1339
1340 switch (action & ~CPU_TASKS_FROZEN) {
1341 case CPU_UP_PREPARE:
1342 if (x86_pmu.cpu_prepare)
1343 ret = x86_pmu.cpu_prepare(cpu);
2010 break; 1344 break;
2011 default:
2012 case 26:
2013 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2014 sizeof(hw_cache_event_ids));
2015 1345
2016 pr_cont("Nehalem/Corei7 events, "); 1346 case CPU_STARTING:
1347 if (x86_pmu.cpu_starting)
1348 x86_pmu.cpu_starting(cpu);
2017 break; 1349 break;
2018 case 28:
2019 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2020 sizeof(hw_cache_event_ids));
2021 1350
2022 pr_cont("Atom events, "); 1351 case CPU_DYING:
1352 if (x86_pmu.cpu_dying)
1353 x86_pmu.cpu_dying(cpu);
1354 break;
1355
1356 case CPU_UP_CANCELED:
1357 case CPU_DEAD:
1358 if (x86_pmu.cpu_dead)
1359 x86_pmu.cpu_dead(cpu);
1360 break;
1361
1362 default:
2023 break; 1363 break;
2024 } 1364 }
2025 return 0; 1365
1366 return ret;
2026} 1367}
2027 1368
2028static int amd_pmu_init(void) 1369static void __init pmu_check_apic(void)
2029{ 1370{
2030 /* Performance-monitoring supported from K7 and later: */ 1371 if (cpu_has_apic)
2031 if (boot_cpu_data.x86 < 6) 1372 return;
2032 return -ENODEV;
2033
2034 x86_pmu = amd_pmu;
2035
2036 /* Events are common for all AMDs */
2037 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2038 sizeof(hw_cache_event_ids));
2039 1373
2040 return 0; 1374 x86_pmu.apic = 0;
1375 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1376 pr_info("no hardware sampling interrupt available.\n");
2041} 1377}
2042 1378
2043void __init init_hw_perf_events(void) 1379void __init init_hw_perf_events(void)
2044{ 1380{
1381 struct event_constraint *c;
2045 int err; 1382 int err;
2046 1383
2047 pr_info("Performance Events: "); 1384 pr_info("Performance Events: ");
@@ -2061,6 +1398,8 @@ void __init init_hw_perf_events(void)
2061 return; 1398 return;
2062 } 1399 }
2063 1400
1401 pmu_check_apic();
1402
2064 pr_cont("%s PMU driver.\n", x86_pmu.name); 1403 pr_cont("%s PMU driver.\n", x86_pmu.name);
2065 1404
2066 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 1405 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
@@ -2084,6 +1423,20 @@ void __init init_hw_perf_events(void)
2084 perf_events_lapic_init(); 1423 perf_events_lapic_init();
2085 register_die_notifier(&perf_event_nmi_notifier); 1424 register_die_notifier(&perf_event_nmi_notifier);
2086 1425
1426 unconstrained = (struct event_constraint)
1427 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1428 0, x86_pmu.num_events);
1429
1430 if (x86_pmu.event_constraints) {
1431 for_each_event_constraint(c, x86_pmu.event_constraints) {
1432 if (c->cmask != INTEL_ARCH_FIXED_MASK)
1433 continue;
1434
1435 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
1436 c->weight += x86_pmu.num_events;
1437 }
1438 }
1439
2087 pr_info("... version: %d\n", x86_pmu.version); 1440 pr_info("... version: %d\n", x86_pmu.version);
2088 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1441 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2089 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1442 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2091,25 +1444,92 @@ void __init init_hw_perf_events(void)
2091 pr_info("... max period: %016Lx\n", x86_pmu.max_period); 1444 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
2092 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); 1445 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
2093 pr_info("... event mask: %016Lx\n", perf_event_mask); 1446 pr_info("... event mask: %016Lx\n", perf_event_mask);
1447
1448 perf_cpu_notifier(x86_pmu_notifier);
2094} 1449}
2095 1450
2096static inline void x86_pmu_read(struct perf_event *event) 1451static inline void x86_pmu_read(struct perf_event *event)
2097{ 1452{
2098 x86_perf_event_update(event, &event->hw, event->hw.idx); 1453 x86_perf_event_update(event);
2099} 1454}
2100 1455
2101static const struct pmu pmu = { 1456static const struct pmu pmu = {
2102 .enable = x86_pmu_enable, 1457 .enable = x86_pmu_enable,
2103 .disable = x86_pmu_disable, 1458 .disable = x86_pmu_disable,
1459 .start = x86_pmu_start,
1460 .stop = x86_pmu_stop,
2104 .read = x86_pmu_read, 1461 .read = x86_pmu_read,
2105 .unthrottle = x86_pmu_unthrottle, 1462 .unthrottle = x86_pmu_unthrottle,
2106}; 1463};
2107 1464
1465/*
1466 * validate a single event group
1467 *
 1468 * validation includes:
 1469 * - check events are compatible with each other
1470 * - events do not compete for the same counter
1471 * - number of events <= number of counters
1472 *
1473 * validation ensures the group can be loaded onto the
1474 * PMU if it was the only group available.
1475 */
1476static int validate_group(struct perf_event *event)
1477{
1478 struct perf_event *leader = event->group_leader;
1479 struct cpu_hw_events *fake_cpuc;
1480 int ret, n;
1481
1482 ret = -ENOMEM;
1483 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1484 if (!fake_cpuc)
1485 goto out;
1486
1487 /*
1488 * the event is not yet connected with its
1489 * siblings therefore we must first collect
1490 * existing siblings, then add the new event
1491 * before we can simulate the scheduling
1492 */
1493 ret = -ENOSPC;
1494 n = collect_events(fake_cpuc, leader, true);
1495 if (n < 0)
1496 goto out_free;
1497
1498 fake_cpuc->n_events = n;
1499 n = collect_events(fake_cpuc, event, false);
1500 if (n < 0)
1501 goto out_free;
1502
1503 fake_cpuc->n_events = n;
1504
1505 ret = x86_schedule_events(fake_cpuc, n, NULL);
1506
1507out_free:
1508 kfree(fake_cpuc);
1509out:
1510 return ret;
1511}
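
validate_group() runs at event-creation time against a throw-away cpu_hw_events, so a group that could never fit on the PMU is rejected with -ENOSPC before it is ever scheduled. From user space that failure surfaces as perf_event_open() failing for the sibling that overflows the group; a hypothetical test program (not part of this patch) that creates a two-event group looks roughly like this:

/* Build: gcc -o grouptest grouptest.c   (Linux, x86) */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_open(__u64 config, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size     = sizeof(attr);
	attr.type     = PERF_TYPE_HARDWARE;
	attr.config   = config;
	attr.disabled = (group_fd == -1);	/* start the whole group disabled */

	/* pid=0: this task, cpu=-1: any CPU, flags=0 */
	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	int leader  = perf_open(PERF_COUNT_HW_CPU_CYCLES, -1);
	int sibling = perf_open(PERF_COUNT_HW_INSTRUCTIONS, leader);

	if (leader < 0 || sibling < 0)
		perror("perf_event_open");	/* e.g. when the group cannot fit */
	else
		printf("group created: leader fd=%d, sibling fd=%d\n",
		       leader, sibling);
	return 0;
}
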
1512
2108const struct pmu *hw_perf_event_init(struct perf_event *event) 1513const struct pmu *hw_perf_event_init(struct perf_event *event)
2109{ 1514{
1515 const struct pmu *tmp;
2110 int err; 1516 int err;
2111 1517
2112 err = __hw_perf_event_init(event); 1518 err = __hw_perf_event_init(event);
1519 if (!err) {
1520 /*
1521 * we temporarily connect event to its pmu
1522 * such that validate_group() can classify
1523 * it as an x86 event using is_x86_event()
1524 */
1525 tmp = event->pmu;
1526 event->pmu = &pmu;
1527
1528 if (event->group_leader != event)
1529 err = validate_group(event);
1530
1531 event->pmu = tmp;
1532 }
2113 if (err) { 1533 if (err) {
2114 if (event->destroy) 1534 if (event->destroy)
2115 event->destroy(event); 1535 event->destroy(event);
@@ -2132,7 +1552,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2132 1552
2133static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1553static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2134static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1554static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2135static DEFINE_PER_CPU(int, in_nmi_frame);
2136 1555
2137 1556
2138static void 1557static void
@@ -2148,9 +1567,6 @@ static void backtrace_warning(void *data, char *msg)
2148 1567
2149static int backtrace_stack(void *data, char *name) 1568static int backtrace_stack(void *data, char *name)
2150{ 1569{
2151 per_cpu(in_nmi_frame, smp_processor_id()) =
2152 x86_is_stack_id(NMI_STACK, name);
2153
2154 return 0; 1570 return 0;
2155} 1571}
2156 1572
@@ -2158,9 +1574,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2158{ 1574{
2159 struct perf_callchain_entry *entry = data; 1575 struct perf_callchain_entry *entry = data;
2160 1576
2161 if (per_cpu(in_nmi_frame, smp_processor_id()))
2162 return;
2163
2164 if (reliable) 1577 if (reliable)
2165 callchain_store(entry, addr); 1578 callchain_store(entry, addr);
2166} 1579}
@@ -2170,6 +1583,7 @@ static const struct stacktrace_ops backtrace_ops = {
2170 .warning_symbol = backtrace_warning_symbol, 1583 .warning_symbol = backtrace_warning_symbol,
2171 .stack = backtrace_stack, 1584 .stack = backtrace_stack,
2172 .address = backtrace_address, 1585 .address = backtrace_address,
1586 .walk_stack = print_context_stack_bp,
2173}; 1587};
2174 1588
2175#include "../dumpstack.h" 1589#include "../dumpstack.h"
@@ -2180,7 +1594,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2180 callchain_store(entry, PERF_CONTEXT_KERNEL); 1594 callchain_store(entry, PERF_CONTEXT_KERNEL);
2181 callchain_store(entry, regs->ip); 1595 callchain_store(entry, regs->ip);
2182 1596
2183 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); 1597 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
2184} 1598}
2185 1599
2186/* 1600/*
@@ -2218,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2218 return len; 1632 return len;
2219} 1633}
2220 1634
2221static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 1635#ifdef CONFIG_COMPAT
1636static inline int
1637perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
2222{ 1638{
2223 unsigned long bytes; 1639 /* 32-bit process in 64-bit kernel. */
1640 struct stack_frame_ia32 frame;
1641 const void __user *fp;
1642
1643 if (!test_thread_flag(TIF_IA32))
1644 return 0;
1645
1646 fp = compat_ptr(regs->bp);
1647 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1648 unsigned long bytes;
1649 frame.next_frame = 0;
1650 frame.return_address = 0;
2224 1651
2225 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); 1652 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1653 if (bytes != sizeof(frame))
1654 break;
2226 1655
2227 return bytes == sizeof(*frame); 1656 if (fp < compat_ptr(regs->sp))
1657 break;
1658
1659 callchain_store(entry, frame.return_address);
1660 fp = compat_ptr(frame.next_frame);
1661 }
1662 return 1;
2228} 1663}
1664#else
1665static inline int
1666perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1667{
1668 return 0;
1669}
1670#endif
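
Both the compat and the native user callchain walks follow the same frame-pointer chain: each saved frame begins with a pointer to the caller's frame followed by the return address, and the walk stops when the chain stops moving toward higher addresses. A user-space sketch of the same walk, assuming the binary keeps frame pointers (e.g. built with -fno-omit-frame-pointer):

#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

/* Walk our own frame-pointer chain; depth-limited like PERF_MAX_STACK_DEPTH. */
static void dump_callchain(int max_depth)
{
	struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < max_depth) {
		printf("#%d  %#lx\n", depth, fp->return_address);

		/* frames must move toward higher addresses (stack grows down) */
		if (fp->next_frame <= fp)
			break;
		fp = fp->next_frame;
		depth++;
	}
}

static void __attribute__((noinline)) leaf(void) { dump_callchain(16); }
static void __attribute__((noinline)) mid(void)  { leaf(); }

int main(void)
{
	mid();
	return 0;
}
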
2229 1671
2230static void 1672static void
2231perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1673perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -2241,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2241 callchain_store(entry, PERF_CONTEXT_USER); 1683 callchain_store(entry, PERF_CONTEXT_USER);
2242 callchain_store(entry, regs->ip); 1684 callchain_store(entry, regs->ip);
2243 1685
1686 if (perf_callchain_user32(regs, entry))
1687 return;
1688
2244 while (entry->nr < PERF_MAX_STACK_DEPTH) { 1689 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1690 unsigned long bytes;
2245 frame.next_frame = NULL; 1691 frame.next_frame = NULL;
2246 frame.return_address = 0; 1692 frame.return_address = 0;
2247 1693
2248 if (!copy_stack_frame(fp, &frame)) 1694 bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
1695 if (bytes != sizeof(frame))
2249 break; 1696 break;
2250 1697
2251 if ((unsigned long)fp < regs->sp) 1698 if ((unsigned long)fp < regs->sp)
@@ -2266,9 +1713,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2266 1713
2267 is_user = user_mode(regs); 1714 is_user = user_mode(regs);
2268 1715
2269 if (!current || current->pid == 0)
2270 return;
2271
2272 if (is_user && current->state != TASK_RUNNING) 1716 if (is_user && current->state != TASK_RUNNING)
2273 return; 1717 return;
2274 1718
@@ -2295,7 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2295 return entry; 1739 return entry;
2296} 1740}
2297 1741
2298void hw_perf_event_setup_online(int cpu) 1742void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2299{ 1743{
2300 init_debug_store_on_cpu(cpu); 1744 regs->ip = ip;
1745 /*
1746 * perf_arch_fetch_caller_regs adds another call, we need to increment
1747 * the skip level
1748 */
1749 regs->bp = rewind_frame_pointer(skip + 1);
1750 regs->cs = __KERNEL_CS;
1751 local_save_flags(regs->flags);
2301} 1752}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..db6f7d4056e1
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,422 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
 68	[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches  */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
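
The three array dimensions correspond to the fields packed into attr.config for PERF_TYPE_HW_CACHE events: cache id in byte 0, operation in byte 1, result in byte 2. A minimal decoder for that layout (user-space sketch following the perf ABI encoding):

#include <stdio.h>
#include <stdint.h>

/* attr.config layout for PERF_TYPE_HW_CACHE events:
 *   byte 0: cache id   (L1D, L1I, LL, DTLB, ITLB, BPU)
 *   byte 1: operation  (READ, WRITE, PREFETCH)
 *   byte 2: result     (ACCESS, MISS)
 */
static void decode_hw_cache_config(uint64_t config, unsigned *cache,
				   unsigned *op, unsigned *result)
{
	*cache  = (config >>  0) & 0xff;
	*op     = (config >>  8) & 0xff;
	*result = (config >> 16) & 0xff;
}

int main(void)
{
	unsigned cache, op, result;

	/* L1D (0) | READ (0 << 8) | MISS (1 << 16) */
	decode_hw_cache_config(0x00010000ULL, &cache, &op, &result);
	printf("cache=%u op=%u result=%u\n", cache, op, result);
	return 0;
}
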
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
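
amd_pmu_raw_event() keeps only the fields a user may legitimately set in a raw config (event code, unit mask, edge, invert, counter/reg mask) and silently drops everything else, in particular the enable and APIC-interrupt bits that the core code manages itself. A stand-alone illustration of the masking, with an illustrative raw value:

#include <stdio.h>
#include <stdint.h>

#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK \
	(K7_EVNTSEL_EVENT_MASK | K7_EVNTSEL_UNIT_MASK | \
	 K7_EVNTSEL_EDGE_MASK  | K7_EVNTSEL_INV_MASK  | K7_EVNTSEL_REG_MASK)

int main(void)
{
	/* event 0xf6 with the enable (bit 22) and interrupt (bit 20) bits set */
	uint64_t raw      = 0x005000f6ULL;
	uint64_t accepted = raw & K7_EVNTSEL_MASK;

	/* the control bits are stripped; only the event fields survive */
	printf("raw=%#llx accepted=%#llx\n",
	       (unsigned long long)raw, (unsigned long long)accepted);
	return 0;
}
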
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
139
140static inline int amd_has_nb(struct cpu_hw_events *cpuc)
141{
142 struct amd_nb *nb = cpuc->amd_nb;
143
144 return nb && nb->nb_id != -1;
145}
146
147static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
148 struct perf_event *event)
149{
150 struct hw_perf_event *hwc = &event->hw;
151 struct amd_nb *nb = cpuc->amd_nb;
152 int i;
153
154 /*
155 * only care about NB events
156 */
157 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
158 return;
159
160 /*
161 * need to scan whole list because event may not have
162 * been assigned during scheduling
163 *
164 * no race condition possible because event can only
165 * be removed on one CPU at a time AND PMU is disabled
166 * when we come here
167 */
168 for (i = 0; i < x86_pmu.num_events; i++) {
169 if (nb->owners[i] == event) {
170 cmpxchg(nb->owners+i, event, NULL);
171 break;
172 }
173 }
174}
175
176 /*
177 * AMD64 NorthBridge events need special treatment because
178 * counter access needs to be synchronized across all cores
179 * of a package. Refer to BKDG section 3.12
180 *
181 * NB events are events measuring L3 cache, Hypertransport
182 * traffic. They are identified by an event code >= 0xe00.
 183 * They measure events on the NorthBridge which is shared
184 * by all cores on a package. NB events are counted on a
185 * shared set of counters. When a NB event is programmed
186 * in a counter, the data actually comes from a shared
187 * counter. Thus, access to those counters needs to be
188 * synchronized.
189 *
190 * We implement the synchronization such that no two cores
191 * can be measuring NB events using the same counters. Thus,
192 * we maintain a per-NB allocation table. The available slot
193 * is propagated using the event_constraint structure.
194 *
195 * We provide only one choice for each NB event based on
196 * the fact that only NB events have restrictions. Consequently,
197 * if a counter is available, there is a guarantee the NB event
198 * will be assigned to it. If no slot is available, an empty
199 * constraint is returned and scheduling will eventually fail
200 * for this event.
201 *
 202 * Note that all cores attached to the same NB compete for the same
203 * counters to host NB events, this is why we use atomic ops. Some
204 * multi-chip CPUs may have more than one NB.
205 *
206 * Given that resources are allocated (cmpxchg), they must be
207 * eventually freed for others to use. This is accomplished by
208 * calling amd_put_event_constraints().
209 *
210 * Non NB events are not impacted by this restriction.
211 */
212static struct event_constraint *
213amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
214{
215 struct hw_perf_event *hwc = &event->hw;
216 struct amd_nb *nb = cpuc->amd_nb;
217 struct perf_event *old = NULL;
218 int max = x86_pmu.num_events;
219 int i, j, k = -1;
220
221 /*
222 * if not NB event or no NB, then no constraints
223 */
224 if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
225 return &unconstrained;
226
227 /*
228 * detect if already present, if so reuse
229 *
230 * cannot merge with actual allocation
231 * because of possible holes
232 *
233 * event can already be present yet not assigned (in hwc->idx)
234 * because of successive calls to x86_schedule_events() from
235 * hw_perf_group_sched_in() without hw_perf_enable()
236 */
237 for (i = 0; i < max; i++) {
238 /*
239 * keep track of first free slot
240 */
241 if (k == -1 && !nb->owners[i])
242 k = i;
243
244 /* already present, reuse */
245 if (nb->owners[i] == event)
246 goto done;
247 }
248 /*
249 * not present, so grab a new slot
250 * starting either at:
251 */
252 if (hwc->idx != -1) {
253 /* previous assignment */
254 i = hwc->idx;
255 } else if (k != -1) {
256 /* start from free slot found */
257 i = k;
258 } else {
259 /*
260 * event not found, no slot found in
261 * first pass, try again from the
262 * beginning
263 */
264 i = 0;
265 }
266 j = i;
267 do {
268 old = cmpxchg(nb->owners+i, NULL, event);
269 if (!old)
270 break;
271 if (++i == max)
272 i = 0;
273 } while (i != j);
274done:
275 if (!old)
276 return &nb->event_constraints[i];
277
278 return &emptyconstraint;
279}
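
Slots in the shared owners[] table are claimed and released with cmpxchg(), so all cores attached to the same northbridge can race on allocation without holding a lock. The same claim/release pattern expressed in portable C11 atomics (a simplified sketch; a plain pointer array stands in for nb->owners[]):

#include <stdio.h>
#include <stdatomic.h>

#define NUM_SLOTS 4

/* one shared slot table per "northbridge" */
static _Atomic(void *) owners[NUM_SLOTS];

/* try to claim any free slot for 'event'; returns slot index or -1 */
static int claim_slot(void *event)
{
	for (int i = 0; i < NUM_SLOTS; i++) {
		void *expected = NULL;

		/* analogous to cmpxchg(nb->owners + i, NULL, event) */
		if (atomic_compare_exchange_strong(&owners[i], &expected, event))
			return i;
	}
	return -1;			/* all shared counters busy: empty constraint */
}

static void release_slot(void *event)
{
	for (int i = 0; i < NUM_SLOTS; i++) {
		void *expected = event;

		/* analogous to cmpxchg(nb->owners + i, event, NULL) */
		if (atomic_compare_exchange_strong(&owners[i], &expected, NULL))
			return;
	}
}

int main(void)
{
	int ev1, ev2;
	int slot1 = claim_slot(&ev1);
	int slot2 = claim_slot(&ev2);

	printf("ev1 -> slot %d, ev2 -> slot %d\n", slot1, slot2);
	release_slot(&ev1);
	release_slot(&ev2);
	return 0;
}
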
280
281static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
282{
283 struct amd_nb *nb;
284 int i;
285
286 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
287 if (!nb)
288 return NULL;
289
290 memset(nb, 0, sizeof(*nb));
291 nb->nb_id = nb_id;
292
293 /*
294 * initialize all possible NB constraints
295 */
296 for (i = 0; i < x86_pmu.num_events; i++) {
297 __set_bit(i, nb->event_constraints[i].idxmsk);
298 nb->event_constraints[i].weight = 1;
299 }
300 return nb;
301}
302
303static int amd_pmu_cpu_prepare(int cpu)
304{
305 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
306
307 WARN_ON_ONCE(cpuc->amd_nb);
308
309 if (boot_cpu_data.x86_max_cores < 2)
310 return NOTIFY_OK;
311
312 cpuc->amd_nb = amd_alloc_nb(cpu, -1);
313 if (!cpuc->amd_nb)
314 return NOTIFY_BAD;
315
316 return NOTIFY_OK;
317}
318
319static void amd_pmu_cpu_starting(int cpu)
320{
321 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
322 struct amd_nb *nb;
323 int i, nb_id;
324
325 if (boot_cpu_data.x86_max_cores < 2)
326 return;
327
328 nb_id = amd_get_nb_id(cpu);
329 WARN_ON_ONCE(nb_id == BAD_APICID);
330
331 raw_spin_lock(&amd_nb_lock);
332
333 for_each_online_cpu(i) {
334 nb = per_cpu(cpu_hw_events, i).amd_nb;
335 if (WARN_ON_ONCE(!nb))
336 continue;
337
338 if (nb->nb_id == nb_id) {
339 kfree(cpuc->amd_nb);
340 cpuc->amd_nb = nb;
341 break;
342 }
343 }
344
345 cpuc->amd_nb->nb_id = nb_id;
346 cpuc->amd_nb->refcnt++;
347
348 raw_spin_unlock(&amd_nb_lock);
349}
350
351static void amd_pmu_cpu_dead(int cpu)
352{
353 struct cpu_hw_events *cpuhw;
354
355 if (boot_cpu_data.x86_max_cores < 2)
356 return;
357
358 cpuhw = &per_cpu(cpu_hw_events, cpu);
359
360 raw_spin_lock(&amd_nb_lock);
361
362 if (cpuhw->amd_nb) {
363 struct amd_nb *nb = cpuhw->amd_nb;
364
365 if (nb->nb_id == -1 || --nb->refcnt == 0)
366 kfree(nb);
367
368 cpuhw->amd_nb = NULL;
369 }
370
371 raw_spin_unlock(&amd_nb_lock);
372}
373
374static __initconst struct x86_pmu amd_pmu = {
375 .name = "AMD",
376 .handle_irq = x86_pmu_handle_irq,
377 .disable_all = x86_pmu_disable_all,
378 .enable_all = x86_pmu_enable_all,
379 .enable = x86_pmu_enable_event,
380 .disable = x86_pmu_disable_event,
381 .eventsel = MSR_K7_EVNTSEL0,
382 .perfctr = MSR_K7_PERFCTR0,
383 .event_map = amd_pmu_event_map,
384 .raw_event = amd_pmu_raw_event,
385 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
386 .num_events = 4,
387 .event_bits = 48,
388 .event_mask = (1ULL << 48) - 1,
389 .apic = 1,
390 /* use highest bit to detect overflow */
391 .max_period = (1ULL << 47) - 1,
392 .get_event_constraints = amd_get_event_constraints,
393 .put_event_constraints = amd_put_event_constraints,
394
395 .cpu_prepare = amd_pmu_cpu_prepare,
396 .cpu_starting = amd_pmu_cpu_starting,
397 .cpu_dead = amd_pmu_cpu_dead,
398};
399
400static __init int amd_pmu_init(void)
401{
402 /* Performance-monitoring supported from K7 and later: */
403 if (boot_cpu_data.x86 < 6)
404 return -ENODEV;
405
406 x86_pmu = amd_pmu;
407
408 /* Events are common for all AMDs */
409 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
410 sizeof(hw_cache_event_ids));
411
412 return 0;
413}
414
415#else /* CONFIG_CPU_SUP_AMD */
416
417static int amd_pmu_init(void)
418{
419 return 0;
420}
421
422#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..9c794ac87837
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,980 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon, used on Core and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
31 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
32 /*
33 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
34 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
35 * ratio between these counters.
36 */
37 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
38 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
39 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
40 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
41 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
42 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
43 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
44 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
45 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
46 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
47 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
48 EVENT_CONSTRAINT_END
49};
50
51static struct event_constraint intel_nehalem_event_constraints[] =
52{
53 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
54 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
55 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
56 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
57 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
58 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
59 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
60 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
61 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
62 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
63 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
64 EVENT_CONSTRAINT_END
65};
66
67static struct event_constraint intel_westmere_event_constraints[] =
68{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
73 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
74 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
75 EVENT_CONSTRAINT_END
76};
77
78static struct event_constraint intel_gen_event_constraints[] =
79{
80 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
81 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
82 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
83 EVENT_CONSTRAINT_END
84};
85
86static u64 intel_pmu_event_map(int hw_event)
87{
88 return intel_perfmon_event_map[hw_event];
89}
90
91static __initconst u64 westmere_hw_cache_event_ids
92 [PERF_COUNT_HW_CACHE_MAX]
93 [PERF_COUNT_HW_CACHE_OP_MAX]
94 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
95{
96 [ C(L1D) ] = {
97 [ C(OP_READ) ] = {
98 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
99 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
100 },
101 [ C(OP_WRITE) ] = {
 102 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
103 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
104 },
105 [ C(OP_PREFETCH) ] = {
106 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
107 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
108 },
109 },
110 [ C(L1I ) ] = {
111 [ C(OP_READ) ] = {
112 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
113 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
114 },
115 [ C(OP_WRITE) ] = {
116 [ C(RESULT_ACCESS) ] = -1,
117 [ C(RESULT_MISS) ] = -1,
118 },
119 [ C(OP_PREFETCH) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0,
121 [ C(RESULT_MISS) ] = 0x0,
122 },
123 },
124 [ C(LL ) ] = {
125 [ C(OP_READ) ] = {
126 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
127 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
128 },
129 [ C(OP_WRITE) ] = {
130 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
131 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
132 },
133 [ C(OP_PREFETCH) ] = {
134 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
135 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
136 },
137 },
138 [ C(DTLB) ] = {
139 [ C(OP_READ) ] = {
140 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
141 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
142 },
143 [ C(OP_WRITE) ] = {
 144 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
145 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
146 },
147 [ C(OP_PREFETCH) ] = {
148 [ C(RESULT_ACCESS) ] = 0x0,
149 [ C(RESULT_MISS) ] = 0x0,
150 },
151 },
152 [ C(ITLB) ] = {
153 [ C(OP_READ) ] = {
154 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
155 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
156 },
157 [ C(OP_WRITE) ] = {
158 [ C(RESULT_ACCESS) ] = -1,
159 [ C(RESULT_MISS) ] = -1,
160 },
161 [ C(OP_PREFETCH) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 },
166 [ C(BPU ) ] = {
167 [ C(OP_READ) ] = {
168 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
169 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
170 },
171 [ C(OP_WRITE) ] = {
172 [ C(RESULT_ACCESS) ] = -1,
173 [ C(RESULT_MISS) ] = -1,
174 },
175 [ C(OP_PREFETCH) ] = {
176 [ C(RESULT_ACCESS) ] = -1,
177 [ C(RESULT_MISS) ] = -1,
178 },
179 },
180};
181
182static __initconst u64 nehalem_hw_cache_event_ids
183 [PERF_COUNT_HW_CACHE_MAX]
184 [PERF_COUNT_HW_CACHE_OP_MAX]
185 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
186{
187 [ C(L1D) ] = {
188 [ C(OP_READ) ] = {
189 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
190 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
191 },
192 [ C(OP_WRITE) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
194 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
195 },
196 [ C(OP_PREFETCH) ] = {
197 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
198 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
199 },
200 },
201 [ C(L1I ) ] = {
202 [ C(OP_READ) ] = {
203 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
204 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
205 },
206 [ C(OP_WRITE) ] = {
207 [ C(RESULT_ACCESS) ] = -1,
208 [ C(RESULT_MISS) ] = -1,
209 },
210 [ C(OP_PREFETCH) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0,
212 [ C(RESULT_MISS) ] = 0x0,
213 },
214 },
215 [ C(LL ) ] = {
216 [ C(OP_READ) ] = {
217 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
218 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
219 },
220 [ C(OP_WRITE) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
222 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
223 },
224 [ C(OP_PREFETCH) ] = {
225 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
226 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
227 },
228 },
229 [ C(DTLB) ] = {
230 [ C(OP_READ) ] = {
231 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
232 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
233 },
234 [ C(OP_WRITE) ] = {
235 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
236 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
237 },
238 [ C(OP_PREFETCH) ] = {
239 [ C(RESULT_ACCESS) ] = 0x0,
240 [ C(RESULT_MISS) ] = 0x0,
241 },
242 },
243 [ C(ITLB) ] = {
244 [ C(OP_READ) ] = {
245 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
246 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
247 },
248 [ C(OP_WRITE) ] = {
249 [ C(RESULT_ACCESS) ] = -1,
250 [ C(RESULT_MISS) ] = -1,
251 },
252 [ C(OP_PREFETCH) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 },
257 [ C(BPU ) ] = {
258 [ C(OP_READ) ] = {
259 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
260 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
261 },
262 [ C(OP_WRITE) ] = {
263 [ C(RESULT_ACCESS) ] = -1,
264 [ C(RESULT_MISS) ] = -1,
265 },
266 [ C(OP_PREFETCH) ] = {
267 [ C(RESULT_ACCESS) ] = -1,
268 [ C(RESULT_MISS) ] = -1,
269 },
270 },
271};
272
273static __initconst u64 core2_hw_cache_event_ids
274 [PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
277{
278 [ C(L1D) ] = {
279 [ C(OP_READ) ] = {
280 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
281 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
282 },
283 [ C(OP_WRITE) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
285 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
286 },
287 [ C(OP_PREFETCH) ] = {
288 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
289 [ C(RESULT_MISS) ] = 0,
290 },
291 },
292 [ C(L1I ) ] = {
293 [ C(OP_READ) ] = {
294 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
295 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
296 },
297 [ C(OP_WRITE) ] = {
298 [ C(RESULT_ACCESS) ] = -1,
299 [ C(RESULT_MISS) ] = -1,
300 },
301 [ C(OP_PREFETCH) ] = {
302 [ C(RESULT_ACCESS) ] = 0,
303 [ C(RESULT_MISS) ] = 0,
304 },
305 },
306 [ C(LL ) ] = {
307 [ C(OP_READ) ] = {
308 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
309 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
310 },
311 [ C(OP_WRITE) ] = {
312 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
313 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
314 },
315 [ C(OP_PREFETCH) ] = {
316 [ C(RESULT_ACCESS) ] = 0,
317 [ C(RESULT_MISS) ] = 0,
318 },
319 },
320 [ C(DTLB) ] = {
321 [ C(OP_READ) ] = {
322 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
323 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
324 },
325 [ C(OP_WRITE) ] = {
326 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
327 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
328 },
329 [ C(OP_PREFETCH) ] = {
330 [ C(RESULT_ACCESS) ] = 0,
331 [ C(RESULT_MISS) ] = 0,
332 },
333 },
334 [ C(ITLB) ] = {
335 [ C(OP_READ) ] = {
336 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
337 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
338 },
339 [ C(OP_WRITE) ] = {
340 [ C(RESULT_ACCESS) ] = -1,
341 [ C(RESULT_MISS) ] = -1,
342 },
343 [ C(OP_PREFETCH) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 },
348 [ C(BPU ) ] = {
349 [ C(OP_READ) ] = {
350 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
351 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
352 },
353 [ C(OP_WRITE) ] = {
354 [ C(RESULT_ACCESS) ] = -1,
355 [ C(RESULT_MISS) ] = -1,
356 },
357 [ C(OP_PREFETCH) ] = {
358 [ C(RESULT_ACCESS) ] = -1,
359 [ C(RESULT_MISS) ] = -1,
360 },
361 },
362};
363
364static __initconst u64 atom_hw_cache_event_ids
365 [PERF_COUNT_HW_CACHE_MAX]
366 [PERF_COUNT_HW_CACHE_OP_MAX]
367 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
368{
369 [ C(L1D) ] = {
370 [ C(OP_READ) ] = {
371 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
372 [ C(RESULT_MISS) ] = 0,
373 },
374 [ C(OP_WRITE) ] = {
375 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
376 [ C(RESULT_MISS) ] = 0,
377 },
378 [ C(OP_PREFETCH) ] = {
379 [ C(RESULT_ACCESS) ] = 0x0,
380 [ C(RESULT_MISS) ] = 0,
381 },
382 },
383 [ C(L1I ) ] = {
384 [ C(OP_READ) ] = {
385 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
386 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
387 },
388 [ C(OP_WRITE) ] = {
389 [ C(RESULT_ACCESS) ] = -1,
390 [ C(RESULT_MISS) ] = -1,
391 },
392 [ C(OP_PREFETCH) ] = {
393 [ C(RESULT_ACCESS) ] = 0,
394 [ C(RESULT_MISS) ] = 0,
395 },
396 },
397 [ C(LL ) ] = {
398 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
400 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
401 },
402 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
404 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
405 },
406 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0,
408 [ C(RESULT_MISS) ] = 0,
409 },
410 },
411 [ C(DTLB) ] = {
412 [ C(OP_READ) ] = {
413 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
414 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
415 },
416 [ C(OP_WRITE) ] = {
417 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
418 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
419 },
420 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0,
422 [ C(RESULT_MISS) ] = 0,
423 },
424 },
425 [ C(ITLB) ] = {
426 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
428 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
429 },
430 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = -1,
432 [ C(RESULT_MISS) ] = -1,
433 },
434 [ C(OP_PREFETCH) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 },
439 [ C(BPU ) ] = {
440 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
442 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
443 },
444 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = -1,
446 [ C(RESULT_MISS) ] = -1,
447 },
448 [ C(OP_PREFETCH) ] = {
449 [ C(RESULT_ACCESS) ] = -1,
450 [ C(RESULT_MISS) ] = -1,
451 },
452 },
453};
454
455static u64 intel_pmu_raw_event(u64 hw_event)
456{
457#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
458#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
459#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
460#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
461#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
462
463#define CORE_EVNTSEL_MASK \
464 (INTEL_ARCH_EVTSEL_MASK | \
465 INTEL_ARCH_UNIT_MASK | \
466 INTEL_ARCH_EDGE_MASK | \
467 INTEL_ARCH_INV_MASK | \
468 INTEL_ARCH_CNT_MASK)
469
470 return hw_event & CORE_EVNTSEL_MASK;
471}
472
473static void intel_pmu_enable_bts(u64 config)
474{
475 unsigned long debugctlmsr;
476
477 debugctlmsr = get_debugctlmsr();
478
479 debugctlmsr |= X86_DEBUGCTL_TR;
480 debugctlmsr |= X86_DEBUGCTL_BTS;
481 debugctlmsr |= X86_DEBUGCTL_BTINT;
482
483 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
484 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
485
486 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
487 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
488
489 update_debugctlmsr(debugctlmsr);
490}
491
492static void intel_pmu_disable_bts(void)
493{
494 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
495 unsigned long debugctlmsr;
496
497 if (!cpuc->ds)
498 return;
499
500 debugctlmsr = get_debugctlmsr();
501
502 debugctlmsr &=
503 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
504 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
505
506 update_debugctlmsr(debugctlmsr);
507}
508
509static void intel_pmu_disable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
516 intel_pmu_disable_bts();
517}
518
519static void intel_pmu_enable_all(void)
520{
521 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
522
523 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
524
525 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
526 struct perf_event *event =
527 cpuc->events[X86_PMC_IDX_FIXED_BTS];
528
529 if (WARN_ON_ONCE(!event))
530 return;
531
532 intel_pmu_enable_bts(event->hw.config);
533 }
534}
535
536static inline u64 intel_pmu_get_status(void)
537{
538 u64 status;
539
540 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
541
542 return status;
543}
544
545static inline void intel_pmu_ack_status(u64 ack)
546{
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548}
549
550static inline void
551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
552{
553 int idx = hwc->idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask;
555
556 mask = 0xfULL << (idx * 4);
557
558 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
561}
562
563static void intel_pmu_drain_bts_buffer(void)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593 perf_sample_data_init(&data, 0);
594
595 data.period = event->hw.last_period;
596 regs.ip = 0;
597
598 /*
599 * Prepare a generic sample, i.e. fill in the invariant fields.
600 * We will overwrite the from and to address before we output
601 * the sample.
602 */
603 perf_prepare_sample(&header, &data, event, &regs);
604
605 if (perf_output_begin(&handle, event,
606 header.size * (top - at), 1, 1))
607 return;
608
609 for (; at < top; at++) {
610 data.ip = at->from;
611 data.addr = at->to;
612
613 perf_output_sample(&handle, &header, &data, event);
614 }
615
616 perf_output_end(&handle);
617
618 /* There's new data available. */
619 event->hw.interrupts++;
620 event->pending_kill = POLL_IN;
621}
622
623static inline void
624intel_pmu_disable_event(struct perf_event *event)
625{
626 struct hw_perf_event *hwc = &event->hw;
627
628 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
629 intel_pmu_disable_bts();
630 intel_pmu_drain_bts_buffer();
631 return;
632 }
633
634 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
635 intel_pmu_disable_fixed(hwc);
636 return;
637 }
638
639 x86_pmu_disable_event(event);
640}
641
642static inline void
643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
644{
645 int idx = hwc->idx - X86_PMC_IDX_FIXED;
646 u64 ctrl_val, bits, mask;
647 int err;
648
649 /*
650 * Enable IRQ generation (0x8),
651 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
652 * if requested:
653 */
654 bits = 0x8ULL;
655 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
656 bits |= 0x2;
657 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
658 bits |= 0x1;
659
660 /*
661 * ANY bit is supported in v3 and up
662 */
663 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
664 bits |= 0x4;
665
666 bits <<= (idx * 4);
667 mask = 0xfULL << (idx * 4);
668
669 rdmsrl(hwc->config_base, ctrl_val);
670 ctrl_val &= ~mask;
671 ctrl_val |= bits;
672 err = checking_wrmsrl(hwc->config_base, ctrl_val);
673}
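
Each fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL: bit 0 enables ring-0 counting, bit 1 ring-3 counting, bit 2 the ANY-thread qualifier (version 3 and later) and bit 3 PMI generation on overflow; the read-modify-write above touches only the nibble that belongs to this counter. A stand-alone sketch of the same bit arithmetic:

#include <stdio.h>
#include <stdint.h>

/* compute the FIXED_CTR_CTRL nibble for fixed counter 'idx' */
static uint64_t fixed_ctrl_bits(int idx, int user, int os, int pmi)
{
	uint64_t bits = 0;

	if (os)
		bits |= 0x1;		/* count in ring 0 */
	if (user)
		bits |= 0x2;		/* count in ring 3 */
	if (pmi)
		bits |= 0x8;		/* raise PMI on overflow */

	return bits << (idx * 4);
}

static uint64_t update_ctrl(uint64_t ctrl_val, int idx, uint64_t bits)
{
	uint64_t mask = 0xfULL << (idx * 4);

	/* touch only this counter's nibble, leave the others alone */
	return (ctrl_val & ~mask) | bits;
}

int main(void)
{
	uint64_t ctrl = 0xbULL;		/* fixed counter 0 already programmed */

	/* program fixed counter 2 for user+kernel counting with PMI */
	ctrl = update_ctrl(ctrl, 2, fixed_ctrl_bits(2, 1, 1, 1));
	printf("FIXED_CTR_CTRL = %#llx\n", (unsigned long long)ctrl);
	return 0;
}
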
674
675static void intel_pmu_enable_event(struct perf_event *event)
676{
677 struct hw_perf_event *hwc = &event->hw;
678
679 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
680 if (!__get_cpu_var(cpu_hw_events).enabled)
681 return;
682
683 intel_pmu_enable_bts(hwc->config);
684 return;
685 }
686
687 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
688 intel_pmu_enable_fixed(hwc);
689 return;
690 }
691
692 __x86_pmu_enable_event(hwc);
693}
694
695/*
696 * Save and restart an expired event. Called by NMI contexts,
697 * so it has to be careful about preempting normal event ops:
698 */
699static int intel_pmu_save_and_restart(struct perf_event *event)
700{
701 x86_perf_event_update(event);
702 return x86_perf_event_set_period(event);
703}
704
705static void intel_pmu_reset(void)
706{
707 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
708 unsigned long flags;
709 int idx;
710
711 if (!x86_pmu.num_events)
712 return;
713
714 local_irq_save(flags);
715
716 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
717
718 for (idx = 0; idx < x86_pmu.num_events; idx++) {
719 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
720 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
721 }
722 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
723 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
724 }
725 if (ds)
726 ds->bts_index = ds->bts_buffer_base;
727
728 local_irq_restore(flags);
729}
730
731/*
732 * This handler is triggered by the local APIC, so the APIC IRQ handling
733 * rules apply:
734 */
735static int intel_pmu_handle_irq(struct pt_regs *regs)
736{
737 struct perf_sample_data data;
738 struct cpu_hw_events *cpuc;
739 int bit, loops;
740 u64 ack, status;
741
742 perf_sample_data_init(&data, 0);
743
744 cpuc = &__get_cpu_var(cpu_hw_events);
745
746 intel_pmu_disable_all();
747 intel_pmu_drain_bts_buffer();
748 status = intel_pmu_get_status();
749 if (!status) {
750 intel_pmu_enable_all();
751 return 0;
752 }
753
754 loops = 0;
755again:
756 if (++loops > 100) {
757 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
758 perf_event_print_debug();
759 intel_pmu_reset();
760 goto done;
761 }
762
763 inc_irq_stat(apic_perf_irqs);
764 ack = status;
765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
766 struct perf_event *event = cpuc->events[bit];
767
768 if (!test_bit(bit, cpuc->active_mask))
769 continue;
770
771 if (!intel_pmu_save_and_restart(event))
772 continue;
773
774 data.period = event->hw.last_period;
775
776 if (perf_event_overflow(event, 1, &data, regs))
777 x86_pmu_stop(event);
778 }
779
780 intel_pmu_ack_status(ack);
781
782 /*
783 * Repeat if there is more work to be done:
784 */
785 status = intel_pmu_get_status();
786 if (status)
787 goto again;
788
789done:
790 intel_pmu_enable_all();
791 return 1;
792}
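
The handler services one overflow per set bit in GLOBAL_STATUS, acknowledges them, and re-reads the status to pick up overflows that raced in while it ran. Walking the set bits of such a status word is the for_each_set_bit() idiom; a user-space equivalent using ctz:

#include <stdio.h>
#include <stdint.h>

/* visit every set bit of 'status', lowest first (for_each_set_bit idiom) */
static void handle_overflows(uint64_t status)
{
	while (status) {
		int bit = __builtin_ctzll(status);	/* index of lowest set bit */

		printf("counter %d overflowed\n", bit);

		status &= status - 1;			/* clear that bit */
	}
}

int main(void)
{
	/* e.g. generic counter 1 and fixed counter bit 32 overflowed together */
	handle_overflows((1ULL << 1) | (1ULL << 32));
	return 0;
}
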
793
794static struct event_constraint bts_constraint =
795 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
796
797static struct event_constraint *
798intel_special_constraints(struct perf_event *event)
799{
800 unsigned int hw_event;
801
802 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
803
804 if (unlikely((hw_event ==
805 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
806 (event->hw.sample_period == 1))) {
807
808 return &bts_constraint;
809 }
810 return NULL;
811}
812
813static struct event_constraint *
814intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
815{
816 struct event_constraint *c;
817
818 c = intel_special_constraints(event);
819 if (c)
820 return c;
821
822 return x86_get_event_constraints(cpuc, event);
823}
824
825static __initconst struct x86_pmu core_pmu = {
826 .name = "core",
827 .handle_irq = x86_pmu_handle_irq,
828 .disable_all = x86_pmu_disable_all,
829 .enable_all = x86_pmu_enable_all,
830 .enable = x86_pmu_enable_event,
831 .disable = x86_pmu_disable_event,
832 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
833 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
834 .event_map = intel_pmu_event_map,
835 .raw_event = intel_pmu_raw_event,
836 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
837 .apic = 1,
838 /*
839 * Intel PMCs cannot be accessed sanely above 32 bit width,
840 * so we install an artificial 1<<31 period regardless of
841 * the generic event period:
842 */
843 .max_period = (1ULL << 31) - 1,
844 .get_event_constraints = intel_get_event_constraints,
845 .event_constraints = intel_core_event_constraints,
846};
847
848static __initconst struct x86_pmu intel_pmu = {
849 .name = "Intel",
850 .handle_irq = intel_pmu_handle_irq,
851 .disable_all = intel_pmu_disable_all,
852 .enable_all = intel_pmu_enable_all,
853 .enable = intel_pmu_enable_event,
854 .disable = intel_pmu_disable_event,
855 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
856 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
857 .event_map = intel_pmu_event_map,
858 .raw_event = intel_pmu_raw_event,
859 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
860 .apic = 1,
861 /*
862 * Intel PMCs cannot be accessed sanely above 32 bit width,
863 * so we install an artificial 1<<31 period regardless of
864 * the generic event period:
865 */
866 .max_period = (1ULL << 31) - 1,
867 .enable_bts = intel_pmu_enable_bts,
868 .disable_bts = intel_pmu_disable_bts,
869 .get_event_constraints = intel_get_event_constraints,
870
871 .cpu_starting = init_debug_store_on_cpu,
872 .cpu_dying = fini_debug_store_on_cpu,
873};
874
875static __init int intel_pmu_init(void)
876{
877 union cpuid10_edx edx;
878 union cpuid10_eax eax;
879 unsigned int unused;
880 unsigned int ebx;
881 int version;
882
883 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
884 /* check for P6 processor family */
885 if (boot_cpu_data.x86 == 6) {
886 return p6_pmu_init();
887 } else {
888 return -ENODEV;
889 }
890 }
891
892 /*
893 * Check whether the Architectural PerfMon supports
894 * Branch Misses Retired hw_event or not.
895 */
896 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
897 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
898 return -ENODEV;
899
900 version = eax.split.version_id;
901 if (version < 2)
902 x86_pmu = core_pmu;
903 else
904 x86_pmu = intel_pmu;
905
906 x86_pmu.version = version;
907 x86_pmu.num_events = eax.split.num_events;
908 x86_pmu.event_bits = eax.split.bit_width;
909 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
910
911 /*
912 * Quirk: v2 perfmon does not report fixed-purpose events, so
913 * assume at least 3 events:
914 */
915 if (version > 1)
916 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
917
918 /*
919 * Install the hw-cache-events table:
920 */
921 switch (boot_cpu_data.x86_model) {
922 case 14: /* 65 nm core solo/duo, "Yonah" */
923 pr_cont("Core events, ");
924 break;
925
926 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
927 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
928 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
929 case 29: /* six-core 45 nm xeon "Dunnington" */
930 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
931 sizeof(hw_cache_event_ids));
932
933 x86_pmu.event_constraints = intel_core2_event_constraints;
934 pr_cont("Core2 events, ");
935 break;
936
937 case 26: /* 45 nm nehalem, "Bloomfield" */
938 case 30: /* 45 nm nehalem, "Lynnfield" */
939 case 46: /* 45 nm nehalem-ex, "Beckton" */
940 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
941 sizeof(hw_cache_event_ids));
942
943 x86_pmu.event_constraints = intel_nehalem_event_constraints;
944 pr_cont("Nehalem/Corei7 events, ");
945 break;
946 case 28: /* Atom */
947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
948 sizeof(hw_cache_event_ids));
949
950 x86_pmu.event_constraints = intel_gen_event_constraints;
951 pr_cont("Atom events, ");
952 break;
953
954 case 37: /* 32 nm nehalem, "Clarkdale" */
955 case 44: /* 32 nm nehalem, "Gulftown" */
956 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
957 sizeof(hw_cache_event_ids));
958
959 x86_pmu.event_constraints = intel_westmere_event_constraints;
960 pr_cont("Westmere events, ");
961 break;
962
963 default:
964 /*
965 * default constraints for v2 and up
966 */
967 x86_pmu.event_constraints = intel_gen_event_constraints;
968 pr_cont("generic architected perfmon, ");
969 }
970 return 0;
971}
972
973#else /* CONFIG_CPU_SUP_INTEL */
974
975static int intel_pmu_init(void)
976{
977 return 0;
978}
979
980#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..a330485d14da
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,159 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct perf_event *event)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 struct hw_perf_event *hwc = &event->hw;
84 u64 val = P6_NOP_EVENT;
85
86 if (cpuc->enabled)
87 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
88
89 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
90}
91
92static void p6_pmu_enable_event(struct perf_event *event)
93{
94 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
95 struct hw_perf_event *hwc = &event->hw;
96 u64 val;
97
98 val = hwc->config;
99 if (cpuc->enabled)
100 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
101
102 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
103}
104
105static __initconst struct x86_pmu p6_pmu = {
106 .name = "p6",
107 .handle_irq = x86_pmu_handle_irq,
108 .disable_all = p6_pmu_disable_all,
109 .enable_all = p6_pmu_enable_all,
110 .enable = p6_pmu_enable_event,
111 .disable = p6_pmu_disable_event,
112 .eventsel = MSR_P6_EVNTSEL0,
113 .perfctr = MSR_P6_PERFCTR0,
114 .event_map = p6_pmu_event_map,
115 .raw_event = p6_pmu_raw_event,
116 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
117 .apic = 1,
118 .max_period = (1ULL << 31) - 1,
119 .version = 0,
120 .num_events = 2,
121 /*
122 * Events have 40 bits implemented. However they are designed such
123 * that bits [32-39] are sign extensions of bit 31. As such the
 124 * effective width of an event for P6-like PMU is 32 bits only.
125 *
126 * See IA-32 Intel Architecture Software developer manual Vol 3B
127 */
128 .event_bits = 32,
129 .event_mask = (1ULL << 32) - 1,
130 .get_event_constraints = x86_get_event_constraints,
131 .event_constraints = p6_event_constraints,
132};
133
134static __init int p6_pmu_init(void)
135{
136 switch (boot_cpu_data.x86_model) {
137 case 1:
138 case 3: /* Pentium Pro */
139 case 5:
140 case 6: /* Pentium II */
141 case 7:
142 case 8:
143 case 11: /* Pentium III */
144 case 9:
145 case 13:
146 /* Pentium M */
147 break;
148 default:
149 pr_cont("unsupported p6 CPU model %d ",
150 boot_cpu_data.x86_model);
151 return -ENODEV;
152 }
153
154 x86_pmu = p6_pmu;
155
156 return 0;
157}
158
159#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fab786f60ed6..fb329e9f8494 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
115 115
116 return !test_bit(counter, perfctr_nmi_owner); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118
119/* checks the an msr for availability */
120int avail_to_resrv_perfctr_nmi(unsigned int msr)
121{
122 unsigned int counter;
123
124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 return !test_bit(counter, perfctr_nmi_owner);
128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 118EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 119
131int reserve_perfctr_nmi(unsigned int msr) 120int reserve_perfctr_nmi(unsigned int msr)
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
691 cpu_nmi_set_wd_enabled(); 680 cpu_nmi_set_wd_enabled();
692 681
693 apic_write(APIC_LVTPC, APIC_DM_NMI); 682 apic_write(APIC_LVTPC, APIC_DM_NMI);
694 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
695 wrmsr(evntsel_msr, evntsel, 0); 684 wrmsr(evntsel_msr, evntsel, 0);
696 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
697 return 1; 686 return 1;
@@ -712,7 +701,7 @@ static void probe_nmi_watchdog(void)
712 switch (boot_cpu_data.x86_vendor) { 701 switch (boot_cpu_data.x86_vendor) {
713 case X86_VENDOR_AMD: 702 case X86_VENDOR_AMD:
714 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && 703 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
715 boot_cpu_data.x86 != 16) 704 boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
716 return; 705 return;
717 wd_ops = &k7_wd_ops; 706 wd_ops = &k7_wd_ops;
718 break; 707 break;
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index bb62b3e5caad..28000743bbb0 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
26 26
27 early_init_transmeta(c); 27 early_init_transmeta(c);
28 28
29 display_cacheinfo(c); 29 cpu_detect_cache_sizes(c);
30 30
31 /* Print CMS and CPU revision */ 31 /* Print CMS and CPU revision */
32 max = cpuid_eax(0x80860000); 32 max = cpuid_eax(0x80860000);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59cf..dfdb4dba2320 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
22 */ 22 */
23 23
24#include <linux/dmi.h> 24#include <linux/dmi.h>
25#include <linux/module.h>
25#include <asm/div64.h> 26#include <asm/div64.h>
26#include <asm/vmware.h> 27#include <asm/vmware.h>
27#include <asm/x86_init.h> 28#include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
101 102
102 return 0; 103 return 0;
103} 104}
105EXPORT_SYMBOL(vmware_platform);
104 106
105/* 107/*
106 * VMware hypervisor takes care of exporting a reliable TSC to the guest. 108 * VMware hypervisor takes care of exporting a reliable TSC to the guest.