author		Ingo Molnar <mingo@elte.hu>	2010-03-04 05:47:50 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-04 05:47:52 -0500
commit		4f16d4e0c9a4b20d9f0db365587b96d6001efd7d (patch)
tree		fa25dcf285b26f1fac2bf267d0d1cd2c4eba90b8 /arch/x86
parent		1e259e0a9982078896f3404240096cbea01daca4 (diff)
parent		6630125419ef37ff8781713c5e9d416f2a4ba357 (diff)
Merge branch 'perf/core' into perf/urgent
Merge reason: Switch from pre-merge topical split to the post-merge urgent track

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/alternative.h         |    5
-rw-r--r--  arch/x86/include/asm/debugreg.h            |    3
-rw-r--r--  arch/x86/include/asm/elf.h                 |    5
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h       |    1
-rw-r--r--  arch/x86/include/asm/nmi.h                 |    1
-rw-r--r--  arch/x86/include/asm/perf_event.h          |   30
-rw-r--r--  arch/x86/include/asm/ptrace.h              |    4
-rw-r--r--  arch/x86/include/asm/stacktrace.h          |    2
-rw-r--r--  arch/x86/include/asm/system.h              |    4
-rw-r--r--  arch/x86/kernel/acpi/boot.c                |   13
-rw-r--r--  arch/x86/kernel/alternative.c              |   18
-rw-r--r--  arch/x86/kernel/apic/apic.c                |   17
-rw-r--r--  arch/x86/kernel/apic/probe_32.c            |   29
-rw-r--r--  arch/x86/kernel/apic/probe_64.c            |    2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |    3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           | 1868
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c       |  416
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |  982
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c        |  157
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c     |   13
-rw-r--r--  arch/x86/kernel/dumpstack_32.c             |    5
-rw-r--r--  arch/x86/kernel/dumpstack_64.c             |    5
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c            |   17
-rw-r--r--  arch/x86/kernel/kprobes.c                  |    5
-rw-r--r--  arch/x86/kernel/mpparse.c                  |    7
-rw-r--r--  arch/x86/kernel/process_64.c               |    1
-rw-r--r--  arch/x86/kernel/ptrace.c                   |   24
-rw-r--r--  arch/x86/kernel/smpboot.c                  |    2
-rw-r--r--  arch/x86/kernel/traps.c                    |    3
-rw-r--r--  arch/x86/kvm/i8254.c                       |    3
-rw-r--r--  arch/x86/kvm/x86.c                         |    7
-rw-r--r--  arch/x86/mm/gup.c                          |    2
-rw-r--r--  arch/x86/oprofile/nmi_int.c                |   17
-rw-r--r--  arch/x86/oprofile/op_model_amd.c           |  261
-rw-r--r--  arch/x86/oprofile/op_model_p4.c            |    6
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c          |   21
-rw-r--r--  arch/x86/oprofile/op_x86_model.h           |   20
37 files changed, 2415 insertions, 1564 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..ac80b7d70014 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
    which debugging register was responsible for the trap. The other bits
    are either reserved or not of interest to us. */
 
+/* Define reserved bits in DR6 which are always set to 1 */
+#define DR6_RESERVED	(0xFFFF0FF0)
+
 #define DR_TRAP0	(0x1)		/* db0 */
 #define DR_TRAP1	(0x2)		/* db1 */
 #define DR_TRAP2	(0x4)		/* db2 */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1994d3f58443..f2ad2163109d 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t,
 }
 
 #define ELF_PLAT_INIT(_r, load_addr)		\
-do {						\
-	elf_common_init(&current->thread, _r, 0);	\
-	clear_thread_flag(TIF_IA32);		\
-} while (0)
+	elf_common_init(&current->thread, _r, 0)
 
 #define	COMPAT_ELF_PLAT_INIT(regs, load_addr)	\
 	elf_common_init(&current->thread, regs, __USER_DS)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 0675a7c4c20e..2a1bd8f4f23a 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -10,7 +10,6 @@
  * (display/resolving)
  */
 struct arch_hw_breakpoint {
-	char		*name; /* Contains name of the symbol to set bkpt */
 	unsigned long	address;
 	u8		len;
 	u8		type;
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
 extern int nmi_watchdog_enabled;
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-extern int avail_to_resrv_perfctr_nmi(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1380367dabd9..db6109a885a7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -18,7 +18,7 @@
 #define MSR_ARCH_PERFMON_EVENTSEL0		0x186
 #define MSR_ARCH_PERFMON_EVENTSEL1		0x187
 
-#define ARCH_PERFMON_EVENTSEL0_ENABLE		(1 << 22)
+#define ARCH_PERFMON_EVENTSEL_ENABLE		(1 << 22)
 #define ARCH_PERFMON_EVENTSEL_ANY		(1 << 21)
 #define ARCH_PERFMON_EVENTSEL_INT		(1 << 20)
 #define ARCH_PERFMON_EVENTSEL_OS		(1 << 17)
@@ -27,7 +27,14 @@
 /*
  * Includes eventsel and unit mask as well:
  */
-#define ARCH_PERFMON_EVENT_MASK			0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK		0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK		0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK		0x00040000ULL
+#define INTEL_ARCH_INV_MASK		0x00800000ULL
+#define INTEL_ARCH_CNT_MASK		0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK	(INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
 
 /*
  * filter mask to validate fixed counter events.
@@ -38,7 +45,12 @@
  * The other filters are supported by fixed counters.
  * The any-thread option is supported starting with v3.
  */
-#define ARCH_PERFMON_EVENT_FILTER_MASK		0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+	(INTEL_ARCH_CNT_MASK| \
+	 INTEL_ARCH_INV_MASK| \
+	 INTEL_ARCH_EDGE_MASK|\
+	 INTEL_ARCH_UNIT_MASK|\
+	 INTEL_ARCH_EVENT_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
@@ -105,6 +117,18 @@ union cpuid10_edx {
  */
 #define X86_PMC_IDX_FIXED_BTS		(X86_PMC_IDX_FIXED + 16)
 
+/* IbsFetchCtl bits/masks */
+#define IBS_FETCH_RAND_EN		(1ULL<<57)
+#define IBS_FETCH_VAL			(1ULL<<49)
+#define IBS_FETCH_ENABLE		(1ULL<<48)
+#define IBS_FETCH_CNT			0xFFFF0000ULL
+#define IBS_FETCH_MAX_CNT		0x0000FFFFULL
+
+/* IbsOpCtl bits */
+#define IBS_OP_CNT_CTL			(1ULL<<19)
+#define IBS_OP_VAL			(1ULL<<18)
+#define IBS_OP_ENABLE			(1ULL<<17)
+#define IBS_OP_MAX_CNT			0x0000FFFFULL
 
 #ifdef CONFIG_PERF_EVENTS
 extern void init_hw_perf_events(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f680321..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 	return 0;
 }
 
-/* Get Nth argument at function call */
-extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
-					   unsigned int n);
-
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e89122a42f..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
 
 extern int kstack_depth_to_print;
 
-int x86_is_stack_id(int id, char *name);
-
 struct thread_info;
 struct stacktrace_ops;
 
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index ecb544e65382..e04740f7a0bb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -11,9 +11,9 @@
 #include <linux/irqflags.h>
 
 /* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
 # define AT_VECTOR_SIZE_ARCH 2
-#else
+#else /* else it's non-compat x86-64 */
 # define AT_VECTOR_SIZE_ARCH 1
 #endif
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 036d28adf59d..af1c5833ff23 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1185,9 +1185,6 @@ static void __init acpi_process_madt(void)
 	if (!error) {
 		acpi_lapic = 1;
 
-#ifdef CONFIG_X86_BIGSMP
-		generic_bigsmp_probe();
-#endif
 		/*
 		 * Parse MADT IO-APIC entries
 		 */
@@ -1197,8 +1194,6 @@ static void __init acpi_process_madt(void)
 			acpi_ioapic = 1;
 
 			smp_found_config = 1;
-			if (apic->setup_apic_routing)
-				apic->setup_apic_routing();
 		}
 	}
 	if (error == -EINVAL) {
@@ -1349,14 +1344,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 	 },
 	{
 	 .callback = force_acpi_ht,
-	 .ident = "ASUS P2B-DS",
-	 .matches = {
-		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
-		     DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
-		     },
-	 },
-	{
-	 .callback = force_acpi_ht,
 	 .ident = "ASUS CUR-DLS",
 	 .matches = {
 		     DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..e63b80e5861c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -390,6 +390,24 @@ void alternatives_smp_switch(int smp)
 	mutex_unlock(&smp_alt);
 }
 
+/* Return 1 if the address range is reserved for smp-alternatives */
+int alternatives_text_reserved(void *start, void *end)
+{
+	struct smp_alt_module *mod;
+	u8 **ptr;
+	u8 *text_start = start;
+	u8 *text_end = end;
+
+	list_for_each_entry(mod, &smp_alt_modules, next) {
+		if (mod->text > text_end || mod->text_end < text_start)
+			continue;
+		for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
+			if (text_start <= *ptr && text_end >= *ptr)
+				return 1;
+	}
+
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3987e4408f75..dfca210f6a10 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void)
 #endif
 
 	enable_IR_x2apic();
-#ifdef CONFIG_X86_64
 	default_setup_apic_routing();
-#endif
 
 	verify_local_APIC();
 	connect_bsp_APIC();
@@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	if (apicid > max_physical_apicid)
 		max_physical_apicid = apicid;
 
-#ifdef CONFIG_X86_32
-	if (num_processors > 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
-			def_to_bigsmp = 1;
-		}
-	}
-#endif
-
 #if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1a6559f6768c..99d2fe016084 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void)
 }
 late_initcall(print_ipi_mode);
 
-void default_setup_apic_routing(void)
+void __init default_setup_apic_routing(void)
+{
+	int version = apic_version[boot_cpu_physical_apicid];
+
+	if (num_possible_cpus() > 8) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (!APIC_XAPIC(version)) {
+				def_to_bigsmp = 0;
+				break;
+			}
+			/* If P4 and above fall through */
+		case X86_VENDOR_AMD:
+			def_to_bigsmp = 1;
+		}
+	}
+
+#ifdef CONFIG_X86_BIGSMP
+	generic_bigsmp_probe();
+#endif
+
+	if (apic->setup_apic_routing)
+		apic->setup_apic_routing();
+}
+
+static void setup_apic_flat_routing(void)
 {
 #ifdef CONFIG_X86_IO_APIC
 	printk(KERN_INFO
@@ -103,7 +128,7 @@ struct apic apic_default = {
 	.init_apic_ldr			= default_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
-	.setup_apic_routing		= default_setup_apic_routing,
+	.setup_apic_routing		= setup_apic_flat_routing,
 	.multi_timer_check		= NULL,
 	.apicid_to_node			= default_apicid_to_node,
 	.cpu_to_logical_apicid		= default_cpu_to_logical_apicid,
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 450fe2064a14..83e9be4778e2 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void)
 	}
 #endif
 
-	if (apic == &apic_flat && num_processors > 8)
+	if (apic == &apic_flat && num_possible_cpus() > 8)
 		apic = &apic_physflat;
 
 	printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index f125e5c551c0..6e44519960c8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
 	kfree(data->powernow_table);
 	kfree(data);
+	per_cpu(powernow_data, pol->cpu) = NULL;
 
 	return 0;
 }
@@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
 	int err;
 
 	if (!data)
-		return -EINVAL;
+		return 0;
 
 	smp_call_function_single(cpu, query_values_on_cpu, &err, true);
 	if (err)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8c1c07073ccc..bfc43fa208bc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter 7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> 8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> 9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
10 * 11 *
11 * For licencing details see kernel-base/COPYING 12 * For licencing details see kernel-base/COPYING
12 */ 13 */
@@ -22,6 +23,7 @@
22#include <linux/uaccess.h> 23#include <linux/uaccess.h>
23#include <linux/highmem.h> 24#include <linux/highmem.h>
24#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h>
25 27
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <asm/stacktrace.h> 29#include <asm/stacktrace.h>
@@ -68,26 +70,59 @@ struct debug_store {
68 u64 pebs_event_reset[MAX_PEBS_EVENTS]; 70 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69}; 71};
70 72
73struct event_constraint {
74 union {
75 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
76 u64 idxmsk64;
77 };
78 u64 code;
79 u64 cmask;
80 int weight;
81};
82
83struct amd_nb {
84 int nb_id; /* NorthBridge id */
85 int refcnt; /* reference count */
86 struct perf_event *owners[X86_PMC_IDX_MAX];
87 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
88};
89
71struct cpu_hw_events { 90struct cpu_hw_events {
72 struct perf_event *events[X86_PMC_IDX_MAX]; 91 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 92 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts; 93 unsigned long interrupts;
76 int enabled; 94 int enabled;
77 struct debug_store *ds; 95 struct debug_store *ds;
78};
79 96
80struct event_constraint { 97 int n_events;
81 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 98 int n_added;
82 int code; 99 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
100 u64 tags[X86_PMC_IDX_MAX];
101 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
102 struct amd_nb *amd_nb;
83}; 103};
84 104
85#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } 105#define __EVENT_CONSTRAINT(c, n, m, w) {\
86#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } 106 { .idxmsk64 = (n) }, \
107 .code = (c), \
108 .cmask = (m), \
109 .weight = (w), \
110}
111
112#define EVENT_CONSTRAINT(c, n, m) \
113 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
87 114
88#define for_each_event_constraint(e, c) \ 115#define INTEL_EVENT_CONSTRAINT(c, n) \
89 for ((e) = (c); (e)->idxmsk[0]; (e)++) 116 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
90 117
118#define FIXED_EVENT_CONSTRAINT(c, n) \
119 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
120
121#define EVENT_CONSTRAINT_END \
122 EVENT_CONSTRAINT(0, 0, 0)
123
124#define for_each_event_constraint(e, c) \
125 for ((e) = (c); (e)->cmask; (e)++)
91 126
92/* 127/*
93 * struct x86_pmu - generic x86 pmu 128 * struct x86_pmu - generic x86 pmu
@@ -114,8 +149,14 @@ struct x86_pmu {
114 u64 intel_ctrl; 149 u64 intel_ctrl;
115 void (*enable_bts)(u64 config); 150 void (*enable_bts)(u64 config);
116 void (*disable_bts)(void); 151 void (*disable_bts)(void);
117 int (*get_event_idx)(struct cpu_hw_events *cpuc, 152
118 struct hw_perf_event *hwc); 153 struct event_constraint *
154 (*get_event_constraints)(struct cpu_hw_events *cpuc,
155 struct perf_event *event);
156
157 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
158 struct perf_event *event);
159 struct event_constraint *event_constraints;
119}; 160};
120 161
121static struct x86_pmu x86_pmu __read_mostly; 162static struct x86_pmu x86_pmu __read_mostly;
@@ -124,111 +165,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
124 .enabled = 1, 165 .enabled = 1,
125}; 166};
126 167
127static const struct event_constraint *event_constraints; 168static int x86_perf_event_set_period(struct perf_event *event,
128 169 struct hw_perf_event *hwc, int idx);
129/*
130 * Not sure about some of these
131 */
132static const u64 p6_perfmon_event_map[] =
133{
134 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
135 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
136 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
137 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
138 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
139 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
140 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
141};
142
143static u64 p6_pmu_event_map(int hw_event)
144{
145 return p6_perfmon_event_map[hw_event];
146}
147
148/*
149 * Event setting that is specified not to count anything.
150 * We use this to effectively disable a counter.
151 *
152 * L2_RQSTS with 0 MESI unit mask.
153 */
154#define P6_NOP_EVENT 0x0000002EULL
155
156static u64 p6_pmu_raw_event(u64 hw_event)
157{
158#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
159#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
160#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
161#define P6_EVNTSEL_INV_MASK 0x00800000ULL
162#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
163
164#define P6_EVNTSEL_MASK \
165 (P6_EVNTSEL_EVENT_MASK | \
166 P6_EVNTSEL_UNIT_MASK | \
167 P6_EVNTSEL_EDGE_MASK | \
168 P6_EVNTSEL_INV_MASK | \
169 P6_EVNTSEL_REG_MASK)
170
171 return hw_event & P6_EVNTSEL_MASK;
172}
173
174static const struct event_constraint intel_p6_event_constraints[] =
175{
176 EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
177 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
178 EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
179 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
180 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
181 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
182 EVENT_CONSTRAINT_END
183};
184
185/*
186 * Intel PerfMon v3. Used on Core2 and later.
187 */
188static const u64 intel_perfmon_event_map[] =
189{
190 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
191 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
192 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
193 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
194 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
195 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
196 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
197};
198
199static const struct event_constraint intel_core_event_constraints[] =
200{
201 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
202 EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
203 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
204 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
205 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
206 EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
207 EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
208 EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
209 EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
210 EVENT_CONSTRAINT_END
211};
212
213static const struct event_constraint intel_nehalem_event_constraints[] =
214{
215 EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
216 EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
217 EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
218 EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
219 EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
220 EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
221 EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
222 EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223 EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
224 EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
225 EVENT_CONSTRAINT_END
226};
227
228static u64 intel_pmu_event_map(int hw_event)
229{
230 return intel_perfmon_event_map[hw_event];
231}
232 170
233/* 171/*
234 * Generalized hw caching related hw_event table, filled 172 * Generalized hw caching related hw_event table, filled
@@ -245,424 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
245 [PERF_COUNT_HW_CACHE_OP_MAX] 183 [PERF_COUNT_HW_CACHE_OP_MAX]
246 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 184 [PERF_COUNT_HW_CACHE_RESULT_MAX];
247 185
248static __initconst u64 nehalem_hw_cache_event_ids
249 [PERF_COUNT_HW_CACHE_MAX]
250 [PERF_COUNT_HW_CACHE_OP_MAX]
251 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
252{
253 [ C(L1D) ] = {
254 [ C(OP_READ) ] = {
255 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
256 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
257 },
258 [ C(OP_WRITE) ] = {
259 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
260 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
261 },
262 [ C(OP_PREFETCH) ] = {
263 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
264 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
265 },
266 },
267 [ C(L1I ) ] = {
268 [ C(OP_READ) ] = {
269 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
270 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
271 },
272 [ C(OP_WRITE) ] = {
273 [ C(RESULT_ACCESS) ] = -1,
274 [ C(RESULT_MISS) ] = -1,
275 },
276 [ C(OP_PREFETCH) ] = {
277 [ C(RESULT_ACCESS) ] = 0x0,
278 [ C(RESULT_MISS) ] = 0x0,
279 },
280 },
281 [ C(LL ) ] = {
282 [ C(OP_READ) ] = {
283 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
284 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
285 },
286 [ C(OP_WRITE) ] = {
287 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
288 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
289 },
290 [ C(OP_PREFETCH) ] = {
291 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
292 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
293 },
294 },
295 [ C(DTLB) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
298 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
299 },
300 [ C(OP_WRITE) ] = {
301 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
302 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
303 },
304 [ C(OP_PREFETCH) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0,
306 [ C(RESULT_MISS) ] = 0x0,
307 },
308 },
309 [ C(ITLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
312 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
313 },
314 [ C(OP_WRITE) ] = {
315 [ C(RESULT_ACCESS) ] = -1,
316 [ C(RESULT_MISS) ] = -1,
317 },
318 [ C(OP_PREFETCH) ] = {
319 [ C(RESULT_ACCESS) ] = -1,
320 [ C(RESULT_MISS) ] = -1,
321 },
322 },
323 [ C(BPU ) ] = {
324 [ C(OP_READ) ] = {
325 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
326 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static __initconst u64 core2_hw_cache_event_ids
340 [PERF_COUNT_HW_CACHE_MAX]
341 [PERF_COUNT_HW_CACHE_OP_MAX]
342 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
343{
344 [ C(L1D) ] = {
345 [ C(OP_READ) ] = {
346 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
347 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
348 },
349 [ C(OP_WRITE) ] = {
350 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
351 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
352 },
353 [ C(OP_PREFETCH) ] = {
354 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
355 [ C(RESULT_MISS) ] = 0,
356 },
357 },
358 [ C(L1I ) ] = {
359 [ C(OP_READ) ] = {
360 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
361 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
362 },
363 [ C(OP_WRITE) ] = {
364 [ C(RESULT_ACCESS) ] = -1,
365 [ C(RESULT_MISS) ] = -1,
366 },
367 [ C(OP_PREFETCH) ] = {
368 [ C(RESULT_ACCESS) ] = 0,
369 [ C(RESULT_MISS) ] = 0,
370 },
371 },
372 [ C(LL ) ] = {
373 [ C(OP_READ) ] = {
374 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
375 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
376 },
377 [ C(OP_WRITE) ] = {
378 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
379 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
380 },
381 [ C(OP_PREFETCH) ] = {
382 [ C(RESULT_ACCESS) ] = 0,
383 [ C(RESULT_MISS) ] = 0,
384 },
385 },
386 [ C(DTLB) ] = {
387 [ C(OP_READ) ] = {
388 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
389 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
390 },
391 [ C(OP_WRITE) ] = {
392 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
393 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
394 },
395 [ C(OP_PREFETCH) ] = {
396 [ C(RESULT_ACCESS) ] = 0,
397 [ C(RESULT_MISS) ] = 0,
398 },
399 },
400 [ C(ITLB) ] = {
401 [ C(OP_READ) ] = {
402 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
403 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
404 },
405 [ C(OP_WRITE) ] = {
406 [ C(RESULT_ACCESS) ] = -1,
407 [ C(RESULT_MISS) ] = -1,
408 },
409 [ C(OP_PREFETCH) ] = {
410 [ C(RESULT_ACCESS) ] = -1,
411 [ C(RESULT_MISS) ] = -1,
412 },
413 },
414 [ C(BPU ) ] = {
415 [ C(OP_READ) ] = {
416 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
417 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
418 },
419 [ C(OP_WRITE) ] = {
420 [ C(RESULT_ACCESS) ] = -1,
421 [ C(RESULT_MISS) ] = -1,
422 },
423 [ C(OP_PREFETCH) ] = {
424 [ C(RESULT_ACCESS) ] = -1,
425 [ C(RESULT_MISS) ] = -1,
426 },
427 },
428};
429
430static __initconst u64 atom_hw_cache_event_ids
431 [PERF_COUNT_HW_CACHE_MAX]
432 [PERF_COUNT_HW_CACHE_OP_MAX]
433 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
434{
435 [ C(L1D) ] = {
436 [ C(OP_READ) ] = {
437 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
438 [ C(RESULT_MISS) ] = 0,
439 },
440 [ C(OP_WRITE) ] = {
441 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
442 [ C(RESULT_MISS) ] = 0,
443 },
444 [ C(OP_PREFETCH) ] = {
445 [ C(RESULT_ACCESS) ] = 0x0,
446 [ C(RESULT_MISS) ] = 0,
447 },
448 },
449 [ C(L1I ) ] = {
450 [ C(OP_READ) ] = {
451 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
452 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
453 },
454 [ C(OP_WRITE) ] = {
455 [ C(RESULT_ACCESS) ] = -1,
456 [ C(RESULT_MISS) ] = -1,
457 },
458 [ C(OP_PREFETCH) ] = {
459 [ C(RESULT_ACCESS) ] = 0,
460 [ C(RESULT_MISS) ] = 0,
461 },
462 },
463 [ C(LL ) ] = {
464 [ C(OP_READ) ] = {
465 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
466 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
470 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
471 },
472 [ C(OP_PREFETCH) ] = {
473 [ C(RESULT_ACCESS) ] = 0,
474 [ C(RESULT_MISS) ] = 0,
475 },
476 },
477 [ C(DTLB) ] = {
478 [ C(OP_READ) ] = {
479 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
480 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
481 },
482 [ C(OP_WRITE) ] = {
483 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
484 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
485 },
486 [ C(OP_PREFETCH) ] = {
487 [ C(RESULT_ACCESS) ] = 0,
488 [ C(RESULT_MISS) ] = 0,
489 },
490 },
491 [ C(ITLB) ] = {
492 [ C(OP_READ) ] = {
493 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
494 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
495 },
496 [ C(OP_WRITE) ] = {
497 [ C(RESULT_ACCESS) ] = -1,
498 [ C(RESULT_MISS) ] = -1,
499 },
500 [ C(OP_PREFETCH) ] = {
501 [ C(RESULT_ACCESS) ] = -1,
502 [ C(RESULT_MISS) ] = -1,
503 },
504 },
505 [ C(BPU ) ] = {
506 [ C(OP_READ) ] = {
507 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
508 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
509 },
510 [ C(OP_WRITE) ] = {
511 [ C(RESULT_ACCESS) ] = -1,
512 [ C(RESULT_MISS) ] = -1,
513 },
514 [ C(OP_PREFETCH) ] = {
515 [ C(RESULT_ACCESS) ] = -1,
516 [ C(RESULT_MISS) ] = -1,
517 },
518 },
519};
520
521static u64 intel_pmu_raw_event(u64 hw_event)
522{
523#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
524#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
525#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
526#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
527#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
528
529#define CORE_EVNTSEL_MASK \
530 (CORE_EVNTSEL_EVENT_MASK | \
531 CORE_EVNTSEL_UNIT_MASK | \
532 CORE_EVNTSEL_EDGE_MASK | \
533 CORE_EVNTSEL_INV_MASK | \
534 CORE_EVNTSEL_REG_MASK)
535
536 return hw_event & CORE_EVNTSEL_MASK;
537}
538
539static __initconst u64 amd_hw_cache_event_ids
540 [PERF_COUNT_HW_CACHE_MAX]
541 [PERF_COUNT_HW_CACHE_OP_MAX]
542 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
543{
544 [ C(L1D) ] = {
545 [ C(OP_READ) ] = {
546 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
547 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
548 },
549 [ C(OP_WRITE) ] = {
550 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
551 [ C(RESULT_MISS) ] = 0,
552 },
553 [ C(OP_PREFETCH) ] = {
554 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
555 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
556 },
557 },
558 [ C(L1I ) ] = {
559 [ C(OP_READ) ] = {
560 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
561 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
562 },
563 [ C(OP_WRITE) ] = {
564 [ C(RESULT_ACCESS) ] = -1,
565 [ C(RESULT_MISS) ] = -1,
566 },
567 [ C(OP_PREFETCH) ] = {
568 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
569 [ C(RESULT_MISS) ] = 0,
570 },
571 },
572 [ C(LL ) ] = {
573 [ C(OP_READ) ] = {
574 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
575 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
576 },
577 [ C(OP_WRITE) ] = {
578 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
579 [ C(RESULT_MISS) ] = 0,
580 },
581 [ C(OP_PREFETCH) ] = {
582 [ C(RESULT_ACCESS) ] = 0,
583 [ C(RESULT_MISS) ] = 0,
584 },
585 },
586 [ C(DTLB) ] = {
587 [ C(OP_READ) ] = {
588 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
589 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
590 },
591 [ C(OP_WRITE) ] = {
592 [ C(RESULT_ACCESS) ] = 0,
593 [ C(RESULT_MISS) ] = 0,
594 },
595 [ C(OP_PREFETCH) ] = {
596 [ C(RESULT_ACCESS) ] = 0,
597 [ C(RESULT_MISS) ] = 0,
598 },
599 },
600 [ C(ITLB) ] = {
601 [ C(OP_READ) ] = {
602 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
603 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
604 },
605 [ C(OP_WRITE) ] = {
606 [ C(RESULT_ACCESS) ] = -1,
607 [ C(RESULT_MISS) ] = -1,
608 },
609 [ C(OP_PREFETCH) ] = {
610 [ C(RESULT_ACCESS) ] = -1,
611 [ C(RESULT_MISS) ] = -1,
612 },
613 },
614 [ C(BPU ) ] = {
615 [ C(OP_READ) ] = {
616 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
617 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
618 },
619 [ C(OP_WRITE) ] = {
620 [ C(RESULT_ACCESS) ] = -1,
621 [ C(RESULT_MISS) ] = -1,
622 },
623 [ C(OP_PREFETCH) ] = {
624 [ C(RESULT_ACCESS) ] = -1,
625 [ C(RESULT_MISS) ] = -1,
626 },
627 },
628};
629
630/*
631 * AMD Performance Monitor K7 and later.
632 */
633static const u64 amd_perfmon_event_map[] =
634{
635 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
636 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
637 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
638 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
639 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
640 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
641};
642
643static u64 amd_pmu_event_map(int hw_event)
644{
645 return amd_perfmon_event_map[hw_event];
646}
647
648static u64 amd_pmu_raw_event(u64 hw_event)
649{
650#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
651#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
652#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
653#define K7_EVNTSEL_INV_MASK 0x000800000ULL
654#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
655
656#define K7_EVNTSEL_MASK \
657 (K7_EVNTSEL_EVENT_MASK | \
658 K7_EVNTSEL_UNIT_MASK | \
659 K7_EVNTSEL_EDGE_MASK | \
660 K7_EVNTSEL_INV_MASK | \
661 K7_EVNTSEL_REG_MASK)
662
663 return hw_event & K7_EVNTSEL_MASK;
664}
665
666/* 186/*
667 * Propagate event elapsed time into the generic event. 187 * Propagate event elapsed time into the generic event.
668 * Can only be executed on the CPU where the event is active. 188 * Can only be executed on the CPU where the event is active.
@@ -914,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
914 return 0; 434 return 0;
915} 435}
916 436
917static void intel_pmu_enable_bts(u64 config)
918{
919 unsigned long debugctlmsr;
920
921 debugctlmsr = get_debugctlmsr();
922
923 debugctlmsr |= X86_DEBUGCTL_TR;
924 debugctlmsr |= X86_DEBUGCTL_BTS;
925 debugctlmsr |= X86_DEBUGCTL_BTINT;
926
927 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
928 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
929
930 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
931 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
932
933 update_debugctlmsr(debugctlmsr);
934}
935
936static void intel_pmu_disable_bts(void)
937{
938 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
939 unsigned long debugctlmsr;
940
941 if (!cpuc->ds)
942 return;
943
944 debugctlmsr = get_debugctlmsr();
945
946 debugctlmsr &=
947 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
948 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
949
950 update_debugctlmsr(debugctlmsr);
951}
952
953/* 437/*
954 * Setup the hardware configuration for a given attr_type 438 * Setup the hardware configuration for a given attr_type
955 */ 439 */
@@ -988,6 +472,8 @@ static int __hw_perf_event_init(struct perf_event *event)
988 hwc->config = ARCH_PERFMON_EVENTSEL_INT; 472 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
989 473
990 hwc->idx = -1; 474 hwc->idx = -1;
475 hwc->last_cpu = -1;
476 hwc->last_tag = ~0ULL;
991 477
992 /* 478 /*
993 * Count user and OS events unless requested not to. 479 * Count user and OS events unless requested not to.
@@ -1017,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event)
1017 */ 503 */
1018 if (attr->type == PERF_TYPE_RAW) { 504 if (attr->type == PERF_TYPE_RAW) {
1019 hwc->config |= x86_pmu.raw_event(attr->config); 505 hwc->config |= x86_pmu.raw_event(attr->config);
506 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
507 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
508 return -EACCES;
1020 return 0; 509 return 0;
1021 } 510 }
1022 511
@@ -1056,216 +545,323 @@ static int __hw_perf_event_init(struct perf_event *event)
1056 return 0; 545 return 0;
1057} 546}
1058 547
1059static void p6_pmu_disable_all(void) 548static void x86_pmu_disable_all(void)
1060{ 549{
1061 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 550 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062 u64 val; 551 int idx;
1063
1064 if (!cpuc->enabled)
1065 return;
1066 552
1067 cpuc->enabled = 0; 553 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1068 barrier(); 554 u64 val;
1069 555
1070 /* p6 only has one enable register */ 556 if (!test_bit(idx, cpuc->active_mask))
1071 rdmsrl(MSR_P6_EVNTSEL0, val); 557 continue;
1072 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 558 rdmsrl(x86_pmu.eventsel + idx, val);
1073 wrmsrl(MSR_P6_EVNTSEL0, val); 559 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
560 continue;
561 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
562 wrmsrl(x86_pmu.eventsel + idx, val);
563 }
1074} 564}
1075 565
1076static void intel_pmu_disable_all(void) 566void hw_perf_disable(void)
1077{ 567{
1078 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 568 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1079 569
570 if (!x86_pmu_initialized())
571 return;
572
1080 if (!cpuc->enabled) 573 if (!cpuc->enabled)
1081 return; 574 return;
1082 575
576 cpuc->n_added = 0;
1083 cpuc->enabled = 0; 577 cpuc->enabled = 0;
1084 barrier(); 578 barrier();
1085 579
1086 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 580 x86_pmu.disable_all();
1087
1088 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1089 intel_pmu_disable_bts();
1090} 581}
1091 582
1092static void amd_pmu_disable_all(void) 583static void x86_pmu_enable_all(void)
1093{ 584{
1094 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 585 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1095 int idx; 586 int idx;
1096 587
1097 if (!cpuc->enabled)
1098 return;
1099
1100 cpuc->enabled = 0;
1101 /*
1102 * ensure we write the disable before we start disabling the
1103 * events proper, so that amd_pmu_enable_event() does the
1104 * right thing.
1105 */
1106 barrier();
1107
1108 for (idx = 0; idx < x86_pmu.num_events; idx++) { 588 for (idx = 0; idx < x86_pmu.num_events; idx++) {
589 struct perf_event *event = cpuc->events[idx];
1109 u64 val; 590 u64 val;
1110 591
1111 if (!test_bit(idx, cpuc->active_mask)) 592 if (!test_bit(idx, cpuc->active_mask))
1112 continue; 593 continue;
1113 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 594
1114 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 595 val = event->hw.config;
1115 continue; 596 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
1116 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 597 wrmsrl(x86_pmu.eventsel + idx, val);
1117 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1118 } 598 }
1119} 599}
1120 600
1121void hw_perf_disable(void) 601static const struct pmu pmu;
602
603static inline int is_x86_event(struct perf_event *event)
1122{ 604{
1123 if (!x86_pmu_initialized()) 605 return event->pmu == &pmu;
1124 return;
1125 return x86_pmu.disable_all();
1126} 606}
1127 607
1128static void p6_pmu_enable_all(void) 608static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1129{ 609{
1130 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 610 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1131 unsigned long val; 611 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
612 int i, j, w, wmax, num = 0;
613 struct hw_perf_event *hwc;
1132 614
1133 if (cpuc->enabled) 615 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1134 return;
1135 616
1136 cpuc->enabled = 1; 617 for (i = 0; i < n; i++) {
1137 barrier(); 618 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
619 constraints[i] = c;
620 }
1138 621
1139 /* p6 only has one enable register */ 622 /*
1140 rdmsrl(MSR_P6_EVNTSEL0, val); 623 * fastpath, try to reuse previous register
1141 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 624 */
1142 wrmsrl(MSR_P6_EVNTSEL0, val); 625 for (i = 0; i < n; i++) {
1143} 626 hwc = &cpuc->event_list[i]->hw;
627 c = constraints[i];
1144 628
1145static void intel_pmu_enable_all(void) 629 /* never assigned */
1146{ 630 if (hwc->idx == -1)
1147 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 631 break;
1148 632
1149 if (cpuc->enabled) 633 /* constraint still honored */
1150 return; 634 if (!test_bit(hwc->idx, c->idxmsk))
635 break;
1151 636
1152 cpuc->enabled = 1; 637 /* not already used */
1153 barrier(); 638 if (test_bit(hwc->idx, used_mask))
639 break;
640
641 set_bit(hwc->idx, used_mask);
642 if (assign)
643 assign[i] = hwc->idx;
644 }
645 if (i == n)
646 goto done;
647
648 /*
649 * begin slow path
650 */
651
652 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1154 653
1155 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 654 /*
655 * weight = number of possible counters
656 *
657 * 1 = most constrained, only works on one counter
658 * wmax = least constrained, works on any counter
659 *
660 * assign events to counters starting with most
661 * constrained events.
662 */
663 wmax = x86_pmu.num_events;
664
665 /*
666 * when fixed event counters are present,
667 * wmax is incremented by 1 to account
668 * for one more choice
669 */
670 if (x86_pmu.num_events_fixed)
671 wmax++;
672
673 for (w = 1, num = n; num && w <= wmax; w++) {
674 /* for each event */
675 for (i = 0; num && i < n; i++) {
676 c = constraints[i];
677 hwc = &cpuc->event_list[i]->hw;
678
679 if (c->weight != w)
680 continue;
681
682 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
683 if (!test_bit(j, used_mask))
684 break;
685 }
1156 686
1157 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 687 if (j == X86_PMC_IDX_MAX)
1158 struct perf_event *event = 688 break;
1159 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1160 689
1161 if (WARN_ON_ONCE(!event)) 690 set_bit(j, used_mask);
1162 return;
1163 691
1164 intel_pmu_enable_bts(event->hw.config); 692 if (assign)
693 assign[i] = j;
694 num--;
695 }
696 }
697done:
698 /*
699 * scheduling failed or is just a simulation,
700 * free resources if necessary
701 */
702 if (!assign || num) {
703 for (i = 0; i < n; i++) {
704 if (x86_pmu.put_event_constraints)
705 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
706 }
1165 } 707 }
708 return num ? -ENOSPC : 0;
1166} 709}
1167 710
1168static void amd_pmu_enable_all(void) 711/*
712 * dogrp: true if must collect siblings events (group)
713 * returns total number of events and error code
714 */
715static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1169{ 716{
1170 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 717 struct perf_event *event;
1171 int idx; 718 int n, max_count;
1172 719
1173 if (cpuc->enabled) 720 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1174 return;
1175 721
1176 cpuc->enabled = 1; 722 /* current number of events already accepted */
1177 barrier(); 723 n = cpuc->n_events;
1178 724
1179 for (idx = 0; idx < x86_pmu.num_events; idx++) { 725 if (is_x86_event(leader)) {
1180 struct perf_event *event = cpuc->events[idx]; 726 if (n >= max_count)
1181 u64 val; 727 return -ENOSPC;
728 cpuc->event_list[n] = leader;
729 n++;
730 }
731 if (!dogrp)
732 return n;
1182 733
1183 if (!test_bit(idx, cpuc->active_mask)) 734 list_for_each_entry(event, &leader->sibling_list, group_entry) {
735 if (!is_x86_event(event) ||
736 event->state <= PERF_EVENT_STATE_OFF)
1184 continue; 737 continue;
1185 738
1186 val = event->hw.config; 739 if (n >= max_count)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 740 return -ENOSPC;
1188 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1189 }
1190}
1191 741
1192void hw_perf_enable(void) 742 cpuc->event_list[n] = event;
1193{ 743 n++;
1194 if (!x86_pmu_initialized()) 744 }
1195 return; 745 return n;
1196 x86_pmu.enable_all();
1197} 746}
1198 747
1199static inline u64 intel_pmu_get_status(void) 748static inline void x86_assign_hw_event(struct perf_event *event,
749 struct cpu_hw_events *cpuc, int i)
1200{ 750{
1201 u64 status; 751 struct hw_perf_event *hwc = &event->hw;
1202 752
1203 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 753 hwc->idx = cpuc->assign[i];
754 hwc->last_cpu = smp_processor_id();
755 hwc->last_tag = ++cpuc->tags[i];
1204 756
1205 return status; 757 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
758 hwc->config_base = 0;
759 hwc->event_base = 0;
760 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
761 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
762 /*
763 * We set it so that event_base + idx in wrmsr/rdmsr maps to
764 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
765 */
766 hwc->event_base =
767 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
768 } else {
769 hwc->config_base = x86_pmu.eventsel;
770 hwc->event_base = x86_pmu.perfctr;
771 }
1206} 772}
1207 773
1208static inline void intel_pmu_ack_status(u64 ack) 774static inline int match_prev_assignment(struct hw_perf_event *hwc,
775 struct cpu_hw_events *cpuc,
776 int i)
1209{ 777{
1210 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 778 return hwc->idx == cpuc->assign[i] &&
779 hwc->last_cpu == smp_processor_id() &&
780 hwc->last_tag == cpuc->tags[i];
1211} 781}
1212 782
1213static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 783static void x86_pmu_stop(struct perf_event *event);
1214{
1215 (void)checking_wrmsrl(hwc->config_base + idx,
1216 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1217}
1218 784
1219static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 785void hw_perf_enable(void)
1220{ 786{
1221 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 787 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1222} 788 struct perf_event *event;
789 struct hw_perf_event *hwc;
790 int i;
1223 791
1224static inline void 792 if (!x86_pmu_initialized())
1225intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 793 return;
1226{
1227 int idx = __idx - X86_PMC_IDX_FIXED;
1228 u64 ctrl_val, mask;
1229 794
1230 mask = 0xfULL << (idx * 4); 795 if (cpuc->enabled)
796 return;
1231 797
1232 rdmsrl(hwc->config_base, ctrl_val); 798 if (cpuc->n_added) {
1233 ctrl_val &= ~mask; 799 /*
1234 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 800 * apply assignment obtained either from
1235} 801 * hw_perf_group_sched_in() or x86_pmu_enable()
802 *
803 * step1: save events moving to new counters
804 * step2: reprogram moved events into new counters
805 */
806 for (i = 0; i < cpuc->n_events; i++) {
1236 807
1237static inline void 808 event = cpuc->event_list[i];
1238p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 809 hwc = &event->hw;
1239{
1240 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1241 u64 val = P6_NOP_EVENT;
1242 810
1243 if (cpuc->enabled) 811 /*
1244 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 812 * we can avoid reprogramming counter if:
813 * - assigned same counter as last time
814 * - running on same CPU as last time
815 * - no other event has used the counter since
816 */
817 if (hwc->idx == -1 ||
818 match_prev_assignment(hwc, cpuc, i))
819 continue;
1245 820
1246 (void)checking_wrmsrl(hwc->config_base + idx, val); 821 x86_pmu_stop(event);
1247}
1248 822
1249static inline void 823 hwc->idx = -1;
1250intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 824 }
1251{
1252 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1253 intel_pmu_disable_bts();
1254 return;
1255 }
1256 825
1257 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 826 for (i = 0; i < cpuc->n_events; i++) {
1258 intel_pmu_disable_fixed(hwc, idx); 827
1259 return; 828 event = cpuc->event_list[i];
829 hwc = &event->hw;
830
831 if (hwc->idx == -1) {
832 x86_assign_hw_event(event, cpuc, i);
833 x86_perf_event_set_period(event, hwc, hwc->idx);
834 }
835 /*
836 * need to mark as active because x86_pmu_disable()
837 * clear active_mask and events[] yet it preserves
838 * idx
839 */
840 set_bit(hwc->idx, cpuc->active_mask);
841 cpuc->events[hwc->idx] = event;
842
843 x86_pmu.enable(hwc, hwc->idx);
844 perf_event_update_userpage(event);
845 }
846 cpuc->n_added = 0;
847 perf_events_lapic_init();
1260 } 848 }
1261 849
1262 x86_pmu_disable_event(hwc, idx); 850 cpuc->enabled = 1;
851 barrier();
852
853 x86_pmu.enable_all();
1263} 854}
1264 855
1265static inline void 856static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1266amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) 857{
858 (void)checking_wrmsrl(hwc->config_base + idx,
859 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
860}
861
862static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1267{ 863{
1268 x86_pmu_disable_event(hwc, idx); 864 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1269} 865}
1270 866
1271static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 867static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1326,220 +922,60 @@ x86_perf_event_set_period(struct perf_event *event,
1326 return ret; 922 return ret;
1327} 923}
1328 924
1329static inline void 925static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1331{
1332 int idx = __idx - X86_PMC_IDX_FIXED;
1333 u64 ctrl_val, bits, mask;
1334 int err;
1335
1336 /*
1337 * Enable IRQ generation (0x8),
1338 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1339 * if requested:
1340 */
1341 bits = 0x8ULL;
1342 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1343 bits |= 0x2;
1344 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1345 bits |= 0x1;
1346
1347 /*
1348 * ANY bit is supported in v3 and up
1349 */
1350 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1351 bits |= 0x4;
1352
1353 bits <<= (idx * 4);
1354 mask = 0xfULL << (idx * 4);
1355
1356 rdmsrl(hwc->config_base, ctrl_val);
1357 ctrl_val &= ~mask;
1358 ctrl_val |= bits;
1359 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1360}
1361
1362static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1363{ 926{
1364 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 927 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1365 u64 val;
1366
1367 val = hwc->config;
1368 if (cpuc->enabled) 928 if (cpuc->enabled)
1369 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 929 __x86_pmu_enable_event(hwc, idx);
1370
1371 (void)checking_wrmsrl(hwc->config_base + idx, val);
1372} 930}
1373 931
1374 932/*
1375static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 933 * activate a single event
1376{ 934 *
1377 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 935 * The event is added to the group of enabled events
1378 if (!__get_cpu_var(cpu_hw_events).enabled) 936 * but only if it can be scehduled with existing events.
1379 return; 937 *
1380 938 * Called with PMU disabled. If successful and return value 1,
1381 intel_pmu_enable_bts(hwc->config); 939 * then guaranteed to call perf_enable() and hw_perf_enable()
1382 return; 940 */
1383 } 941static int x86_pmu_enable(struct perf_event *event)
1384
1385 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1386 intel_pmu_enable_fixed(hwc, idx);
1387 return;
1388 }
1389
1390 x86_pmu_enable_event(hwc, idx);
1391}
1392
1393static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1394{ 942{
1395 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 943 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
944 struct hw_perf_event *hwc;
945 int assign[X86_PMC_IDX_MAX];
946 int n, n0, ret;
1396 947
1397 if (cpuc->enabled) 948 hwc = &event->hw;
1398 x86_pmu_enable_event(hwc, idx);
1399}
1400
1401static int fixed_mode_idx(struct hw_perf_event *hwc)
1402{
1403 unsigned int hw_event;
1404
1405 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1406
1407 if (unlikely((hw_event ==
1408 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1409 (hwc->sample_period == 1)))
1410 return X86_PMC_IDX_FIXED_BTS;
1411 949
1412 if (!x86_pmu.num_events_fixed) 950 n0 = cpuc->n_events;
1413 return -1; 951 n = collect_events(cpuc, event, false);
952 if (n < 0)
953 return n;
1414 954
955 ret = x86_schedule_events(cpuc, n, assign);
956 if (ret)
957 return ret;
1415 /* 958 /*
1416 * fixed counters do not take all possible filters 959 * copy new assignment, now we know it is possible
960 * will be used by hw_perf_enable()
1417 */ 961 */
1418 if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) 962 memcpy(cpuc->assign, assign, n*sizeof(int));
1419 return -1;
1420
1421 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1422 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1423 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1424 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1425 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1426 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1427
1428 return -1;
1429}
1430
1431/*
1432 * generic counter allocator: get next free counter
1433 */
1434static int
1435gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1436{
1437 int idx;
1438
1439 idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1440 return idx == x86_pmu.num_events ? -1 : idx;
1441}
1442
1443/*
1444 * intel-specific counter allocator: check event constraints
1445 */
1446static int
1447intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1448{
1449 const struct event_constraint *event_constraint;
1450 int i, code;
1451
1452 if (!event_constraints)
1453 goto skip;
1454 963
1455 code = hwc->config & CORE_EVNTSEL_EVENT_MASK; 964 cpuc->n_events = n;
1456 965 cpuc->n_added = n - n0;
1457 for_each_event_constraint(event_constraint, event_constraints) {
1458 if (code == event_constraint->code) {
1459 for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1460 if (!test_and_set_bit(i, cpuc->used_mask))
1461 return i;
1462 }
1463 return -1;
1464 }
1465 }
1466skip:
1467 return gen_get_event_idx(cpuc, hwc);
1468}
1469
1470static int
1471x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1472{
1473 int idx;
1474 966
1475 idx = fixed_mode_idx(hwc); 967 return 0;
1476 if (idx == X86_PMC_IDX_FIXED_BTS) {
1477 /* BTS is already occupied. */
1478 if (test_and_set_bit(idx, cpuc->used_mask))
1479 return -EAGAIN;
1480
1481 hwc->config_base = 0;
1482 hwc->event_base = 0;
1483 hwc->idx = idx;
1484 } else if (idx >= 0) {
1485 /*
1486 * Try to get the fixed event, if that is already taken
1487 * then try to get a generic event:
1488 */
1489 if (test_and_set_bit(idx, cpuc->used_mask))
1490 goto try_generic;
1491
1492 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1493 /*
1494 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1495 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1496 */
1497 hwc->event_base =
1498 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1499 hwc->idx = idx;
1500 } else {
1501 idx = hwc->idx;
1502 /* Try to get the previous generic event again */
1503 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1504try_generic:
1505 idx = x86_pmu.get_event_idx(cpuc, hwc);
1506 if (idx == -1)
1507 return -EAGAIN;
1508
1509 set_bit(idx, cpuc->used_mask);
1510 hwc->idx = idx;
1511 }
1512 hwc->config_base = x86_pmu.eventsel;
1513 hwc->event_base = x86_pmu.perfctr;
1514 }
1515
1516 return idx;
1517} 968}
1518 969
1519/* 970static int x86_pmu_start(struct perf_event *event)
1520 * Find a PMC slot for the freshly enabled / scheduled in event:
1521 */
1522static int x86_pmu_enable(struct perf_event *event)
1523{ 971{
1524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1525 struct hw_perf_event *hwc = &event->hw; 972 struct hw_perf_event *hwc = &event->hw;
1526 int idx;
1527
1528 idx = x86_schedule_event(cpuc, hwc);
1529 if (idx < 0)
1530 return idx;
1531
1532 perf_events_lapic_init();
1533
1534 x86_pmu.disable(hwc, idx);
1535 973
1536 cpuc->events[idx] = event; 974 if (hwc->idx == -1)
1537 set_bit(idx, cpuc->active_mask); 975 return -EAGAIN;
1538 976
1539 x86_perf_event_set_period(event, hwc, idx); 977 x86_perf_event_set_period(event, hwc, hwc->idx);
1540 x86_pmu.enable(hwc, idx); 978 x86_pmu.enable(hwc, hwc->idx);
1541
1542 perf_event_update_userpage(event);
1543 979
1544 return 0; 980 return 0;
1545} 981}
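The reworked x86_pmu_enable() above follows a collect, schedule, commit sequence: collect_events() appends the new event to the software list, x86_schedule_events() computes a tentative counter assignment, and only if that succeeds is the assignment copied into cpuc->assign for hw_perf_enable() to program later. Below is a rough user-space sketch of that pattern, with a trivial first-fit scheduler standing in for x86_schedule_events(); all fake_* names are illustrative assumptions, not kernel code.

#include <stdio.h>
#include <string.h>

#define NUM_COUNTERS	4
#define MAX_EVENTS	8

struct fake_cpuc {
	int n_events;
	int assign[MAX_EVENTS];		/* committed counter assignment */
};

/* stand-in for x86_schedule_events(): trivial first-fit */
static int fake_schedule_events(int n, int *assign)
{
	int i;

	if (n > NUM_COUNTERS)
		return -1;		/* does not fit */
	for (i = 0; i < n; i++)
		assign[i] = i;
	return 0;
}

/* stand-in for x86_pmu_enable(): schedule tentatively, commit only on success */
static int fake_pmu_enable(struct fake_cpuc *cpuc)
{
	int assign[MAX_EVENTS];
	int n = cpuc->n_events + 1;	/* "collect" one more event */

	if (fake_schedule_events(n, assign))
		return -1;

	memcpy(cpuc->assign, assign, n * sizeof(int));
	cpuc->n_events = n;
	return 0;
}

int main(void)
{
	struct fake_cpuc cpuc = { 0 };
	int i;

	for (i = 0; i < 6; i++)
		printf("event %d: %s\n", i,
		       fake_pmu_enable(&cpuc) ? "would not fit" : "scheduled");
	return 0;
}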
@@ -1583,7 +1019,7 @@ void perf_event_print_debug(void)
1583 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1019 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1584 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1020 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1585 } 1021 }
1586 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1022 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1587 1023
1588 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1024 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1589 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1025 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1607,67 +1043,7 @@ void perf_event_print_debug(void)
1607 local_irq_restore(flags); 1043 local_irq_restore(flags);
1608} 1044}
1609 1045
1610static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) 1046static void x86_pmu_stop(struct perf_event *event)
1611{
1612 struct debug_store *ds = cpuc->ds;
1613 struct bts_record {
1614 u64 from;
1615 u64 to;
1616 u64 flags;
1617 };
1618 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1619 struct bts_record *at, *top;
1620 struct perf_output_handle handle;
1621 struct perf_event_header header;
1622 struct perf_sample_data data;
1623 struct pt_regs regs;
1624
1625 if (!event)
1626 return;
1627
1628 if (!ds)
1629 return;
1630
1631 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1632 top = (struct bts_record *)(unsigned long)ds->bts_index;
1633
1634 if (top <= at)
1635 return;
1636
1637 ds->bts_index = ds->bts_buffer_base;
1638
1639
1640 data.period = event->hw.last_period;
1641 data.addr = 0;
1642 data.raw = NULL;
1643 regs.ip = 0;
1644
1645 /*
1646 * Prepare a generic sample, i.e. fill in the invariant fields.
1647 * We will overwrite the from and to address before we output
1648 * the sample.
1649 */
1650 perf_prepare_sample(&header, &data, event, &regs);
1651
1652 if (perf_output_begin(&handle, event,
1653 header.size * (top - at), 1, 1))
1654 return;
1655
1656 for (; at < top; at++) {
1657 data.ip = at->from;
1658 data.addr = at->to;
1659
1660 perf_output_sample(&handle, &header, &data, event);
1661 }
1662
1663 perf_output_end(&handle);
1664
1665 /* There's new data available. */
1666 event->hw.interrupts++;
1667 event->pending_kill = POLL_IN;
1668}
1669
1670static void x86_pmu_disable(struct perf_event *event)
1671{ 1047{
1672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1048 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1673 struct hw_perf_event *hwc = &event->hw; 1049 struct hw_perf_event *hwc = &event->hw;
@@ -1681,183 +1057,38 @@ static void x86_pmu_disable(struct perf_event *event)
1681 x86_pmu.disable(hwc, idx); 1057 x86_pmu.disable(hwc, idx);
1682 1058
1683 /* 1059 /*
1684 * Make sure the cleared pointer becomes visible before we
1685 * (potentially) free the event:
1686 */
1687 barrier();
1688
1689 /*
1690 * Drain the remaining delta count out of an event 1060 * Drain the remaining delta count out of an event
1691 * that we are disabling: 1061 * that we are disabling:
1692 */ 1062 */
1693 x86_perf_event_update(event, hwc, idx); 1063 x86_perf_event_update(event, hwc, idx);
1694 1064
1695 /* Drain the remaining BTS records. */
1696 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1697 intel_pmu_drain_bts_buffer(cpuc);
1698
1699 cpuc->events[idx] = NULL; 1065 cpuc->events[idx] = NULL;
1700 clear_bit(idx, cpuc->used_mask);
1701
1702 perf_event_update_userpage(event);
1703}
1704
1705/*
1706 * Save and restart an expired event. Called by NMI contexts,
1707 * so it has to be careful about preempting normal event ops:
1708 */
1709static int intel_pmu_save_and_restart(struct perf_event *event)
1710{
1711 struct hw_perf_event *hwc = &event->hw;
1712 int idx = hwc->idx;
1713 int ret;
1714
1715 x86_perf_event_update(event, hwc, idx);
1716 ret = x86_perf_event_set_period(event, hwc, idx);
1717
1718 if (event->state == PERF_EVENT_STATE_ACTIVE)
1719 intel_pmu_enable_event(hwc, idx);
1720
1721 return ret;
1722}
1723
1724static void intel_pmu_reset(void)
1725{
1726 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1727 unsigned long flags;
1728 int idx;
1729
1730 if (!x86_pmu.num_events)
1731 return;
1732
1733 local_irq_save(flags);
1734
1735 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1736
1737 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1738 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1739 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1740 }
1741 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1742 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1743 }
1744 if (ds)
1745 ds->bts_index = ds->bts_buffer_base;
1746
1747 local_irq_restore(flags);
1748}
1749
1750static int p6_pmu_handle_irq(struct pt_regs *regs)
1751{
1752 struct perf_sample_data data;
1753 struct cpu_hw_events *cpuc;
1754 struct perf_event *event;
1755 struct hw_perf_event *hwc;
1756 int idx, handled = 0;
1757 u64 val;
1758
1759 data.addr = 0;
1760 data.raw = NULL;
1761
1762 cpuc = &__get_cpu_var(cpu_hw_events);
1763
1764 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1765 if (!test_bit(idx, cpuc->active_mask))
1766 continue;
1767
1768 event = cpuc->events[idx];
1769 hwc = &event->hw;
1770
1771 val = x86_perf_event_update(event, hwc, idx);
1772 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1773 continue;
1774
1775 /*
1776 * event overflow
1777 */
1778 handled = 1;
1779 data.period = event->hw.last_period;
1780
1781 if (!x86_perf_event_set_period(event, hwc, idx))
1782 continue;
1783
1784 if (perf_event_overflow(event, 1, &data, regs))
1785 p6_pmu_disable_event(hwc, idx);
1786 }
1787
1788 if (handled)
1789 inc_irq_stat(apic_perf_irqs);
1790
1791 return handled;
1792} 1066}
1793 1067
1794/* 1068static void x86_pmu_disable(struct perf_event *event)
1795 * This handler is triggered by the local APIC, so the APIC IRQ handling
1796 * rules apply:
1797 */
1798static int intel_pmu_handle_irq(struct pt_regs *regs)
1799{ 1069{
1800 struct perf_sample_data data; 1070 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1801 struct cpu_hw_events *cpuc; 1071 int i;
1802 int bit, loops;
1803 u64 ack, status;
1804
1805 data.addr = 0;
1806 data.raw = NULL;
1807
1808 cpuc = &__get_cpu_var(cpu_hw_events);
1809
1810 perf_disable();
1811 intel_pmu_drain_bts_buffer(cpuc);
1812 status = intel_pmu_get_status();
1813 if (!status) {
1814 perf_enable();
1815 return 0;
1816 }
1817
1818 loops = 0;
1819again:
1820 if (++loops > 100) {
1821 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1822 perf_event_print_debug();
1823 intel_pmu_reset();
1824 perf_enable();
1825 return 1;
1826 }
1827 1072
1828 inc_irq_stat(apic_perf_irqs); 1073 x86_pmu_stop(event);
1829 ack = status;
1830 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1831 struct perf_event *event = cpuc->events[bit];
1832 1074
1833 clear_bit(bit, (unsigned long *) &status); 1075 for (i = 0; i < cpuc->n_events; i++) {
1834 if (!test_bit(bit, cpuc->active_mask)) 1076 if (event == cpuc->event_list[i]) {
1835 continue;
1836 1077
1837 if (!intel_pmu_save_and_restart(event)) 1078 if (x86_pmu.put_event_constraints)
1838 continue; 1079 x86_pmu.put_event_constraints(cpuc, event);
1839 1080
1840 data.period = event->hw.last_period; 1081 while (++i < cpuc->n_events)
1082 cpuc->event_list[i-1] = cpuc->event_list[i];
1841 1083
1842 if (perf_event_overflow(event, 1, &data, regs)) 1084 --cpuc->n_events;
1843 intel_pmu_disable_event(&event->hw, bit); 1085 break;
1086 }
1844 } 1087 }
1845 1088 perf_event_update_userpage(event);
1846 intel_pmu_ack_status(ack);
1847
1848 /*
1849 * Repeat if there is more work to be done:
1850 */
1851 status = intel_pmu_get_status();
1852 if (status)
1853 goto again;
1854
1855 perf_enable();
1856
1857 return 1;
1858} 1089}
1859 1090
1860static int amd_pmu_handle_irq(struct pt_regs *regs) 1091static int x86_pmu_handle_irq(struct pt_regs *regs)
1861{ 1092{
1862 struct perf_sample_data data; 1093 struct perf_sample_data data;
1863 struct cpu_hw_events *cpuc; 1094 struct cpu_hw_events *cpuc;
@@ -1892,7 +1123,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1892 continue; 1123 continue;
1893 1124
1894 if (perf_event_overflow(event, 1, &data, regs)) 1125 if (perf_event_overflow(event, 1, &data, regs))
1895 amd_pmu_disable_event(hwc, idx); 1126 x86_pmu.disable(hwc, idx);
1896 } 1127 }
1897 1128
1898 if (handled) 1129 if (handled)
@@ -1975,194 +1206,137 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1975 .priority = 1 1206 .priority = 1
1976}; 1207};
1977 1208
1978static __initconst struct x86_pmu p6_pmu = { 1209static struct event_constraint unconstrained;
1979 .name = "p6", 1210static struct event_constraint emptyconstraint;
1980 .handle_irq = p6_pmu_handle_irq,
1981 .disable_all = p6_pmu_disable_all,
1982 .enable_all = p6_pmu_enable_all,
1983 .enable = p6_pmu_enable_event,
1984 .disable = p6_pmu_disable_event,
1985 .eventsel = MSR_P6_EVNTSEL0,
1986 .perfctr = MSR_P6_PERFCTR0,
1987 .event_map = p6_pmu_event_map,
1988 .raw_event = p6_pmu_raw_event,
1989 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1990 .apic = 1,
1991 .max_period = (1ULL << 31) - 1,
1992 .version = 0,
1993 .num_events = 2,
1994 /*
1995 * Events have 40 bits implemented. However they are designed such
1996 * that bits [32-39] are sign extensions of bit 31. As such the
1997 * effective width of an event for a P6-like PMU is 32 bits only.
1998 *
1999 * See IA-32 Intel Architecture Software developer manual Vol 3B
2000 */
2001 .event_bits = 32,
2002 .event_mask = (1ULL << 32) - 1,
2003 .get_event_idx = intel_get_event_idx,
2004};
2005 1211
2006static __initconst struct x86_pmu intel_pmu = { 1212static struct event_constraint *
2007 .name = "Intel", 1213x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2008 .handle_irq = intel_pmu_handle_irq, 1214{
2009 .disable_all = intel_pmu_disable_all, 1215 struct event_constraint *c;
2010 .enable_all = intel_pmu_enable_all,
2011 .enable = intel_pmu_enable_event,
2012 .disable = intel_pmu_disable_event,
2013 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2014 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2015 .event_map = intel_pmu_event_map,
2016 .raw_event = intel_pmu_raw_event,
2017 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2018 .apic = 1,
2019 /*
2020 * Intel PMCs cannot be accessed sanely above 32 bit width,
2021 * so we install an artificial 1<<31 period regardless of
2022 * the generic event period:
2023 */
2024 .max_period = (1ULL << 31) - 1,
2025 .enable_bts = intel_pmu_enable_bts,
2026 .disable_bts = intel_pmu_disable_bts,
2027 .get_event_idx = intel_get_event_idx,
2028};
2029 1216
2030static __initconst struct x86_pmu amd_pmu = { 1217 if (x86_pmu.event_constraints) {
2031 .name = "AMD", 1218 for_each_event_constraint(c, x86_pmu.event_constraints) {
2032 .handle_irq = amd_pmu_handle_irq, 1219 if ((event->hw.config & c->cmask) == c->code)
2033 .disable_all = amd_pmu_disable_all, 1220 return c;
2034 .enable_all = amd_pmu_enable_all, 1221 }
2035 .enable = amd_pmu_enable_event, 1222 }
2036 .disable = amd_pmu_disable_event, 1223
2037 .eventsel = MSR_K7_EVNTSEL0, 1224 return &unconstrained;
2038 .perfctr = MSR_K7_PERFCTR0, 1225}
2039 .event_map = amd_pmu_event_map,
2040 .raw_event = amd_pmu_raw_event,
2041 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
2042 .num_events = 4,
2043 .event_bits = 48,
2044 .event_mask = (1ULL << 48) - 1,
2045 .apic = 1,
2046 /* use highest bit to detect overflow */
2047 .max_period = (1ULL << 47) - 1,
2048 .get_event_idx = gen_get_event_idx,
2049};
2050 1226
2051static __init int p6_pmu_init(void) 1227static int x86_event_sched_in(struct perf_event *event,
1228 struct perf_cpu_context *cpuctx)
2052{ 1229{
2053 switch (boot_cpu_data.x86_model) { 1230 int ret = 0;
2054 case 1:
2055 case 3: /* Pentium Pro */
2056 case 5:
2057 case 6: /* Pentium II */
2058 case 7:
2059 case 8:
2060 case 11: /* Pentium III */
2061 event_constraints = intel_p6_event_constraints;
2062 break;
2063 case 9:
2064 case 13:
2065 /* Pentium M */
2066 event_constraints = intel_p6_event_constraints;
2067 break;
2068 default:
2069 pr_cont("unsupported p6 CPU model %d ",
2070 boot_cpu_data.x86_model);
2071 return -ENODEV;
2072 }
2073 1231
2074 x86_pmu = p6_pmu; 1232 event->state = PERF_EVENT_STATE_ACTIVE;
1233 event->oncpu = smp_processor_id();
1234 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2075 1235
2076 return 0; 1236 if (!is_x86_event(event))
1237 ret = event->pmu->enable(event);
1238
1239 if (!ret && !is_software_event(event))
1240 cpuctx->active_oncpu++;
1241
1242 if (!ret && event->attr.exclusive)
1243 cpuctx->exclusive = 1;
1244
1245 return ret;
2077} 1246}
2078 1247
2079static __init int intel_pmu_init(void) 1248static void x86_event_sched_out(struct perf_event *event,
1249 struct perf_cpu_context *cpuctx)
2080{ 1250{
2081 union cpuid10_edx edx; 1251 event->state = PERF_EVENT_STATE_INACTIVE;
2082 union cpuid10_eax eax; 1252 event->oncpu = -1;
2083 unsigned int unused;
2084 unsigned int ebx;
2085 int version;
2086
2087 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2088 /* check for P6 processor family */
2089 if (boot_cpu_data.x86 == 6) {
2090 return p6_pmu_init();
2091 } else {
2092 return -ENODEV;
2093 }
2094 }
2095 1253
2096 /* 1254 if (!is_x86_event(event))
2097 * Check whether the Architectural PerfMon supports 1255 event->pmu->disable(event);
2098 * Branch Misses Retired hw_event or not.
2099 */
2100 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2101 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2102 return -ENODEV;
2103 1256
2104 version = eax.split.version_id; 1257 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2105 if (version < 2)
2106 return -ENODEV;
2107 1258
2108 x86_pmu = intel_pmu; 1259 if (!is_software_event(event))
2109 x86_pmu.version = version; 1260 cpuctx->active_oncpu--;
2110 x86_pmu.num_events = eax.split.num_events;
2111 x86_pmu.event_bits = eax.split.bit_width;
2112 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
2113 1261
2114 /* 1262 if (event->attr.exclusive || !cpuctx->active_oncpu)
2115 * Quirk: v2 perfmon does not report fixed-purpose events, so 1263 cpuctx->exclusive = 0;
2116 * assume at least 3 events: 1264}
2117 */
2118 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2119 1265
1266/*
1267 * Called to enable a whole group of events.
1268 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1269 * Assumes the caller has disabled interrupts and has
1270 * frozen the PMU with hw_perf_save_disable.
1271 *
 1272 * Called with the PMU disabled. If successful (return value 1), the
 1273 * caller is then guaranteed to call perf_enable() and hw_perf_enable()
1274 */
1275int hw_perf_group_sched_in(struct perf_event *leader,
1276 struct perf_cpu_context *cpuctx,
1277 struct perf_event_context *ctx)
1278{
1279 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1280 struct perf_event *sub;
1281 int assign[X86_PMC_IDX_MAX];
1282 int n0, n1, ret;
1283
1284 /* n0 = total number of events */
1285 n0 = collect_events(cpuc, leader, true);
1286 if (n0 < 0)
1287 return n0;
1288
1289 ret = x86_schedule_events(cpuc, n0, assign);
1290 if (ret)
1291 return ret;
1292
1293 ret = x86_event_sched_in(leader, cpuctx);
1294 if (ret)
1295 return ret;
1296
1297 n1 = 1;
1298 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1299 if (sub->state > PERF_EVENT_STATE_OFF) {
1300 ret = x86_event_sched_in(sub, cpuctx);
1301 if (ret)
1302 goto undo;
1303 ++n1;
1304 }
1305 }
2120 /* 1306 /*
2121 * Install the hw-cache-events table: 1307 * copy new assignment, now we know it is possible
1308 * will be used by hw_perf_enable()
2122 */ 1309 */
2123 switch (boot_cpu_data.x86_model) { 1310 memcpy(cpuc->assign, assign, n0*sizeof(int));
2124 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2125 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2126 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2127 case 29: /* six-core 45 nm xeon "Dunnington" */
2128 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2129 sizeof(hw_cache_event_ids));
2130
2131 pr_cont("Core2 events, ");
2132 event_constraints = intel_core_event_constraints;
2133 break;
2134 default:
2135 case 26:
2136 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2137 sizeof(hw_cache_event_ids));
2138 1311
2139 event_constraints = intel_nehalem_event_constraints; 1312 cpuc->n_events = n0;
2140 pr_cont("Nehalem/Corei7 events, "); 1313 cpuc->n_added = n1;
2141 break; 1314 ctx->nr_active += n1;
2142 case 28:
2143 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2144 sizeof(hw_cache_event_ids));
2145 1315
2146 pr_cont("Atom events, "); 1316 /*
2147 break; 1317 * 1 means successful and events are active
1318 * This is not quite true because we defer
1319 * actual activation until hw_perf_enable() but
 1320 * this way we ensure the caller won't try to enable
1321 * individual events
1322 */
1323 return 1;
1324undo:
1325 x86_event_sched_out(leader, cpuctx);
1326 n0 = 1;
1327 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1328 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
1329 x86_event_sched_out(sub, cpuctx);
1330 if (++n0 == n1)
1331 break;
1332 }
2148 } 1333 }
2149 return 0; 1334 return ret;
2150} 1335}
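The undo: path above is the usual partial-rollback idiom: if any sibling fails to schedule in, the leader and the siblings that were already activated are scheduled out again, and nothing else. A compact generic illustration of that idiom follows (plain C, not the kernel structures; the kernel re-walks the sibling list rather than unwinding in reverse, but the net effect of deactivating only the already-activated members is the same).

#include <stdio.h>

#define GROUP_SIZE 5

static int activate(int i)
{
	return i == 3 ? -1 : 0;		/* pretend the 4th member cannot be activated */
}

static void deactivate(int i)
{
	printf("rolled back member %d\n", i);
}

static int group_sched_in(void)
{
	int i, ret = 0;

	for (i = 0; i < GROUP_SIZE; i++) {
		ret = activate(i);
		if (ret)
			break;
	}
	if (!ret)
		return 1;		/* whole group is active */

	while (--i >= 0)		/* undo only the members already activated */
		deactivate(i);
	return ret;
}

int main(void)
{
	printf("group_sched_in() = %d\n", group_sched_in());
	return 0;
}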
2151 1336
2152static __init int amd_pmu_init(void) 1337#include "perf_event_amd.c"
2153{ 1338#include "perf_event_p6.c"
2154 /* Performance-monitoring supported from K7 and later: */ 1339#include "perf_event_intel.c"
2155 if (boot_cpu_data.x86 < 6)
2156 return -ENODEV;
2157
2158 x86_pmu = amd_pmu;
2159
2160 /* Events are common for all AMDs */
2161 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2162 sizeof(hw_cache_event_ids));
2163
2164 return 0;
2165}
2166 1340
2167static void __init pmu_check_apic(void) 1341static void __init pmu_check_apic(void)
2168{ 1342{
@@ -2176,6 +1350,7 @@ static void __init pmu_check_apic(void)
2176 1350
2177void __init init_hw_perf_events(void) 1351void __init init_hw_perf_events(void)
2178{ 1352{
1353 struct event_constraint *c;
2179 int err; 1354 int err;
2180 1355
2181 pr_info("Performance Events: "); 1356 pr_info("Performance Events: ");
@@ -2220,6 +1395,20 @@ void __init init_hw_perf_events(void)
2220 perf_events_lapic_init(); 1395 perf_events_lapic_init();
2221 register_die_notifier(&perf_event_nmi_notifier); 1396 register_die_notifier(&perf_event_nmi_notifier);
2222 1397
1398 unconstrained = (struct event_constraint)
1399 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1400 0, x86_pmu.num_events);
1401
1402 if (x86_pmu.event_constraints) {
1403 for_each_event_constraint(c, x86_pmu.event_constraints) {
1404 if (c->cmask != INTEL_ARCH_FIXED_MASK)
1405 continue;
1406
1407 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
1408 c->weight += x86_pmu.num_events;
1409 }
1410 }
1411
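A worked example of the fix-up above, assuming 4 generic counters and fixed counters occupying index bits 32 and up (X86_PMC_IDX_FIXED, as the fixed-counter arithmetic elsewhere in this file suggests): a constraint that originally pinned an event to its fixed counter also gains all generic counters, and its weight grows from 1 to 5.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int num_events = 4;				/* generic counters */
	uint64_t generic = (1ULL << num_events) - 1;	/* 0xf */

	/* e.g. INST_RETIRED.ANY pinned to fixed counter 0 (index bit 32) */
	uint64_t idxmsk64 = 1ULL << 32;
	int weight = 1;

	idxmsk64 |= generic;	/* may now also run on any generic counter */
	weight += num_events;

	printf("idxmsk64 = %#llx, weight = %d\n",
	       (unsigned long long)idxmsk64, weight);
	/* idxmsk64 = 0x10000000f, weight = 5 */
	return 0;
}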
2223 pr_info("... version: %d\n", x86_pmu.version); 1412 pr_info("... version: %d\n", x86_pmu.version);
2224 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1413 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2225 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1414 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2237,50 +1426,79 @@ static inline void x86_pmu_read(struct perf_event *event)
2237static const struct pmu pmu = { 1426static const struct pmu pmu = {
2238 .enable = x86_pmu_enable, 1427 .enable = x86_pmu_enable,
2239 .disable = x86_pmu_disable, 1428 .disable = x86_pmu_disable,
1429 .start = x86_pmu_start,
1430 .stop = x86_pmu_stop,
2240 .read = x86_pmu_read, 1431 .read = x86_pmu_read,
2241 .unthrottle = x86_pmu_unthrottle, 1432 .unthrottle = x86_pmu_unthrottle,
2242}; 1433};
2243 1434
2244static int 1435/*
2245validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) 1436 * validate a single event group
2246{ 1437 *
2247 struct hw_perf_event fake_event = event->hw; 1438 * validation includes:
2248 1439 * - check events are compatible with each other
2249 if (event->pmu && event->pmu != &pmu) 1440 * - events do not compete for the same counter
2250 return 0; 1441 * - number of events <= number of counters
2251 1442 *
2252 return x86_schedule_event(cpuc, &fake_event) >= 0; 1443 * validation ensures the group can be loaded onto the
2253} 1444 * PMU if it was the only group available.
2254 1445 */
2255static int validate_group(struct perf_event *event) 1446static int validate_group(struct perf_event *event)
2256{ 1447{
2257 struct perf_event *sibling, *leader = event->group_leader; 1448 struct perf_event *leader = event->group_leader;
2258 struct cpu_hw_events fake_pmu; 1449 struct cpu_hw_events *fake_cpuc;
1450 int ret, n;
2259 1451
2260 memset(&fake_pmu, 0, sizeof(fake_pmu)); 1452 ret = -ENOMEM;
1453 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1454 if (!fake_cpuc)
1455 goto out;
1456
1457 /*
1458 * the event is not yet connected with its
 1459 * siblings; therefore we must first collect
1460 * existing siblings, then add the new event
1461 * before we can simulate the scheduling
1462 */
1463 ret = -ENOSPC;
1464 n = collect_events(fake_cpuc, leader, true);
1465 if (n < 0)
1466 goto out_free;
2261 1467
2262 if (!validate_event(&fake_pmu, leader)) 1468 fake_cpuc->n_events = n;
2263 return -ENOSPC; 1469 n = collect_events(fake_cpuc, event, false);
1470 if (n < 0)
1471 goto out_free;
2264 1472
2265 list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 1473 fake_cpuc->n_events = n;
2266 if (!validate_event(&fake_pmu, sibling))
2267 return -ENOSPC;
2268 }
2269 1474
2270 if (!validate_event(&fake_pmu, event)) 1475 ret = x86_schedule_events(fake_cpuc, n, NULL);
2271 return -ENOSPC;
2272 1476
2273 return 0; 1477out_free:
1478 kfree(fake_cpuc);
1479out:
1480 return ret;
2274} 1481}
2275 1482
2276const struct pmu *hw_perf_event_init(struct perf_event *event) 1483const struct pmu *hw_perf_event_init(struct perf_event *event)
2277{ 1484{
1485 const struct pmu *tmp;
2278 int err; 1486 int err;
2279 1487
2280 err = __hw_perf_event_init(event); 1488 err = __hw_perf_event_init(event);
2281 if (!err) { 1489 if (!err) {
1490 /*
1491 * we temporarily connect event to its pmu
1492 * such that validate_group() can classify
1493 * it as an x86 event using is_x86_event()
1494 */
1495 tmp = event->pmu;
1496 event->pmu = &pmu;
1497
2282 if (event->group_leader != event) 1498 if (event->group_leader != event)
2283 err = validate_group(event); 1499 err = validate_group(event);
1500
1501 event->pmu = tmp;
2284 } 1502 }
2285 if (err) { 1503 if (err) {
2286 if (event->destroy) 1504 if (event->destroy)
@@ -2304,7 +1522,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2304 1522
2305static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1523static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2306static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1524static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2307static DEFINE_PER_CPU(int, in_ignored_frame);
2308 1525
2309 1526
2310static void 1527static void
@@ -2320,10 +1537,6 @@ static void backtrace_warning(void *data, char *msg)
2320 1537
2321static int backtrace_stack(void *data, char *name) 1538static int backtrace_stack(void *data, char *name)
2322{ 1539{
2323 per_cpu(in_ignored_frame, smp_processor_id()) =
2324 x86_is_stack_id(NMI_STACK, name) ||
2325 x86_is_stack_id(DEBUG_STACK, name);
2326
2327 return 0; 1540 return 0;
2328} 1541}
2329 1542
@@ -2331,9 +1544,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2331{ 1544{
2332 struct perf_callchain_entry *entry = data; 1545 struct perf_callchain_entry *entry = data;
2333 1546
2334 if (per_cpu(in_ignored_frame, smp_processor_id()))
2335 return;
2336
2337 if (reliable) 1547 if (reliable)
2338 callchain_store(entry, addr); 1548 callchain_store(entry, addr);
2339} 1549}
@@ -2440,9 +1650,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2440 1650
2441 is_user = user_mode(regs); 1651 is_user = user_mode(regs);
2442 1652
2443 if (!current || current->pid == 0)
2444 return;
2445
2446 if (is_user && current->state != TASK_RUNNING) 1653 if (is_user && current->state != TASK_RUNNING)
2447 return; 1654 return;
2448 1655
@@ -2472,4 +1679,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2472void hw_perf_event_setup_online(int cpu) 1679void hw_perf_event_setup_online(int cpu)
2473{ 1680{
2474 init_debug_store_on_cpu(cpu); 1681 init_debug_store_on_cpu(cpu);
1682
1683 switch (boot_cpu_data.x86_vendor) {
1684 case X86_VENDOR_AMD:
1685 amd_pmu_cpu_online(cpu);
1686 break;
1687 default:
1688 return;
1689 }
1690}
1691
1692void hw_perf_event_setup_offline(int cpu)
1693{
1694 init_debug_store_on_cpu(cpu);
1695
1696 switch (boot_cpu_data.x86_vendor) {
1697 case X86_VENDOR_AMD:
1698 amd_pmu_cpu_offline(cpu);
1699 break;
1700 default:
1701 return;
1702 }
2475} 1703}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..8f3dbfda3c4f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
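The mask above keeps only the fields covered by the K7_EVNTSEL_* definitions; control bits such as the enable bit (bit 22) fall outside them and are cleared from a user-supplied raw config. A quick check of that, using a hypothetical raw value:

#include <stdio.h>

#define K7_EVNTSEL_EVENT_MASK	0xF000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK \
	(K7_EVNTSEL_EVENT_MASK | K7_EVNTSEL_UNIT_MASK | \
	 K7_EVNTSEL_EDGE_MASK | K7_EVNTSEL_INV_MASK | K7_EVNTSEL_REG_MASK)

int main(void)
{
	/* hypothetical raw config: event 0xc0 with the enable bit (22) set */
	unsigned long long hw_event = 0x4000c0ULL;

	printf("%#llx -> %#llx\n", hw_event, hw_event & K7_EVNTSEL_MASK);
	/* prints: 0x4000c0 -> 0xc0 */
	return 0;
}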
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
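A quick sanity check of the test above with hypothetical config values: event selects whose low byte lies in the 0xe0-0xff range are classified as NorthBridge events, anything else is not.

#include <stdio.h>

static int is_nb_event(unsigned long long config)
{
	return (config & 0xe0) == 0xe0;
}

int main(void)
{
	printf("%d %d %d\n",
	       is_nb_event(0xe0),	/* 1: NB event */
	       is_nb_event(0xf6),	/* 1: NB event */
	       is_nb_event(0xc0));	/* 0: e.g. retired instructions */
	return 0;
}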
139
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event)
142{
143 struct hw_perf_event *hwc = &event->hw;
144 struct amd_nb *nb = cpuc->amd_nb;
145 int i;
146
147 /*
148 * only care about NB events
149 */
150 if (!(nb && amd_is_nb_event(hwc)))
151 return;
152
153 /*
154 * need to scan whole list because event may not have
155 * been assigned during scheduling
156 *
157 * no race condition possible because event can only
158 * be removed on one CPU at a time AND PMU is disabled
159 * when we come here
160 */
161 for (i = 0; i < x86_pmu.num_events; i++) {
162 if (nb->owners[i] == event) {
163 cmpxchg(nb->owners+i, event, NULL);
164 break;
165 }
166 }
167}
168
169 /*
170 * AMD64 NorthBridge events need special treatment because
171 * counter access needs to be synchronized across all cores
172 * of a package. Refer to BKDG section 3.12
173 *
 174 * NB events are events measuring L3 cache, HyperTransport
 175 * traffic. They are identified by an event code >= 0xe00.
 176 * They measure events on the NorthBridge, which is shared
177 * by all cores on a package. NB events are counted on a
178 * shared set of counters. When a NB event is programmed
179 * in a counter, the data actually comes from a shared
180 * counter. Thus, access to those counters needs to be
181 * synchronized.
182 *
183 * We implement the synchronization such that no two cores
184 * can be measuring NB events using the same counters. Thus,
185 * we maintain a per-NB allocation table. The available slot
186 * is propagated using the event_constraint structure.
187 *
188 * We provide only one choice for each NB event based on
189 * the fact that only NB events have restrictions. Consequently,
190 * if a counter is available, there is a guarantee the NB event
191 * will be assigned to it. If no slot is available, an empty
192 * constraint is returned and scheduling will eventually fail
193 * for this event.
194 *
 195 * Note that all cores attached to the same NB compete for the same
 196 * counters to host NB events; this is why we use atomic ops. Some
197 * multi-chip CPUs may have more than one NB.
198 *
199 * Given that resources are allocated (cmpxchg), they must be
200 * eventually freed for others to use. This is accomplished by
201 * calling amd_put_event_constraints().
202 *
203 * Non NB events are not impacted by this restriction.
204 */
205static struct event_constraint *
206amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
207{
208 struct hw_perf_event *hwc = &event->hw;
209 struct amd_nb *nb = cpuc->amd_nb;
210 struct perf_event *old = NULL;
211 int max = x86_pmu.num_events;
212 int i, j, k = -1;
213
214 /*
215 * if not NB event or no NB, then no constraints
216 */
217 if (!(nb && amd_is_nb_event(hwc)))
218 return &unconstrained;
219
220 /*
221 * detect if already present, if so reuse
222 *
223 * cannot merge with actual allocation
224 * because of possible holes
225 *
226 * event can already be present yet not assigned (in hwc->idx)
227 * because of successive calls to x86_schedule_events() from
228 * hw_perf_group_sched_in() without hw_perf_enable()
229 */
230 for (i = 0; i < max; i++) {
231 /*
232 * keep track of first free slot
233 */
234 if (k == -1 && !nb->owners[i])
235 k = i;
236
237 /* already present, reuse */
238 if (nb->owners[i] == event)
239 goto done;
240 }
241 /*
242 * not present, so grab a new slot
243 * starting either at:
244 */
245 if (hwc->idx != -1) {
246 /* previous assignment */
247 i = hwc->idx;
248 } else if (k != -1) {
249 /* start from free slot found */
250 i = k;
251 } else {
252 /*
253 * event not found, no slot found in
254 * first pass, try again from the
255 * beginning
256 */
257 i = 0;
258 }
259 j = i;
260 do {
261 old = cmpxchg(nb->owners+i, NULL, event);
262 if (!old)
263 break;
264 if (++i == max)
265 i = 0;
266 } while (i != j);
267done:
268 if (!old)
269 return &nb->event_constraints[i];
270
271 return &emptyconstraint;
272}
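A small user-space sketch of the claim protocol described in the comment block above: each contender publishes itself into a shared owners[] slot with a compare-and-swap, retrying round-robin from a preferred starting index. Here __sync_val_compare_and_swap stands in for the kernel's cmpxchg(), and the counter count is an assumed value.

#include <stdio.h>

#define NUM_COUNTERS 4

static void *owners[NUM_COUNTERS];

static int claim_slot(void *event, int start)
{
	int i = start, j = start;

	do {
		void *old = __sync_val_compare_and_swap(&owners[i], NULL, event);
		if (old == NULL || old == event)
			return i;		/* slot claimed, or already ours */
		if (++i == NUM_COUNTERS)
			i = 0;
	} while (i != j);

	return -1;				/* all slots taken: empty constraint */
}

int main(void)
{
	int a = claim_slot((void *)0x1, 0);
	int b = claim_slot((void *)0x2, 0);

	printf("event1 -> slot %d, event2 -> slot %d\n", a, b);	/* 0 and 1 */
	return 0;
}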
273
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{
298 struct amd_nb *nb;
299 int i;
300
301 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
302 if (!nb)
303 return NULL;
304
305 memset(nb, 0, sizeof(*nb));
306 nb->nb_id = nb_id;
307
308 /*
309 * initialize all possible NB constraints
310 */
311 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1;
314 }
315 return nb;
316}
317
318static void amd_pmu_cpu_online(int cpu)
319{
320 struct cpu_hw_events *cpu1, *cpu2;
321 struct amd_nb *nb = NULL;
322 int i, nb_id;
323
324 if (boot_cpu_data.x86_max_cores < 2)
325 return;
326
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID)
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337
338 raw_spin_lock(&amd_nb_lock);
339
340 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i);
342 nb = cpu2->amd_nb;
343 if (!nb)
344 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348
349 nb = amd_alloc_nb(cpu, nb_id);
350 if (!nb) {
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
352 raw_spin_unlock(&amd_nb_lock);
353 return;
354 }
355found:
356 nb->refcnt++;
357 cpu1->amd_nb = nb;
358
359 raw_spin_unlock(&amd_nb_lock);
360}
361
362static void amd_pmu_cpu_offline(int cpu)
363{
364 struct cpu_hw_events *cpuhw;
365
366 if (boot_cpu_data.x86_max_cores < 2)
367 return;
368
369 cpuhw = &per_cpu(cpu_hw_events, cpu);
370
371 raw_spin_lock(&amd_nb_lock);
372
373 if (--cpuhw->amd_nb->refcnt == 0)
374 kfree(cpuhw->amd_nb);
375
376 cpuhw->amd_nb = NULL;
377
378 raw_spin_unlock(&amd_nb_lock);
379}
380
381static __init int amd_pmu_init(void)
382{
383 /* Performance-monitoring supported from K7 and later: */
384 if (boot_cpu_data.x86 < 6)
385 return -ENODEV;
386
387 x86_pmu = amd_pmu;
388
389 /* Events are common for all AMDs */
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids));
392
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0;
399}
400
401#else /* CONFIG_CPU_SUP_AMD */
402
403static int amd_pmu_init(void)
404{
405 return 0;
406}
407
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..4fbdfe5708d9
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,982 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon, used on Core and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
31 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
32 /*
33 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
34 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
35 * ratio between these counters.
36 */
37 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
38 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
39 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
40 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
41 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
42 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
43 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
44 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
45 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
46 INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
47 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
48 EVENT_CONSTRAINT_END
49};
50
51static struct event_constraint intel_nehalem_event_constraints[] =
52{
53 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
54 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
55 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
56 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
57 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
58 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
59 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
60 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
61 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
62 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
63 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
64 EVENT_CONSTRAINT_END
65};
66
67static struct event_constraint intel_westmere_event_constraints[] =
68{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
73 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
74 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
75 EVENT_CONSTRAINT_END
76};
77
78static struct event_constraint intel_gen_event_constraints[] =
79{
80 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
81 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
82 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
83 EVENT_CONSTRAINT_END
84};
85
86static u64 intel_pmu_event_map(int hw_event)
87{
88 return intel_perfmon_event_map[hw_event];
89}
90
91static __initconst u64 westmere_hw_cache_event_ids
92 [PERF_COUNT_HW_CACHE_MAX]
93 [PERF_COUNT_HW_CACHE_OP_MAX]
94 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
95{
96 [ C(L1D) ] = {
97 [ C(OP_READ) ] = {
98 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
99 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
100 },
101 [ C(OP_WRITE) ] = {
 102 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
103 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
104 },
105 [ C(OP_PREFETCH) ] = {
106 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
107 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
108 },
109 },
110 [ C(L1I ) ] = {
111 [ C(OP_READ) ] = {
112 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
113 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
114 },
115 [ C(OP_WRITE) ] = {
116 [ C(RESULT_ACCESS) ] = -1,
117 [ C(RESULT_MISS) ] = -1,
118 },
119 [ C(OP_PREFETCH) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0,
121 [ C(RESULT_MISS) ] = 0x0,
122 },
123 },
124 [ C(LL ) ] = {
125 [ C(OP_READ) ] = {
126 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
127 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
128 },
129 [ C(OP_WRITE) ] = {
130 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
131 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
132 },
133 [ C(OP_PREFETCH) ] = {
134 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
135 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
136 },
137 },
138 [ C(DTLB) ] = {
139 [ C(OP_READ) ] = {
140 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
141 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
142 },
143 [ C(OP_WRITE) ] = {
 144 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
145 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
146 },
147 [ C(OP_PREFETCH) ] = {
148 [ C(RESULT_ACCESS) ] = 0x0,
149 [ C(RESULT_MISS) ] = 0x0,
150 },
151 },
152 [ C(ITLB) ] = {
153 [ C(OP_READ) ] = {
154 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
155 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
156 },
157 [ C(OP_WRITE) ] = {
158 [ C(RESULT_ACCESS) ] = -1,
159 [ C(RESULT_MISS) ] = -1,
160 },
161 [ C(OP_PREFETCH) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 },
166 [ C(BPU ) ] = {
167 [ C(OP_READ) ] = {
168 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
169 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
170 },
171 [ C(OP_WRITE) ] = {
172 [ C(RESULT_ACCESS) ] = -1,
173 [ C(RESULT_MISS) ] = -1,
174 },
175 [ C(OP_PREFETCH) ] = {
176 [ C(RESULT_ACCESS) ] = -1,
177 [ C(RESULT_MISS) ] = -1,
178 },
179 },
180};
181
182static __initconst u64 nehalem_hw_cache_event_ids
183 [PERF_COUNT_HW_CACHE_MAX]
184 [PERF_COUNT_HW_CACHE_OP_MAX]
185 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
186{
187 [ C(L1D) ] = {
188 [ C(OP_READ) ] = {
189 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
190 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
191 },
192 [ C(OP_WRITE) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
194 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
195 },
196 [ C(OP_PREFETCH) ] = {
197 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
198 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
199 },
200 },
201 [ C(L1I ) ] = {
202 [ C(OP_READ) ] = {
203 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
204 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
205 },
206 [ C(OP_WRITE) ] = {
207 [ C(RESULT_ACCESS) ] = -1,
208 [ C(RESULT_MISS) ] = -1,
209 },
210 [ C(OP_PREFETCH) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0,
212 [ C(RESULT_MISS) ] = 0x0,
213 },
214 },
215 [ C(LL ) ] = {
216 [ C(OP_READ) ] = {
217 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
218 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
219 },
220 [ C(OP_WRITE) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
222 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
223 },
224 [ C(OP_PREFETCH) ] = {
225 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
226 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
227 },
228 },
229 [ C(DTLB) ] = {
230 [ C(OP_READ) ] = {
231 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
232 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
233 },
234 [ C(OP_WRITE) ] = {
235 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
236 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
237 },
238 [ C(OP_PREFETCH) ] = {
239 [ C(RESULT_ACCESS) ] = 0x0,
240 [ C(RESULT_MISS) ] = 0x0,
241 },
242 },
243 [ C(ITLB) ] = {
244 [ C(OP_READ) ] = {
245 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
246 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
247 },
248 [ C(OP_WRITE) ] = {
249 [ C(RESULT_ACCESS) ] = -1,
250 [ C(RESULT_MISS) ] = -1,
251 },
252 [ C(OP_PREFETCH) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 },
257 [ C(BPU ) ] = {
258 [ C(OP_READ) ] = {
259 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
260 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
261 },
262 [ C(OP_WRITE) ] = {
263 [ C(RESULT_ACCESS) ] = -1,
264 [ C(RESULT_MISS) ] = -1,
265 },
266 [ C(OP_PREFETCH) ] = {
267 [ C(RESULT_ACCESS) ] = -1,
268 [ C(RESULT_MISS) ] = -1,
269 },
270 },
271};
272
273static __initconst u64 core2_hw_cache_event_ids
274 [PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
277{
278 [ C(L1D) ] = {
279 [ C(OP_READ) ] = {
280 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
281 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
282 },
283 [ C(OP_WRITE) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
285 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
286 },
287 [ C(OP_PREFETCH) ] = {
288 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
289 [ C(RESULT_MISS) ] = 0,
290 },
291 },
292 [ C(L1I ) ] = {
293 [ C(OP_READ) ] = {
294 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
295 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
296 },
297 [ C(OP_WRITE) ] = {
298 [ C(RESULT_ACCESS) ] = -1,
299 [ C(RESULT_MISS) ] = -1,
300 },
301 [ C(OP_PREFETCH) ] = {
302 [ C(RESULT_ACCESS) ] = 0,
303 [ C(RESULT_MISS) ] = 0,
304 },
305 },
306 [ C(LL ) ] = {
307 [ C(OP_READ) ] = {
308 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
309 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
310 },
311 [ C(OP_WRITE) ] = {
312 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
313 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
314 },
315 [ C(OP_PREFETCH) ] = {
316 [ C(RESULT_ACCESS) ] = 0,
317 [ C(RESULT_MISS) ] = 0,
318 },
319 },
320 [ C(DTLB) ] = {
321 [ C(OP_READ) ] = {
322 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
323 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
324 },
325 [ C(OP_WRITE) ] = {
326 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
327 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
328 },
329 [ C(OP_PREFETCH) ] = {
330 [ C(RESULT_ACCESS) ] = 0,
331 [ C(RESULT_MISS) ] = 0,
332 },
333 },
334 [ C(ITLB) ] = {
335 [ C(OP_READ) ] = {
336 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
337 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
338 },
339 [ C(OP_WRITE) ] = {
340 [ C(RESULT_ACCESS) ] = -1,
341 [ C(RESULT_MISS) ] = -1,
342 },
343 [ C(OP_PREFETCH) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 },
348 [ C(BPU ) ] = {
349 [ C(OP_READ) ] = {
350 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
351 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
352 },
353 [ C(OP_WRITE) ] = {
354 [ C(RESULT_ACCESS) ] = -1,
355 [ C(RESULT_MISS) ] = -1,
356 },
357 [ C(OP_PREFETCH) ] = {
358 [ C(RESULT_ACCESS) ] = -1,
359 [ C(RESULT_MISS) ] = -1,
360 },
361 },
362};
363
364static __initconst u64 atom_hw_cache_event_ids
365 [PERF_COUNT_HW_CACHE_MAX]
366 [PERF_COUNT_HW_CACHE_OP_MAX]
367 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
368{
369 [ C(L1D) ] = {
370 [ C(OP_READ) ] = {
371 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
372 [ C(RESULT_MISS) ] = 0,
373 },
374 [ C(OP_WRITE) ] = {
375 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
376 [ C(RESULT_MISS) ] = 0,
377 },
378 [ C(OP_PREFETCH) ] = {
379 [ C(RESULT_ACCESS) ] = 0x0,
380 [ C(RESULT_MISS) ] = 0,
381 },
382 },
383 [ C(L1I ) ] = {
384 [ C(OP_READ) ] = {
385 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
386 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
387 },
388 [ C(OP_WRITE) ] = {
389 [ C(RESULT_ACCESS) ] = -1,
390 [ C(RESULT_MISS) ] = -1,
391 },
392 [ C(OP_PREFETCH) ] = {
393 [ C(RESULT_ACCESS) ] = 0,
394 [ C(RESULT_MISS) ] = 0,
395 },
396 },
397 [ C(LL ) ] = {
398 [ C(OP_READ) ] = {
399 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
400 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
401 },
402 [ C(OP_WRITE) ] = {
403 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
404 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
405 },
406 [ C(OP_PREFETCH) ] = {
407 [ C(RESULT_ACCESS) ] = 0,
408 [ C(RESULT_MISS) ] = 0,
409 },
410 },
411 [ C(DTLB) ] = {
412 [ C(OP_READ) ] = {
413 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
414 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
415 },
416 [ C(OP_WRITE) ] = {
417 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
418 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
419 },
420 [ C(OP_PREFETCH) ] = {
421 [ C(RESULT_ACCESS) ] = 0,
422 [ C(RESULT_MISS) ] = 0,
423 },
424 },
425 [ C(ITLB) ] = {
426 [ C(OP_READ) ] = {
427 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
428 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
429 },
430 [ C(OP_WRITE) ] = {
431 [ C(RESULT_ACCESS) ] = -1,
432 [ C(RESULT_MISS) ] = -1,
433 },
434 [ C(OP_PREFETCH) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 },
439 [ C(BPU ) ] = {
440 [ C(OP_READ) ] = {
441 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
442 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
443 },
444 [ C(OP_WRITE) ] = {
445 [ C(RESULT_ACCESS) ] = -1,
446 [ C(RESULT_MISS) ] = -1,
447 },
448 [ C(OP_PREFETCH) ] = {
449 [ C(RESULT_ACCESS) ] = -1,
450 [ C(RESULT_MISS) ] = -1,
451 },
452 },
453};
454
455static u64 intel_pmu_raw_event(u64 hw_event)
456{
457#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
458#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
459#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
460#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
461#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
462
463#define CORE_EVNTSEL_MASK \
464 (INTEL_ARCH_EVTSEL_MASK | \
465 INTEL_ARCH_UNIT_MASK | \
466 INTEL_ARCH_EDGE_MASK | \
467 INTEL_ARCH_INV_MASK | \
468 INTEL_ARCH_CNT_MASK)
469
470 return hw_event & CORE_EVNTSEL_MASK;
471}
472
473static void intel_pmu_enable_bts(u64 config)
474{
475 unsigned long debugctlmsr;
476
477 debugctlmsr = get_debugctlmsr();
478
479 debugctlmsr |= X86_DEBUGCTL_TR;
480 debugctlmsr |= X86_DEBUGCTL_BTS;
481 debugctlmsr |= X86_DEBUGCTL_BTINT;
482
483 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
484 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
485
486 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
487 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
488
489 update_debugctlmsr(debugctlmsr);
490}
491
492static void intel_pmu_disable_bts(void)
493{
494 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
495 unsigned long debugctlmsr;
496
497 if (!cpuc->ds)
498 return;
499
500 debugctlmsr = get_debugctlmsr();
501
502 debugctlmsr &=
503 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
504 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
505
506 update_debugctlmsr(debugctlmsr);
507}
508
509static void intel_pmu_disable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
516 intel_pmu_disable_bts();
517}
518
519static void intel_pmu_enable_all(void)
520{
521 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
522
523 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
524
525 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
526 struct perf_event *event =
527 cpuc->events[X86_PMC_IDX_FIXED_BTS];
528
529 if (WARN_ON_ONCE(!event))
530 return;
531
532 intel_pmu_enable_bts(event->hw.config);
533 }
534}
535
536static inline u64 intel_pmu_get_status(void)
537{
538 u64 status;
539
540 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
541
542 return status;
543}
544
545static inline void intel_pmu_ack_status(u64 ack)
546{
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548}
549
550static inline void
551intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
552{
553 int idx = __idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask;
555
556 mask = 0xfULL << (idx * 4);
557
558 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
561}
562
563static void intel_pmu_drain_bts_buffer(void)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593
594 data.period = event->hw.last_period;
595 data.addr = 0;
596 data.raw = NULL;
597 regs.ip = 0;
598
599 /*
600 * Prepare a generic sample, i.e. fill in the invariant fields.
601 * We will overwrite the from and to address before we output
602 * the sample.
603 */
604 perf_prepare_sample(&header, &data, event, &regs);
605
606 if (perf_output_begin(&handle, event,
607 header.size * (top - at), 1, 1))
608 return;
609
610 for (; at < top; at++) {
611 data.ip = at->from;
612 data.addr = at->to;
613
614 perf_output_sample(&handle, &header, &data, event);
615 }
616
617 perf_output_end(&handle);
618
619 /* There's new data available. */
620 event->hw.interrupts++;
621 event->pending_kill = POLL_IN;
622}
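The drain routine converts each pending BTS record — three u64s: from, to, flags — into one perf sample, reserving header.size bytes of output space per record. A minimal user-space sketch of that size arithmetic, with hypothetical buffer addresses and header size:

/* Size math used by the drain path above: each BTS record is 24 bytes,
 * and header.size bytes of ring-buffer space are reserved per record. */
#include <stdio.h>
#include <stdint.h>

struct bts_record { uint64_t from, to, flags; };

int main(void)
{
        uint64_t buffer_base = 0x1000;    /* hypothetical ds->bts_buffer_base */
        uint64_t index = 0x1000 + 10 * sizeof(struct bts_record);
        unsigned long long header_size = 48;   /* hypothetical header.size */

        unsigned long long nr =
                (index - buffer_base) / sizeof(struct bts_record);

        printf("%llu records pending, reserving %llu bytes\n",
               nr, nr * header_size);     /* 10 records, 480 bytes */
        return 0;
}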
623
624static inline void
625intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
626{
627 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
628 intel_pmu_disable_bts();
629 intel_pmu_drain_bts_buffer();
630 return;
631 }
632
633 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
634 intel_pmu_disable_fixed(hwc, idx);
635 return;
636 }
637
638 x86_pmu_disable_event(hwc, idx);
639}
640
641static inline void
642intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
643{
644 int idx = __idx - X86_PMC_IDX_FIXED;
645 u64 ctrl_val, bits, mask;
646 int err;
647
648 /*
649 * Enable IRQ generation (0x8),
650 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
651 * if requested:
652 */
653 bits = 0x8ULL;
654 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
655 bits |= 0x2;
656 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
657 bits |= 0x1;
658
659 /*
660 * ANY bit is supported in v3 and up
661 */
662 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
663 bits |= 0x4;
664
665 bits <<= (idx * 4);
666 mask = 0xfULL << (idx * 4);
667
668 rdmsrl(hwc->config_base, ctrl_val);
669 ctrl_val &= ~mask;
670 ctrl_val |= bits;
671 err = checking_wrmsrl(hwc->config_base, ctrl_val);
672}
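Each fixed counter owns a 4-bit field at idx * 4 in MSR_ARCH_PERFMON_FIXED_CTR_CTRL: bit 0 enables ring-0 counting, bit 1 ring-3, bit 2 the v3 ANY/AnyThread mode, and bit 3 PMI generation. A short user-space sketch of the same composition; for fixed counter 1 counting kernel and user with PMI enabled it produces 0xb << 4 = 0xb0:

/* Sketch of the nibble layout written by intel_pmu_enable_fixed(). */
#include <stdio.h>
#include <stdint.h>

static uint64_t fixed_ctrl_bits(int idx, int os, int usr, int any)
{
        uint64_t bits = 0x8;            /* always request PMI generation */

        if (usr)
                bits |= 0x2;            /* ring-3 counting */
        if (os)
                bits |= 0x1;            /* ring-0 counting */
        if (any)
                bits |= 0x4;            /* ANY bit, perfmon v3+ only */
        return bits << (idx * 4);
}

int main(void)
{
        /* fixed counter 1, kernel+user, no AnyThread: 0xb << 4 = 0xb0 */
        printf("ctrl |= %#llx\n",
               (unsigned long long)fixed_ctrl_bits(1, 1, 1, 0));
        return 0;
}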
673
674static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
675{
676 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
677 if (!__get_cpu_var(cpu_hw_events).enabled)
678 return;
679
680 intel_pmu_enable_bts(hwc->config);
681 return;
682 }
683
684 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
685 intel_pmu_enable_fixed(hwc, idx);
686 return;
687 }
688
689 __x86_pmu_enable_event(hwc, idx);
690}
691
692/*
693 * Save and restart an expired event. Called by NMI contexts,
694 * so it has to be careful about preempting normal event ops:
695 */
696static int intel_pmu_save_and_restart(struct perf_event *event)
697{
698 struct hw_perf_event *hwc = &event->hw;
699 int idx = hwc->idx;
700 int ret;
701
702 x86_perf_event_update(event, hwc, idx);
703 ret = x86_perf_event_set_period(event, hwc, idx);
704
705 return ret;
706}
707
708static void intel_pmu_reset(void)
709{
710 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
711 unsigned long flags;
712 int idx;
713
714 if (!x86_pmu.num_events)
715 return;
716
717 local_irq_save(flags);
718
719 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
720
721 for (idx = 0; idx < x86_pmu.num_events; idx++) {
722 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
723 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
724 }
725 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
726 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
727 }
728 if (ds)
729 ds->bts_index = ds->bts_buffer_base;
730
731 local_irq_restore(flags);
732}
733
734/*
735 * This handler is triggered by the local APIC, so the APIC IRQ handling
736 * rules apply:
737 */
738static int intel_pmu_handle_irq(struct pt_regs *regs)
739{
740 struct perf_sample_data data;
741 struct cpu_hw_events *cpuc;
742 int bit, loops;
743 u64 ack, status;
744
745 data.addr = 0;
746 data.raw = NULL;
747
748 cpuc = &__get_cpu_var(cpu_hw_events);
749
750 perf_disable();
751 intel_pmu_drain_bts_buffer();
752 status = intel_pmu_get_status();
753 if (!status) {
754 perf_enable();
755 return 0;
756 }
757
758 loops = 0;
759again:
760 if (++loops > 100) {
761 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
762 perf_event_print_debug();
763 intel_pmu_reset();
764 perf_enable();
765 return 1;
766 }
767
768 inc_irq_stat(apic_perf_irqs);
769 ack = status;
770 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
771 struct perf_event *event = cpuc->events[bit];
772
773 clear_bit(bit, (unsigned long *) &status);
774 if (!test_bit(bit, cpuc->active_mask))
775 continue;
776
777 if (!intel_pmu_save_and_restart(event))
778 continue;
779
780 data.period = event->hw.last_period;
781
782 if (perf_event_overflow(event, 1, &data, regs))
783 intel_pmu_disable_event(&event->hw, bit);
784 }
785
786 intel_pmu_ack_status(ack);
787
788 /*
789 * Repeat if there is more work to be done:
790 */
791 status = intel_pmu_get_status();
792 if (status)
793 goto again;
794
795 perf_enable();
796
797 return 1;
798}
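The handler above walks the set bits of MSR_CORE_PERF_GLOBAL_STATUS, where general-purpose counters occupy the low bits and fixed counters start at bit 32 (X86_PMC_IDX_FIXED), then acknowledges them through GLOBAL_OVF_CTRL. An illustrative user-space decoding of such an overflow mask (the sample status value is made up):

/* Decode a hypothetical GLOBAL_STATUS overflow mask the way the
 * handler's for_each_bit() loop interprets it. */
#include <stdio.h>
#include <stdint.h>

#define PMC_IDX_FIXED 32

int main(void)
{
        uint64_t status = (1ULL << 0) | (1ULL << 33);   /* made-up mask */
        int bit;

        for (bit = 0; bit < 64; bit++) {
                if (!(status & (1ULL << bit)))
                        continue;
                if (bit < PMC_IDX_FIXED)
                        printf("generic counter %d overflowed\n", bit);
                else
                        printf("fixed counter %d overflowed\n",
                               bit - PMC_IDX_FIXED);
        }
        return 0;
}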
799
800static struct event_constraint bts_constraint =
801 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
802
803static struct event_constraint *
804intel_special_constraints(struct perf_event *event)
805{
806 unsigned int hw_event;
807
808 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
809
810 if (unlikely((hw_event ==
811 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
812 (event->hw.sample_period == 1))) {
813
814 return &bts_constraint;
815 }
816 return NULL;
817}
818
819static struct event_constraint *
820intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
821{
822 struct event_constraint *c;
823
824 c = intel_special_constraints(event);
825 if (c)
826 return c;
827
828 return x86_get_event_constraints(cpuc, event);
829}
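The special-case constraint above is selected for a hardware branch-instructions event sampled with a period of exactly 1. Assuming a kernel with this BTS support, the following user-space sketch shows a perf_event_attr configuration that would land on that path, opened through the raw perf_event_open syscall (error handling kept minimal):

/* Open a branch-instructions event with sample_period == 1, the
 * combination the BTS constraint above matches on. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size          = sizeof(attr);
        attr.type          = PERF_TYPE_HARDWARE;
        attr.config        = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
        attr.sample_period = 1;        /* period == 1 selects the BTS path */
        attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                perror("perf_event_open");
        else
                close(fd);
        return 0;
}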
830
831static __initconst struct x86_pmu core_pmu = {
832 .name = "core",
833 .handle_irq = x86_pmu_handle_irq,
834 .disable_all = x86_pmu_disable_all,
835 .enable_all = x86_pmu_enable_all,
836 .enable = x86_pmu_enable_event,
837 .disable = x86_pmu_disable_event,
838 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
839 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
840 .event_map = intel_pmu_event_map,
841 .raw_event = intel_pmu_raw_event,
842 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
843 .apic = 1,
844 /*
845 * Intel PMCs cannot be accessed sanely above 32 bit width,
846 * so we install an artificial 1<<31 period regardless of
847 * the generic event period:
848 */
849 .max_period = (1ULL << 31) - 1,
850 .get_event_constraints = intel_get_event_constraints,
851 .event_constraints = intel_core_event_constraints,
852};
853
854static __initconst struct x86_pmu intel_pmu = {
855 .name = "Intel",
856 .handle_irq = intel_pmu_handle_irq,
857 .disable_all = intel_pmu_disable_all,
858 .enable_all = intel_pmu_enable_all,
859 .enable = intel_pmu_enable_event,
860 .disable = intel_pmu_disable_event,
861 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
862 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
863 .event_map = intel_pmu_event_map,
864 .raw_event = intel_pmu_raw_event,
865 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
866 .apic = 1,
867 /*
868 * Intel PMCs cannot be accessed sanely above 32 bit width,
869 * so we install an artificial 1<<31 period regardless of
870 * the generic event period:
871 */
872 .max_period = (1ULL << 31) - 1,
873 .enable_bts = intel_pmu_enable_bts,
874 .disable_bts = intel_pmu_disable_bts,
875 .get_event_constraints = intel_get_event_constraints
876};
877
878static __init int intel_pmu_init(void)
879{
880 union cpuid10_edx edx;
881 union cpuid10_eax eax;
882 unsigned int unused;
883 unsigned int ebx;
884 int version;
885
886 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
887 /* check for P6 processor family */
888 if (boot_cpu_data.x86 == 6) {
889 return p6_pmu_init();
890 } else {
891 return -ENODEV;
892 }
893 }
894
895 /*
896 * Check whether the Architectural PerfMon supports
897 * Branch Misses Retired hw_event or not.
898 */
899 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
900 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
901 return -ENODEV;
902
903 version = eax.split.version_id;
904 if (version < 2)
905 x86_pmu = core_pmu;
906 else
907 x86_pmu = intel_pmu;
908
909 x86_pmu.version = version;
910 x86_pmu.num_events = eax.split.num_events;
911 x86_pmu.event_bits = eax.split.bit_width;
912 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
913
914 /*
915 * Quirk: v2 perfmon does not report fixed-purpose events, so
916 * assume at least 3 events:
917 */
918 if (version > 1)
919 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
920
921 /*
922 * Install the hw-cache-events table:
923 */
924 switch (boot_cpu_data.x86_model) {
925 case 14: /* 65 nm core solo/duo, "Yonah" */
926 pr_cont("Core events, ");
927 break;
928
929 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
930 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
931 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
932 case 29: /* six-core 45 nm xeon "Dunnington" */
933 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
934 sizeof(hw_cache_event_ids));
935
936 x86_pmu.event_constraints = intel_core2_event_constraints;
937 pr_cont("Core2 events, ");
938 break;
939
940 case 26: /* 45 nm nehalem, "Bloomfield" */
941 case 30: /* 45 nm nehalem, "Lynnfield" */
942 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
943 sizeof(hw_cache_event_ids));
944
945 x86_pmu.event_constraints = intel_nehalem_event_constraints;
946 pr_cont("Nehalem/Corei7 events, ");
947 break;
948 case 28: /* Atom */
949 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
950 sizeof(hw_cache_event_ids));
951
952 x86_pmu.event_constraints = intel_gen_event_constraints;
953 pr_cont("Atom events, ");
954 break;
955
956 case 37: /* 32 nm nehalem, "Clarkdale" */
957 case 44: /* 32 nm nehalem, "Gulftown" */
958 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
959 sizeof(hw_cache_event_ids));
960
961 x86_pmu.event_constraints = intel_westmere_event_constraints;
962 pr_cont("Westmere events, ");
963 break;
964
965 default:
966 /*
967 * default constraints for v2 and up
968 */
969 x86_pmu.event_constraints = intel_gen_event_constraints;
970 pr_cont("generic architected perfmon, ");
971 }
972 return 0;
973}
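intel_pmu_init() reads CPUID leaf 0xA to discover the perfmon version, counter count and counter width. A user-space sketch of the same decoding, assuming the standard architectural-perfmon field layout (EAX[7:0] version, EAX[15:8] general-purpose counters, EAX[23:16] counter width, EAX[31:24] EBX mask length, EDX[4:0] fixed counters on v2+):

/* Decode CPUID leaf 0xA fields, mirroring cpuid10_eax/cpuid10_edx. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xA, &eax, &ebx, &ecx, &edx)) {
                fprintf(stderr, "CPUID leaf 0xA not available\n");
                return 1;
        }

        printf("perfmon version : %u\n", eax & 0xff);
        printf("GP counters     : %u\n", (eax >> 8) & 0xff);
        printf("counter width   : %u bits\n", (eax >> 16) & 0xff);
        printf("EBX mask length : %u\n", (eax >> 24) & 0xff);
        printf("fixed counters  : %u\n", edx & 0x1f);
        return 0;
}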
974
975#else /* CONFIG_CPU_SUP_INTEL */
976
977static int intel_pmu_init(void)
978{
979 return 0;
980}
981
982#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..a4e67b99d91c
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 u64 val = P6_NOP_EVENT;
84
85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
87
88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89}
90
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
92{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
94 u64 val;
95
96 val = hwc->config;
97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
99
100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101}
102
103static __initconst struct x86_pmu p6_pmu = {
104 .name = "p6",
105 .handle_irq = x86_pmu_handle_irq,
106 .disable_all = p6_pmu_disable_all,
107 .enable_all = p6_pmu_enable_all,
108 .enable = p6_pmu_enable_event,
109 .disable = p6_pmu_disable_event,
110 .eventsel = MSR_P6_EVNTSEL0,
111 .perfctr = MSR_P6_PERFCTR0,
112 .event_map = p6_pmu_event_map,
113 .raw_event = p6_pmu_raw_event,
114 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
115 .apic = 1,
116 .max_period = (1ULL << 31) - 1,
117 .version = 0,
118 .num_events = 2,
119 /*
120 * Events have 40 bits implemented. However they are designed such
121 * that bits [32-39] are sign extensions of bit 31. As such the
 122 * effective width of an event for a P6-like PMU is 32 bits only.
123 *
124 * See IA-32 Intel Architecture Software developer manual Vol 3B
125 */
126 .event_bits = 32,
127 .event_mask = (1ULL << 32) - 1,
128 .get_event_constraints = x86_get_event_constraints,
129 .event_constraints = p6_event_constraints,
130};
131
132static __init int p6_pmu_init(void)
133{
134 switch (boot_cpu_data.x86_model) {
135 case 1:
136 case 3: /* Pentium Pro */
137 case 5:
138 case 6: /* Pentium II */
139 case 7:
140 case 8:
141 case 11: /* Pentium III */
142 case 9:
143 case 13:
144 /* Pentium M */
145 break;
146 default:
147 pr_cont("unsupported p6 CPU model %d ",
148 boot_cpu_data.x86_model);
149 return -ENODEV;
150 }
151
152 x86_pmu = p6_pmu;
153
154 return 0;
155}
156
157#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..fb329e9f8494 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
115 115
116 return !test_bit(counter, perfctr_nmi_owner); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118
119/* checks the an msr for availability */
120int avail_to_resrv_perfctr_nmi(unsigned int msr)
121{
122 unsigned int counter;
123
124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 return !test_bit(counter, perfctr_nmi_owner);
128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 118EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 119
131int reserve_perfctr_nmi(unsigned int msr) 120int reserve_perfctr_nmi(unsigned int msr)
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
691 cpu_nmi_set_wd_enabled(); 680 cpu_nmi_set_wd_enabled();
692 681
693 apic_write(APIC_LVTPC, APIC_DM_NMI); 682 apic_write(APIC_LVTPC, APIC_DM_NMI);
694 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
695 wrmsr(evntsel_msr, evntsel, 0); 684 wrmsr(evntsel_msr, evntsel, 0);
696 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
697 return 1; 686 return 1;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca47b25..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
18 18
19#include "dumpstack.h" 19#include "dumpstack.h"
20 20
21/* Just a stub for now */
22int x86_is_stack_id(int id, char *name)
23{
24 return 0;
25}
26 21
27void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
28 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 0ad9597073f5..676bc051252e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
33#endif 33#endif
34}; 34};
35 35
36int x86_is_stack_id(int id, char *name)
37{
38 return x86_stack_ids[id - 1] == name;
39}
40
41static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 36static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
42 unsigned *usedp, char **idp) 37 unsigned *usedp, char **idp)
43{ 38{
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 1e8ceadc0d6a..d6cc065f519f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -344,13 +344,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
344 } 344 }
345 345
346 /* 346 /*
347 * For kernel-addresses, either the address or symbol name can be
348 * specified.
349 */
350 if (info->name)
351 info->address = (unsigned long)
352 kallsyms_lookup_name(info->name);
353 /*
354 * Check that the low-order bits of the address are appropriate 347 * Check that the low-order bits of the address are appropriate
355 * for the alignment implied by len. 348 * for the alignment implied by len.
356 */ 349 */
@@ -486,8 +479,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
486 rcu_read_lock(); 479 rcu_read_lock();
487 480
488 bp = per_cpu(bp_per_reg[i], cpu); 481 bp = per_cpu(bp_per_reg[i], cpu);
489 if (bp)
490 rc = NOTIFY_DONE;
491 /* 482 /*
492 * Reset the 'i'th TRAP bit in dr6 to denote completion of 483 * Reset the 'i'th TRAP bit in dr6 to denote completion of
493 * exception handling 484 * exception handling
@@ -506,7 +497,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
506 497
507 rcu_read_unlock(); 498 rcu_read_unlock();
508 } 499 }
509 if (dr6 & (~DR_TRAP_BITS)) 500 /*
501 * Further processing in do_debug() is needed for a) user-space
502 * breakpoints (to generate signals) and b) when the system has
503 * taken exception due to multiple causes
504 */
505 if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
506 (dr6 & (~DR_TRAP_BITS)))
510 rc = NOTIFY_DONE; 507 rc = NOTIFY_DONE;
511 508
512 set_debugreg(dr7, 7); 509 set_debugreg(dr7, 7);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5b8c7505b3bc..5de9f4a9c3fd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
337 337
338int __kprobes arch_prepare_kprobe(struct kprobe *p) 338int __kprobes arch_prepare_kprobe(struct kprobe *p)
339{ 339{
340 if (alternatives_text_reserved(p->addr, p->addr))
341 return -EINVAL;
342
340 if (!can_probe((unsigned long)p->addr)) 343 if (!can_probe((unsigned long)p->addr))
341 return -EILSEQ; 344 return -EILSEQ;
342 /* insn: must be on special executable page on x86. */ 345 /* insn: must be on special executable page on x86. */
@@ -429,7 +432,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
430 struct kprobe_ctlblk *kcb) 433 struct kprobe_ctlblk *kcb)
431{ 434{
432#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) 435#if !defined(CONFIG_PREEMPT)
433 if (p->ainsn.boostable == 1 && !p->post_handler) { 436 if (p->ainsn.boostable == 1 && !p->post_handler) {
434 /* Boost up -- we can execute copied instructions directly */ 437 /* Boost up -- we can execute copied instructions directly */
435 reset_current_kprobe(); 438 reset_current_kprobe();
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 40b54ceb68b5..a2c1edd2d3ac 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
359 x86_init.mpparse.mpc_record(1); 359 x86_init.mpparse.mpc_record(1);
360 } 360 }
361 361
362#ifdef CONFIG_X86_BIGSMP
363 generic_bigsmp_probe();
364#endif
365
366 if (apic->setup_apic_routing)
367 apic->setup_apic_routing();
368
369 if (!num_processors) 362 if (!num_processors)
370 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 363 printk(KERN_ERR "MPTABLE: no processors registered!\n");
371 return num_processors; 364 return num_processors;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 41a26a82470a..126f0b493d04 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -527,6 +527,7 @@ void set_personality_ia32(void)
527 527
528 /* Make sure to be in 32bit mode */ 528 /* Make sure to be in 32bit mode */
529 set_thread_flag(TIF_IA32); 529 set_thread_flag(TIF_IA32);
530 current->personality |= force_personality32;
530 531
531 /* Prepare the first "return" to user space */ 532 /* Prepare the first "return" to user space */
532 current_thread_info()->status |= TS_COMPAT; 533 current_thread_info()->status |= TS_COMPAT;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0c1033d61e59..d03146f71b2f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@ static const int arg_offs_table[] = {
140#endif 140#endif
141}; 141};
142 142
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
167/* 143/*
168 * does not yet catch signals sent when the child dies. 144 * does not yet catch signals sent when the child dies.
169 * in exit.c or in signal.c. 145 * in exit.c or in signal.c.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 678d0b8c26f3..b4e870cbdc60 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1083,9 +1083,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1083 set_cpu_sibling_map(0); 1083 set_cpu_sibling_map(0);
1084 1084
1085 enable_IR_x2apic(); 1085 enable_IR_x2apic();
1086#ifdef CONFIG_X86_64
1087 default_setup_apic_routing(); 1086 default_setup_apic_routing();
1088#endif
1089 1087
1090 if (smp_sanity_check(max_cpus) < 0) { 1088 if (smp_sanity_check(max_cpus) < 0) {
1091 printk(KERN_INFO "SMP disabled\n"); 1089 printk(KERN_INFO "SMP disabled\n");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 33399176512a..1168e4454188 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
534 534
535 get_debugreg(dr6, 6); 535 get_debugreg(dr6, 6);
536 536
537 /* Filter out all the reserved bits which are preset to 1 */
538 dr6 &= ~DR6_RESERVED;
539
537 /* Catch kmemcheck conditions first of all! */ 540 /* Catch kmemcheck conditions first of all! */
538 if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) 541 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
539 return; 542 return;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 296aba49472a..15578f180e59 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this,
467 return -EOPNOTSUPP; 467 return -EOPNOTSUPP;
468 468
469 addr &= KVM_PIT_CHANNEL_MASK; 469 addr &= KVM_PIT_CHANNEL_MASK;
470 if (addr == 3)
471 return 0;
472
470 s = &pit_state->channels[addr]; 473 s = &pit_state->channels[addr];
471 474
472 mutex_lock(&pit_state->lock); 475 mutex_lock(&pit_state->lock);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ddcad452add..a1e1bc9d412d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
670{ 670{
671 static int version; 671 static int version;
672 struct pvclock_wall_clock wc; 672 struct pvclock_wall_clock wc;
673 struct timespec now, sys, boot; 673 struct timespec boot;
674 674
675 if (!wall_clock) 675 if (!wall_clock)
676 return; 676 return;
@@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
685 * wall clock specified here. guest system time equals host 685 * wall clock specified here. guest system time equals host
686 * system time for us, thus we must fill in host boot time here. 686 * system time for us, thus we must fill in host boot time here.
687 */ 687 */
688 now = current_kernel_time(); 688 getboottime(&boot);
689 ktime_get_ts(&sys);
690 boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
691 689
692 wc.sec = boot.tv_sec; 690 wc.sec = boot.tv_sec;
693 wc.nsec = boot.tv_nsec; 691 wc.nsec = boot.tv_nsec;
@@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
762 local_irq_save(flags); 760 local_irq_save(flags);
763 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); 761 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
764 ktime_get_ts(&ts); 762 ktime_get_ts(&ts);
763 monotonic_to_bootbased(&ts);
765 local_irq_restore(flags); 764 local_irq_restore(flags);
766 765
767 /* With all the info we got, fill in the values */ 766 /* With all the info we got, fill in the values */
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atoimcally, 21 * any locks. For this we would like to load the pointers atomically,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What 22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not 23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not 24 * present to present, or present to not present or both -- it will not
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3347f696edc7..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)
159 159
160 for_each_possible_cpu(i) { 160 for_each_possible_cpu(i) {
161 per_cpu(cpu_msrs, i).multiplex = 161 per_cpu(cpu_msrs, i).multiplex =
162 kmalloc(multiplex_size, GFP_KERNEL); 162 kzalloc(multiplex_size, GFP_KERNEL);
163 if (!per_cpu(cpu_msrs, i).multiplex) 163 if (!per_cpu(cpu_msrs, i).multiplex)
164 return 0; 164 return 0;
165 } 165 }
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
179 if (counter_config[i].enabled) { 179 if (counter_config[i].enabled) {
180 multiplex[i].saved = -(u64)counter_config[i].count; 180 multiplex[i].saved = -(u64)counter_config[i].count;
181 } else { 181 } else {
182 multiplex[i].addr = 0;
183 multiplex[i].saved = 0; 182 multiplex[i].saved = 0;
184 } 183 }
185 } 184 }
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
189 188
190static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) 189static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
191{ 190{
191 struct op_msr *counters = msrs->counters;
192 struct op_msr *multiplex = msrs->multiplex; 192 struct op_msr *multiplex = msrs->multiplex;
193 int i; 193 int i;
194 194
195 for (i = 0; i < model->num_counters; ++i) { 195 for (i = 0; i < model->num_counters; ++i) {
196 int virt = op_x86_phys_to_virt(i); 196 int virt = op_x86_phys_to_virt(i);
197 if (multiplex[virt].addr) 197 if (counters[i].addr)
198 rdmsrl(multiplex[virt].addr, multiplex[virt].saved); 198 rdmsrl(counters[i].addr, multiplex[virt].saved);
199 } 199 }
200} 200}
201 201
202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) 202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
203{ 203{
204 struct op_msr *counters = msrs->counters;
204 struct op_msr *multiplex = msrs->multiplex; 205 struct op_msr *multiplex = msrs->multiplex;
205 int i; 206 int i;
206 207
207 for (i = 0; i < model->num_counters; ++i) { 208 for (i = 0; i < model->num_counters; ++i) {
208 int virt = op_x86_phys_to_virt(i); 209 int virt = op_x86_phys_to_virt(i);
209 if (multiplex[virt].addr) 210 if (counters[i].addr)
210 wrmsrl(multiplex[virt].addr, multiplex[virt].saved); 211 wrmsrl(counters[i].addr, multiplex[virt].saved);
211 } 212 }
212} 213}
213 214
@@ -303,11 +304,11 @@ static int allocate_msrs(void)
303 304
304 int i; 305 int i;
305 for_each_possible_cpu(i) { 306 for_each_possible_cpu(i) {
306 per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, 307 per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
307 GFP_KERNEL); 308 GFP_KERNEL);
308 if (!per_cpu(cpu_msrs, i).counters) 309 if (!per_cpu(cpu_msrs, i).counters)
309 return 0; 310 return 0;
310 per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, 311 per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
311 GFP_KERNEL); 312 GFP_KERNEL);
312 if (!per_cpu(cpu_msrs, i).controls) 313 if (!per_cpu(cpu_msrs, i).controls)
313 return 0; 314 return 0;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..090cbbec7dbd 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,9 @@
22#include <asm/ptrace.h> 22#include <asm/ptrace.h>
23#include <asm/msr.h> 23#include <asm/msr.h>
24#include <asm/nmi.h> 24#include <asm/nmi.h>
25#include <asm/apic.h>
26#include <asm/processor.h>
27#include <asm/cpufeature.h>
25 28
26#include "op_x86_model.h" 29#include "op_x86_model.h"
27#include "op_counter.h" 30#include "op_counter.h"
@@ -43,23 +46,10 @@
43 46
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 47static unsigned long reset_value[NUM_VIRT_COUNTERS];
45 48
46#ifdef CONFIG_OPROFILE_IBS
47
48/* IbsFetchCtl bits/masks */
49#define IBS_FETCH_RAND_EN (1ULL<<57)
50#define IBS_FETCH_VAL (1ULL<<49)
51#define IBS_FETCH_ENABLE (1ULL<<48)
52#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
53
54/*IbsOpCtl bits */
55#define IBS_OP_CNT_CTL (1ULL<<19)
56#define IBS_OP_VAL (1ULL<<18)
57#define IBS_OP_ENABLE (1ULL<<17)
58
59#define IBS_FETCH_SIZE 6 49#define IBS_FETCH_SIZE 6
60#define IBS_OP_SIZE 12 50#define IBS_OP_SIZE 12
61 51
62static int has_ibs; /* AMD Family10h and later */ 52static u32 ibs_caps;
63 53
64struct op_ibs_config { 54struct op_ibs_config {
65 unsigned long op_enabled; 55 unsigned long op_enabled;
@@ -71,24 +61,52 @@ struct op_ibs_config {
71}; 61};
72 62
73static struct op_ibs_config ibs_config; 63static struct op_ibs_config ibs_config;
64static u64 ibs_op_ctl;
74 65
75#endif 66/*
67 * IBS cpuid feature detection
68 */
76 69
77#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 70#define IBS_CPUID_FEATURES 0x8000001b
71
72/*
73 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
74 * bit 0 is used to indicate the existence of IBS.
75 */
76#define IBS_CAPS_AVAIL (1LL<<0)
77#define IBS_CAPS_RDWROPCNT (1LL<<3)
78#define IBS_CAPS_OPCNT (1LL<<4)
78 79
79static void op_mux_fill_in_addresses(struct op_msrs * const msrs) 80/*
81 * IBS randomization macros
82 */
83#define IBS_RANDOM_BITS 12
84#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
85#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
86
87static u32 get_ibs_caps(void)
80{ 88{
81 int i; 89 u32 ibs_caps;
90 unsigned int max_level;
82 91
83 for (i = 0; i < NUM_VIRT_COUNTERS; i++) { 92 if (!boot_cpu_has(X86_FEATURE_IBS))
84 int hw_counter = op_x86_virt_to_phys(i); 93 return 0;
85 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 94
86 msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; 95 /* check IBS cpuid feature flags */
87 else 96 max_level = cpuid_eax(0x80000000);
88 msrs->multiplex[i].addr = 0; 97 if (max_level < IBS_CPUID_FEATURES)
89 } 98 return IBS_CAPS_AVAIL;
99
100 ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
101 if (!(ibs_caps & IBS_CAPS_AVAIL))
102 /* cpuid flags not valid */
103 return IBS_CAPS_AVAIL;
104
105 return ibs_caps;
90} 106}
91 107
108#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
109
92static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, 110static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
93 struct op_msrs const * const msrs) 111 struct op_msrs const * const msrs)
94{ 112{
@@ -98,7 +116,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
98 /* enable active counters */ 116 /* enable active counters */
99 for (i = 0; i < NUM_COUNTERS; ++i) { 117 for (i = 0; i < NUM_COUNTERS; ++i) {
100 int virt = op_x86_phys_to_virt(i); 118 int virt = op_x86_phys_to_virt(i);
101 if (!counter_config[virt].enabled) 119 if (!reset_value[virt])
102 continue; 120 continue;
103 rdmsrl(msrs->controls[i].addr, val); 121 rdmsrl(msrs->controls[i].addr, val);
104 val &= model->reserved; 122 val &= model->reserved;
@@ -107,10 +125,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
107 } 125 }
108} 126}
109 127
110#else
111
112static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
113
114#endif 128#endif
115 129
116/* functions for op_amd_spec */ 130/* functions for op_amd_spec */
@@ -122,18 +136,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
122 for (i = 0; i < NUM_COUNTERS; i++) { 136 for (i = 0; i < NUM_COUNTERS; i++) {
123 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 137 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
124 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 138 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
125 else
126 msrs->counters[i].addr = 0;
127 } 139 }
128 140
129 for (i = 0; i < NUM_CONTROLS; i++) { 141 for (i = 0; i < NUM_CONTROLS; i++) {
130 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) 142 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
131 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 143 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
132 else
133 msrs->controls[i].addr = 0;
134 } 144 }
135
136 op_mux_fill_in_addresses(msrs);
137} 145}
138 146
139static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, 147static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -144,7 +152,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
144 152
145 /* setup reset_value */ 153 /* setup reset_value */
146 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 154 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
147 if (counter_config[i].enabled) 155 if (counter_config[i].enabled
156 && msrs->counters[op_x86_virt_to_phys(i)].addr)
148 reset_value[i] = counter_config[i].count; 157 reset_value[i] = counter_config[i].count;
149 else 158 else
150 reset_value[i] = 0; 159 reset_value[i] = 0;
@@ -152,9 +161,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
152 161
153 /* clear all counters */ 162 /* clear all counters */
154 for (i = 0; i < NUM_CONTROLS; ++i) { 163 for (i = 0; i < NUM_CONTROLS; ++i) {
155 if (unlikely(!msrs->controls[i].addr)) 164 if (unlikely(!msrs->controls[i].addr)) {
165 if (counter_config[i].enabled && !smp_processor_id())
166 /*
167 * counter is reserved, this is on all
168 * cpus, so report only for cpu #0
169 */
170 op_x86_warn_reserved(i);
156 continue; 171 continue;
172 }
157 rdmsrl(msrs->controls[i].addr, val); 173 rdmsrl(msrs->controls[i].addr, val);
174 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
175 op_x86_warn_in_use(i);
158 val &= model->reserved; 176 val &= model->reserved;
159 wrmsrl(msrs->controls[i].addr, val); 177 wrmsrl(msrs->controls[i].addr, val);
160 } 178 }
@@ -169,9 +187,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
169 /* enable active counters */ 187 /* enable active counters */
170 for (i = 0; i < NUM_COUNTERS; ++i) { 188 for (i = 0; i < NUM_COUNTERS; ++i) {
171 int virt = op_x86_phys_to_virt(i); 189 int virt = op_x86_phys_to_virt(i);
172 if (!counter_config[virt].enabled) 190 if (!reset_value[virt])
173 continue;
174 if (!msrs->counters[i].addr)
175 continue; 191 continue;
176 192
177 /* setup counter registers */ 193 /* setup counter registers */
@@ -185,7 +201,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
185 } 201 }
186} 202}
187 203
188#ifdef CONFIG_OPROFILE_IBS 204/*
205 * 16-bit Linear Feedback Shift Register (LFSR)
206 *
 207 * Feedback polynomial = X^16 + X^14 + X^13 + X^11 + 1
209 */
210static unsigned int lfsr_random(void)
211{
212 static unsigned int lfsr_value = 0xF00D;
213 unsigned int bit;
214
215 /* Compute next bit to shift in */
216 bit = ((lfsr_value >> 0) ^
217 (lfsr_value >> 2) ^
218 (lfsr_value >> 3) ^
219 (lfsr_value >> 5)) & 0x0001;
220
221 /* Advance to next register value */
222 lfsr_value = (lfsr_value >> 1) | (bit << 15);
223
224 return lfsr_value;
225}
226
227/*
228 * IBS software randomization
229 *
230 * The IBS periodic op counter is randomized in software. The lower 12
231 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
232 * initialized with a 12 bit random value.
233 */
234static inline u64 op_amd_randomize_ibs_op(u64 val)
235{
236 unsigned int random = lfsr_random();
237
238 if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
239 /*
240 * Work around if the hw can not write to IbsOpCurCnt
241 *
242 * Randomize the lower 8 bits of the 16 bit
243 * IbsOpMaxCnt [15:0] value in the range of -128 to
244 * +127 by adding/subtracting an offset to the
245 * maximum count (IbsOpMaxCnt).
246 *
247 * To avoid over or underflows and protect upper bits
248 * starting at bit 16, the initial value for
249 * IbsOpMaxCnt must fit in the range from 0x0081 to
250 * 0xff80.
251 */
252 val += (s8)(random >> 4);
253 else
254 val |= (u64)(random & IBS_RANDOM_MASK) << 32;
255
256 return val;
257}
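To make the two randomization modes concrete: with IBS_CAPS_RDWROPCNT the 12 random bits are written into the IbsOpCurCnt field at bit 32, otherwise a signed 8-bit offset derived from the random value is added to IbsOpMaxCnt. A user-space sketch with a fixed "random" value, purely illustrative and mirroring the masks used in this file:

/* Replay of op_amd_randomize_ibs_op()'s two branches with a
 * deterministic input instead of lfsr_random(). */
#include <stdio.h>
#include <stdint.h>

#define IBS_RANDOM_BITS 12
#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)

static uint64_t randomize_op(uint64_t val, unsigned int random,
                             int hw_can_write_curcnt)
{
        if (!hw_can_write_curcnt)
                /* adjust IbsOpMaxCnt by a signed 8-bit offset (-128..+127) */
                val += (int8_t)(random >> 4);
        else
                /* place 12 random bits into the IbsOpCurCnt field at bit 32 */
                val |= (uint64_t)(random & IBS_RANDOM_MASK) << 32;

        return val;
}

int main(void)
{
        uint64_t ctl = 0x1000;          /* hypothetical IbsOpMaxCnt value */
        unsigned int random = 0x0ABC;   /* pretend lfsr_random() returned this */

        printf("no IbsOpCurCnt write:   %#llx\n",
               (unsigned long long)randomize_op(ctl, random, 0));
        printf("with IbsOpCurCnt write: %#llx\n",
               (unsigned long long)randomize_op(ctl, random, 1));
        return 0;
}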
189 258
190static inline void 259static inline void
191op_amd_handle_ibs(struct pt_regs * const regs, 260op_amd_handle_ibs(struct pt_regs * const regs,
@@ -194,7 +263,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
194 u64 val, ctl; 263 u64 val, ctl;
195 struct op_entry entry; 264 struct op_entry entry;
196 265
197 if (!has_ibs) 266 if (!ibs_caps)
198 return; 267 return;
199 268
200 if (ibs_config.fetch_enabled) { 269 if (ibs_config.fetch_enabled) {
@@ -210,7 +279,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
210 oprofile_write_commit(&entry); 279 oprofile_write_commit(&entry);
211 280
212 /* reenable the IRQ */ 281 /* reenable the IRQ */
213 ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); 282 ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
214 ctl |= IBS_FETCH_ENABLE; 283 ctl |= IBS_FETCH_ENABLE;
215 wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); 284 wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
216 } 285 }
@@ -236,8 +305,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
236 oprofile_write_commit(&entry); 305 oprofile_write_commit(&entry);
237 306
238 /* reenable the IRQ */ 307 /* reenable the IRQ */
239 ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; 308 ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
240 ctl |= IBS_OP_ENABLE;
241 wrmsrl(MSR_AMD64_IBSOPCTL, ctl); 309 wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
242 } 310 }
243 } 311 }
@@ -246,41 +314,57 @@ op_amd_handle_ibs(struct pt_regs * const regs,
246static inline void op_amd_start_ibs(void) 314static inline void op_amd_start_ibs(void)
247{ 315{
248 u64 val; 316 u64 val;
249 if (has_ibs && ibs_config.fetch_enabled) { 317
250 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 318 if (!ibs_caps)
319 return;
320
321 if (ibs_config.fetch_enabled) {
322 val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT;
251 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; 323 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
252 val |= IBS_FETCH_ENABLE; 324 val |= IBS_FETCH_ENABLE;
253 wrmsrl(MSR_AMD64_IBSFETCHCTL, val); 325 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
254 } 326 }
255 327
256 if (has_ibs && ibs_config.op_enabled) { 328 if (ibs_config.op_enabled) {
257 val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; 329 ibs_op_ctl = ibs_config.max_cnt_op >> 4;
258 val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; 330 if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
259 val |= IBS_OP_ENABLE; 331 /*
332 * IbsOpCurCnt not supported. See
333 * op_amd_randomize_ibs_op() for details.
334 */
335 ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
336 } else {
337 /*
338 * The start value is randomized with a
 339 * positive offset; compensate for it with
 340 * half of the randomized range. Also
341 * avoid underflows.
342 */
343 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
344 IBS_OP_MAX_CNT);
345 }
346 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
347 ibs_op_ctl |= IBS_OP_CNT_CTL;
348 ibs_op_ctl |= IBS_OP_ENABLE;
349 val = op_amd_randomize_ibs_op(ibs_op_ctl);
260 wrmsrl(MSR_AMD64_IBSOPCTL, val); 350 wrmsrl(MSR_AMD64_IBSOPCTL, val);
261 } 351 }
262} 352}
263 353
264static void op_amd_stop_ibs(void) 354static void op_amd_stop_ibs(void)
265{ 355{
266 if (has_ibs && ibs_config.fetch_enabled) 356 if (!ibs_caps)
357 return;
358
359 if (ibs_config.fetch_enabled)
267 /* clear max count and enable */ 360 /* clear max count and enable */
268 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); 361 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
269 362
270 if (has_ibs && ibs_config.op_enabled) 363 if (ibs_config.op_enabled)
271 /* clear max count and enable */ 364 /* clear max count and enable */
272 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 365 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
273} 366}
274 367
275#else
276
277static inline void op_amd_handle_ibs(struct pt_regs * const regs,
278 struct op_msrs const * const msrs) { }
279static inline void op_amd_start_ibs(void) { }
280static inline void op_amd_stop_ibs(void) { }
281
282#endif
283
284static int op_amd_check_ctrs(struct pt_regs * const regs, 368static int op_amd_check_ctrs(struct pt_regs * const regs,
285 struct op_msrs const * const msrs) 369 struct op_msrs const * const msrs)
286{ 370{
@@ -314,7 +398,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
314 if (!reset_value[op_x86_phys_to_virt(i)]) 398 if (!reset_value[op_x86_phys_to_virt(i)])
315 continue; 399 continue;
316 rdmsrl(msrs->controls[i].addr, val); 400 rdmsrl(msrs->controls[i].addr, val);
317 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 401 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
318 wrmsrl(msrs->controls[i].addr, val); 402 wrmsrl(msrs->controls[i].addr, val);
319 } 403 }
320 404
@@ -334,7 +418,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
334 if (!reset_value[op_x86_phys_to_virt(i)]) 418 if (!reset_value[op_x86_phys_to_virt(i)])
335 continue; 419 continue;
336 rdmsrl(msrs->controls[i].addr, val); 420 rdmsrl(msrs->controls[i].addr, val);
337 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 421 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
338 wrmsrl(msrs->controls[i].addr, val); 422 wrmsrl(msrs->controls[i].addr, val);
339 } 423 }
340 424
@@ -355,8 +439,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
355 } 439 }
356} 440}
357 441
358#ifdef CONFIG_OPROFILE_IBS
359
360static u8 ibs_eilvt_off; 442static u8 ibs_eilvt_off;
361 443
362static inline void apic_init_ibs_nmi_per_cpu(void *arg) 444static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -405,45 +487,36 @@ static int init_ibs_nmi(void)
405 return 1; 487 return 1;
406 } 488 }
407 489
408#ifdef CONFIG_NUMA
409 /* Sanity check */
410 /* Works only for 64bit with proper numa implementation. */
411 if (nodes != num_possible_nodes()) {
412 printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
413 "found: %d, expected %d",
414 nodes, num_possible_nodes());
415 return 1;
416 }
417#endif
418 return 0; 490 return 0;
419} 491}
420 492
421/* uninitialize the APIC for the IBS interrupts if needed */ 493/* uninitialize the APIC for the IBS interrupts if needed */
422static void clear_ibs_nmi(void) 494static void clear_ibs_nmi(void)
423{ 495{
424 if (has_ibs) 496 if (ibs_caps)
425 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 497 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
426} 498}
427 499
428/* initialize the APIC for the IBS interrupts if available */ 500/* initialize the APIC for the IBS interrupts if available */
429static void ibs_init(void) 501static void ibs_init(void)
430{ 502{
431 has_ibs = boot_cpu_has(X86_FEATURE_IBS); 503 ibs_caps = get_ibs_caps();
432 504
433 if (!has_ibs) 505 if (!ibs_caps)
434 return; 506 return;
435 507
436 if (init_ibs_nmi()) { 508 if (init_ibs_nmi()) {
437 has_ibs = 0; 509 ibs_caps = 0;
438 return; 510 return;
439 } 511 }
440 512
441 printk(KERN_INFO "oprofile: AMD IBS detected\n"); 513 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
514 (unsigned)ibs_caps);
442} 515}
443 516
444static void ibs_exit(void) 517static void ibs_exit(void)
445{ 518{
446 if (!has_ibs) 519 if (!ibs_caps)
447 return; 520 return;
448 521
449 clear_ibs_nmi(); 522 clear_ibs_nmi();
@@ -463,7 +536,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
463 if (ret) 536 if (ret)
464 return ret; 537 return ret;
465 538
466 if (!has_ibs) 539 if (!ibs_caps)
467 return ret; 540 return ret;
468 541
469 /* model specific files */ 542 /* model specific files */
@@ -473,7 +546,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
473 ibs_config.fetch_enabled = 0; 546 ibs_config.fetch_enabled = 0;
474 ibs_config.max_cnt_op = 250000; 547 ibs_config.max_cnt_op = 250000;
475 ibs_config.op_enabled = 0; 548 ibs_config.op_enabled = 0;
476 ibs_config.dispatched_ops = 1; 549 ibs_config.dispatched_ops = 0;
477 550
478 dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); 551 dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
479 oprofilefs_create_ulong(sb, dir, "enable", 552 oprofilefs_create_ulong(sb, dir, "enable",
@@ -488,8 +561,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
488 &ibs_config.op_enabled); 561 &ibs_config.op_enabled);
489 oprofilefs_create_ulong(sb, dir, "max_count", 562 oprofilefs_create_ulong(sb, dir, "max_count",
490 &ibs_config.max_cnt_op); 563 &ibs_config.max_cnt_op);
491 oprofilefs_create_ulong(sb, dir, "dispatched_ops", 564 if (ibs_caps & IBS_CAPS_OPCNT)
492 &ibs_config.dispatched_ops); 565 oprofilefs_create_ulong(sb, dir, "dispatched_ops",
566 &ibs_config.dispatched_ops);
493 567
494 return 0; 568 return 0;
495} 569}
@@ -507,19 +581,6 @@ static void op_amd_exit(void)
507 ibs_exit(); 581 ibs_exit();
508} 582}
509 583
510#else
511
512/* no IBS support */
513
514static int op_amd_init(struct oprofile_operations *ops)
515{
516 return 0;
517}
518
519static void op_amd_exit(void) {}
520
521#endif /* CONFIG_OPROFILE_IBS */
522
523struct op_x86_model_spec op_amd_spec = { 584struct op_x86_model_spec op_amd_spec = {
524 .num_counters = NUM_COUNTERS, 585 .num_counters = NUM_COUNTERS,
525 .num_controls = NUM_CONTROLS, 586 .num_controls = NUM_CONTROLS,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
394 setup_num_counters(); 394 setup_num_counters();
395 stag = get_stagger(); 395 stag = get_stagger();
396 396
397 /* initialize some registers */
398 for (i = 0; i < num_counters; ++i)
399 msrs->counters[i].addr = 0;
400 for (i = 0; i < num_controls; ++i)
401 msrs->controls[i].addr = 0;
402
403 /* the counter & cccr registers we pay attention to */ 397 /* the counter & cccr registers we pay attention to */
404 for (i = 0; i < num_counters; ++i) { 398 for (i = 0; i < num_counters; ++i) {
405 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 399 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..2bf90fafa7b5 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
37 for (i = 0; i < num_counters; i++) { 37 for (i = 0; i < num_counters; i++) {
38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
40 else
41 msrs->counters[i].addr = 0;
42 } 40 }
43 41
44 for (i = 0; i < num_counters; i++) { 42 for (i = 0; i < num_counters; i++) {
45 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 43 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
46 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 44 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
47 else
48 msrs->controls[i].addr = 0;
49 } 45 }
50} 46}
51 47
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
57 int i; 53 int i;
58 54
59 if (!reset_value) { 55 if (!reset_value) {
60 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, 56 reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
61 GFP_ATOMIC); 57 GFP_ATOMIC);
62 if (!reset_value) 58 if (!reset_value)
63 return; 59 return;
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
82 78
83 /* clear all counters */ 79 /* clear all counters */
84 for (i = 0; i < num_counters; ++i) { 80 for (i = 0; i < num_counters; ++i) {
85 if (unlikely(!msrs->controls[i].addr)) 81 if (unlikely(!msrs->controls[i].addr)) {
82 if (counter_config[i].enabled && !smp_processor_id())
83 /*
84 * counter is reserved, this is on all
85 * cpus, so report only for cpu #0
86 */
87 op_x86_warn_reserved(i);
86 continue; 88 continue;
89 }
87 rdmsrl(msrs->controls[i].addr, val); 90 rdmsrl(msrs->controls[i].addr, val);
91 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
92 op_x86_warn_in_use(i);
88 val &= model->reserved; 93 val &= model->reserved;
89 wrmsrl(msrs->controls[i].addr, val); 94 wrmsrl(msrs->controls[i].addr, val);
90 } 95 }
@@ -161,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs)
161 for (i = 0; i < num_counters; ++i) { 166 for (i = 0; i < num_counters; ++i) {
162 if (reset_value[i]) { 167 if (reset_value[i]) {
163 rdmsrl(msrs->controls[i].addr, val); 168 rdmsrl(msrs->controls[i].addr, val);
164 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 169 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
165 wrmsrl(msrs->controls[i].addr, val); 170 wrmsrl(msrs->controls[i].addr, val);
166 } 171 }
167 } 172 }
@@ -179,7 +184,7 @@ static void ppro_stop(struct op_msrs const * const msrs)
179 if (!reset_value[i]) 184 if (!reset_value[i])
180 continue; 185 continue;
181 rdmsrl(msrs->controls[i].addr, val); 186 rdmsrl(msrs->controls[i].addr, val);
182 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 187 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
183 wrmsrl(msrs->controls[i].addr, val); 188 wrmsrl(msrs->controls[i].addr, val);
184 } 189 }
185} 190}
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,26 @@ struct op_x86_model_spec {
57 57
58struct op_counter_config; 58struct op_counter_config;
59 59
60static inline void op_x86_warn_in_use(int counter)
61{
62 /*
63 * The warning indicates an already running counter. If
64 * oprofile doesn't collect data, then try using a different
65 * performance counter on your platform to monitor the desired
66 * event. Delete counter #%d from the desired event by editing
67 * the /usr/share/oprofile/%s/<cpu>/events file. If the event
68 * cannot be monitored by any other counter, contact your
69 * hardware or BIOS vendor.
70 */
71 pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
72 counter, smp_processor_id());
73}
74
75static inline void op_x86_warn_reserved(int counter)
76{
77 pr_warning("oprofile: counter #%d is already reserved\n", counter);
78}
79
60extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, 80extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
61 struct op_counter_config *counter_config); 81 struct op_counter_config *counter_config);
62extern int op_x86_phys_to_virt(int phys); 82extern int op_x86_phys_to_virt(int phys);