aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorRobert Richter <robert.richter@amd.com>2010-10-15 06:45:00 -0400
committerRobert Richter <robert.richter@amd.com>2010-10-15 06:45:00 -0400
commit6268464b370e234e0255330190f9bd5d19386ad7 (patch)
tree5742641092ce64227dd2086d78baaede57da1f80 /arch/x86
parent7df01d96b295e400167e78061b81d4c91630b12d (diff)
parent0fdf13606b67f830559abdaad15980c7f4f05ec4 (diff)
Merge remote branch 'tip/perf/core' into oprofile/core
Conflicts: arch/arm/oprofile/common.c kernel/perf_event.c
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig7
-rw-r--r--arch/x86/boot/early_serial_console.c14
-rw-r--r--arch/x86/include/asm/alternative.h11
-rw-r--r--arch/x86/include/asm/amd_iommu_proto.h6
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h12
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/jump_label.h37
-rw-r--r--arch/x86/include/asm/perf_event_p4.h52
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/alternative.c71
-rw-r--r--arch/x86/kernel/amd_iommu.c4
-rw-r--r--arch/x86/kernel/amd_iommu_init.c67
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c18
-rw-r--r--arch/x86/kernel/cpu/intel.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c261
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c298
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/ftrace.c63
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/jump_label.c50
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/module.c6
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c2
-rw-r--r--arch/x86/xen/time.c5
33 files changed, 735 insertions, 319 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..9815221976a7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -33,6 +33,7 @@ config X86
33 select HAVE_KRETPROBES 33 select HAVE_KRETPROBES
34 select HAVE_OPTPROBES 34 select HAVE_OPTPROBES
35 select HAVE_FTRACE_MCOUNT_RECORD 35 select HAVE_FTRACE_MCOUNT_RECORD
36 select HAVE_C_RECORDMCOUNT
36 select HAVE_DYNAMIC_FTRACE 37 select HAVE_DYNAMIC_FTRACE
37 select HAVE_FUNCTION_TRACER 38 select HAVE_FUNCTION_TRACER
38 select HAVE_FUNCTION_GRAPH_TRACER 39 select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +60,8 @@ config X86
59 select ANON_INODES 60 select ANON_INODES
60 select HAVE_ARCH_KMEMCHECK 61 select HAVE_ARCH_KMEMCHECK
61 select HAVE_USER_RETURN_NOTIFIER 62 select HAVE_USER_RETURN_NOTIFIER
63 select HAVE_ARCH_JUMP_LABEL
64 select HAVE_TEXT_POKE_SMP
62 65
63config INSTRUCTION_DECODER 66config INSTRUCTION_DECODER
64 def_bool (KPROBES || PERF_EVENTS) 67 def_bool (KPROBES || PERF_EVENTS)
@@ -2125,6 +2128,10 @@ config HAVE_ATOMIC_IOMAP
2125 def_bool y 2128 def_bool y
2126 depends on X86_32 2129 depends on X86_32
2127 2130
2131config HAVE_TEXT_POKE_SMP
2132 bool
2133 select STOP_MACHINE if SMP
2134
2128source "net/Kconfig" 2135source "net/Kconfig"
2129 2136
2130source "drivers/Kconfig" 2137source "drivers/Kconfig"
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 030f4b93e255..5df2869c874b 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -58,7 +58,19 @@ static void parse_earlyprintk(void)
58 if (arg[pos] == ',') 58 if (arg[pos] == ',')
59 pos++; 59 pos++;
60 60
61 if (!strncmp(arg, "ttyS", 4)) { 61 /*
62 * make sure we have
63 * "serial,0x3f8,115200"
64 * "serial,ttyS0,115200"
65 * "ttyS0,115200"
66 */
67 if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
68 port = simple_strtoull(arg + pos, &e, 16);
69 if (port == 0 || arg + pos == e)
70 port = DEFAULT_SERIAL_PORT;
71 else
72 pos = e - arg;
73 } else if (!strncmp(arg + pos, "ttyS", 4)) {
62 static const int bases[] = { 0x3f8, 0x2f8 }; 74 static const int bases[] = { 0x3f8, 0x2f8 };
63 int idx = 0; 75 int idx = 0;
64 76
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/jump_label.h>
7#include <asm/asm.h> 8#include <asm/asm.h>
8 9
9/* 10/*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
160#define __parainstructions_end NULL 161#define __parainstructions_end NULL
161#endif 162#endif
162 163
164extern void *text_poke_early(void *addr, const void *opcode, size_t len);
165
163/* 166/*
164 * Clear and restore the kernel write-protection flag on the local CPU. 167 * Clear and restore the kernel write-protection flag on the local CPU.
165 * Allows the kernel to edit read-only pages. 168 * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
180extern void *text_poke(void *addr, const void *opcode, size_t len); 183extern void *text_poke(void *addr, const void *opcode, size_t len);
181extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 184extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
182 185
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5
188extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
189extern void arch_init_ideal_nop5(void);
190#else
191static inline void arch_init_ideal_nop5(void) {}
192#endif
193
183#endif /* _ASM_X86_ALTERNATIVE_H */ 194#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index d2544f1d705d..cb030374b90a 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { }
38 38
39#endif /* !CONFIG_AMD_IOMMU_STATS */ 39#endif /* !CONFIG_AMD_IOMMU_STATS */
40 40
41static inline bool is_rd890_iommu(struct pci_dev *pdev)
42{
43 return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
44 (pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
45}
46
41#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ 47#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 7014e88bc779..08616180deaf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -368,6 +368,9 @@ struct amd_iommu {
368 /* capabilities of that IOMMU read from ACPI */ 368 /* capabilities of that IOMMU read from ACPI */
369 u32 cap; 369 u32 cap;
370 370
371 /* flags read from acpi table */
372 u8 acpi_flags;
373
371 /* 374 /*
372 * Capability pointer. There could be more than one IOMMU per PCI 375 * Capability pointer. There could be more than one IOMMU per PCI
373 * device function if there are more than one AMD IOMMU capability 376 * device function if there are more than one AMD IOMMU capability
@@ -411,6 +414,15 @@ struct amd_iommu {
411 414
412 /* default dma_ops domain for that IOMMU */ 415 /* default dma_ops domain for that IOMMU */
413 struct dma_ops_domain *default_dom; 416 struct dma_ops_domain *default_dom;
417
418 /*
419 * This array is required to work around a potential BIOS bug.
420 * The BIOS may miss to restore parts of the PCI configuration
421 * space when the system resumes from S3. The result is that the
422 * IOMMU does not execute commands anymore which leads to system
423 * failure.
424 */
425 u32 cache_cfg[4];
414}; 426};
415 427
416/* 428/*
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 545776efeb16..bafd80defa43 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -309,7 +309,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
310{ 310{
311 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (addr[nr / BITS_PER_LONG])) != 0;
313} 313}
314 314
315static inline int variable_test_bit(int nr, volatile const unsigned long *addr) 315static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index c6fbb7b430d1..3f76523589af 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,7 @@
168#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ 168#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
169#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ 169#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
170#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ 170#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
171#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
171 172
172/* Virtualization flags: Linux defined, word 8 */ 173/* Virtualization flags: Linux defined, word 8 */
173#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ 174#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) 36#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) 37#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
38 38
39/* Non HT mask */
40#define P4_ESCR_MASK \
41 (P4_ESCR_EVENT_MASK | \
42 P4_ESCR_EVENTMASK_MASK | \
43 P4_ESCR_TAG_MASK | \
44 P4_ESCR_TAG_ENABLE | \
45 P4_ESCR_T0_OS | \
46 P4_ESCR_T0_USR)
47
48/* HT mask */
49#define P4_ESCR_MASK_HT \
50 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
51
52#define P4_CCCR_OVF 0x80000000U 39#define P4_CCCR_OVF 0x80000000U
53#define P4_CCCR_CASCADE 0x40000000U 40#define P4_CCCR_CASCADE 0x40000000U
54#define P4_CCCR_OVF_PMI_T0 0x04000000U 41#define P4_CCCR_OVF_PMI_T0 0x04000000U
@@ -70,23 +57,6 @@
70#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) 57#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
71#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) 58#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
72 59
73/* Non HT mask */
74#define P4_CCCR_MASK \
75 (P4_CCCR_OVF | \
76 P4_CCCR_CASCADE | \
77 P4_CCCR_OVF_PMI_T0 | \
78 P4_CCCR_FORCE_OVF | \
79 P4_CCCR_EDGE | \
80 P4_CCCR_THRESHOLD_MASK | \
81 P4_CCCR_COMPLEMENT | \
82 P4_CCCR_COMPARE | \
83 P4_CCCR_ESCR_SELECT_MASK | \
84 P4_CCCR_ENABLE)
85
86/* HT mask */
87#define P4_CCCR_MASK_HT \
88 (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
89
90#define P4_GEN_ESCR_EMASK(class, name, bit) \ 60#define P4_GEN_ESCR_EMASK(class, name, bit) \
91 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) 61 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
92#define P4_ESCR_EMASK_BIT(class, name) class##__##name 62#define P4_ESCR_EMASK_BIT(class, name) class##__##name
@@ -127,6 +97,28 @@
127#define P4_CONFIG_HT_SHIFT 63 97#define P4_CONFIG_HT_SHIFT 63
128#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) 98#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
129 99
100/*
101 * The bits we allow to pass for RAW events
102 */
103#define P4_CONFIG_MASK_ESCR \
104 P4_ESCR_EVENT_MASK | \
105 P4_ESCR_EVENTMASK_MASK | \
106 P4_ESCR_TAG_MASK | \
107 P4_ESCR_TAG_ENABLE
108
109#define P4_CONFIG_MASK_CCCR \
110 P4_CCCR_EDGE | \
111 P4_CCCR_THRESHOLD_MASK | \
112 P4_CCCR_COMPLEMENT | \
113 P4_CCCR_COMPARE | \
114 P4_CCCR_THREAD_ANY | \
115 P4_CCCR_RESERVED
116
117/* some dangerous bits are reserved for kernel internals */
118#define P4_CONFIG_MASK \
119 (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
120 (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
121
130static inline bool p4_is_event_cascaded(u64 config) 122static inline bool p4_is_event_cascaded(u64 config)
131{ 123{
132 u32 cccr = p4_config_unpack_cccr(config); 124 u32 cccr = p4_config_unpack_cccr(config);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fedf32a8c3ec..9d3f485e5dd0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,7 @@ GCOV_PROFILE_paravirt.o := n
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 34obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 36obj-y += time.o ioport.o ldt.o dumpstack.o
37obj-y += setup.o x86_init.o i8259.o irqinit.o 37obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
38obj-$(CONFIG_X86_VISWS) += visws_quirks.o 38obj-$(CONFIG_X86_VISWS) += visws_quirks.o
39obj-$(CONFIG_X86_32) += probe_roms_32.o 39obj-$(CONFIG_X86_32) += probe_roms_32.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb7a5f052e2b..fb16f17e59be 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -61,7 +61,7 @@ struct cstate_entry {
61 unsigned int ecx; 61 unsigned int ecx;
62 } states[ACPI_PROCESSOR_MAX_POWER]; 62 } states[ACPI_PROCESSOR_MAX_POWER];
63}; 63};
64static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ 64static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
65 65
66static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; 66static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
67 67
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected again NMI or MCE 522 * instructions. And on the local CPU you need to be protected again NMI or MCE
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
637 tpp.len = len; 637 tpp.len = len;
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
641 return addr; 642 return addr;
642} 643}
643 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
648
649void __init arch_init_ideal_nop5(void)
650{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /*
657 * There is no good nop for all x86 archs.
658 * We will default to using the P6_NOP5, but first we
659 * will test to make sure that the nop will actually
660 * work on this CPU. If it faults, we will then
661 * go to a lesser efficient 5 byte nop. If that fails
662 * we then just use a jmp as our nop. This isn't the most
663 * efficient nop, but we can not use a multi part nop
664 * since we would then risk being preempted in the middle
665 * of that nop, and if we enabled tracing then, it might
666 * cause a system crash.
667 *
668 * TODO: check the cpuid to determine the best nop.
669 */
670 asm volatile (
671 "ftrace_test_jmp:"
672 "jmp ftrace_test_p6nop\n"
673 "nop\n"
674 "nop\n"
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707}
708#endif
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa044e1e30a2..679b6450382b 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
1953 size_t size, 1953 size_t size,
1954 int dir) 1954 int dir)
1955{ 1955{
1956 dma_addr_t flush_addr;
1956 dma_addr_t i, start; 1957 dma_addr_t i, start;
1957 unsigned int pages; 1958 unsigned int pages;
1958 1959
@@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
1960 (dma_addr + size > dma_dom->aperture_size)) 1961 (dma_addr + size > dma_dom->aperture_size))
1961 return; 1962 return;
1962 1963
1964 flush_addr = dma_addr;
1963 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 1965 pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
1964 dma_addr &= PAGE_MASK; 1966 dma_addr &= PAGE_MASK;
1965 start = dma_addr; 1967 start = dma_addr;
@@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
1974 dma_ops_free_addresses(dma_dom, dma_addr, pages); 1976 dma_ops_free_addresses(dma_dom, dma_addr, pages);
1975 1977
1976 if (amd_iommu_unmap_flush || dma_dom->need_flush) { 1978 if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1977 iommu_flush_pages(&dma_dom->domain, dma_addr, size); 1979 iommu_flush_pages(&dma_dom->domain, flush_addr, size);
1978 dma_dom->need_flush = false; 1980 dma_dom->need_flush = false;
1979 } 1981 }
1980} 1982}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3cc63e2b8dd4..5a170cbbbed8 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
632 iommu->last_device = calc_devid(MMIO_GET_BUS(range), 632 iommu->last_device = calc_devid(MMIO_GET_BUS(range),
633 MMIO_GET_LD(range)); 633 MMIO_GET_LD(range));
634 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 634 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
635
636 if (is_rd890_iommu(iommu->dev)) {
637 pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]);
638 pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]);
639 pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]);
640 pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]);
641 }
635} 642}
636 643
637/* 644/*
@@ -649,29 +656,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
649 struct ivhd_entry *e; 656 struct ivhd_entry *e;
650 657
651 /* 658 /*
652 * First set the recommended feature enable bits from ACPI 659 * First save the recommended feature enable bits from ACPI
653 * into the IOMMU control registers
654 */ 660 */
655 h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? 661 iommu->acpi_flags = h->flags;
656 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
657 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
658
659 h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
660 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
661 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
662
663 h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
664 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
665 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
666
667 h->flags & IVHD_FLAG_ISOC_EN_MASK ?
668 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
669 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
670
671 /*
672 * make IOMMU memory accesses cache coherent
673 */
674 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
675 662
676 /* 663 /*
677 * Done. Now parse the device entries 664 * Done. Now parse the device entries
@@ -1116,6 +1103,40 @@ static void init_device_table(void)
1116 } 1103 }
1117} 1104}
1118 1105
1106static void iommu_init_flags(struct amd_iommu *iommu)
1107{
1108 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
1109 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
1110 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
1111
1112 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
1113 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
1114 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
1115
1116 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
1117 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
1118 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
1119
1120 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
1121 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
1122 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
1123
1124 /*
1125 * make IOMMU memory accesses cache coherent
1126 */
1127 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1128}
1129
1130static void iommu_apply_quirks(struct amd_iommu *iommu)
1131{
1132 if (is_rd890_iommu(iommu->dev)) {
1133 pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]);
1134 pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]);
1135 pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]);
1136 pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]);
1137 }
1138}
1139
1119/* 1140/*
1120 * This function finally enables all IOMMUs found in the system after 1141 * This function finally enables all IOMMUs found in the system after
1121 * they have been initialized 1142 * they have been initialized
@@ -1126,6 +1147,8 @@ static void enable_iommus(void)
1126 1147
1127 for_each_iommu(iommu) { 1148 for_each_iommu(iommu) {
1128 iommu_disable(iommu); 1149 iommu_disable(iommu);
1150 iommu_apply_quirks(iommu);
1151 iommu_init_flags(iommu);
1129 iommu_set_device_table(iommu); 1152 iommu_set_device_table(iommu);
1130 iommu_enable_command_buffer(iommu); 1153 iommu_enable_command_buffer(iommu);
1131 iommu_enable_event_buffer(iommu); 1154 iommu_enable_event_buffer(iommu);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index f1efebaf5510..5c5b8f3dddb5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
306 306
307 old_cfg = old_desc->chip_data; 307 old_cfg = old_desc->chip_data;
308 308
309 memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); 309 cfg->vector = old_cfg->vector;
310 cfg->move_in_progress = old_cfg->move_in_progress;
311 cpumask_copy(cfg->domain, old_cfg->domain);
312 cpumask_copy(cfg->old_domain, old_cfg->old_domain);
310 313
311 init_copy_irq_2_pin(old_cfg, cfg, node); 314 init_copy_irq_2_pin(old_cfg, cfg, node);
312} 315}
313 316
314static void free_irq_cfg(struct irq_cfg *old_cfg) 317static void free_irq_cfg(struct irq_cfg *cfg)
315{ 318{
316 kfree(old_cfg); 319 free_cpumask_var(cfg->domain);
320 free_cpumask_var(cfg->old_domain);
321 kfree(cfg);
317} 322}
318 323
319void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) 324void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 490dac63c2d2..f2f9ac7da25c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
545 } 545 }
546} 546}
547 547
548static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) 548void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
549{ 549{
550 u32 tfms, xlvl; 550 u32 tfms, xlvl;
551 u32 ebx; 551 u32 ebx;
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3624e8a0f71b..f668bb1f7d43 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[]; 33 *const __x86_cpu_dev_end[];
34 34
35extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 35extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
36extern void get_cpu_cap(struct cpuinfo_x86 *c);
36 37
37#endif 38#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 994230d4dc4e..4f6f679f2799 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle)
368 return -ENODEV; 368 return -ENODEV;
369 369
370 out_obj = output.pointer; 370 out_obj = output.pointer;
371 if (out_obj->type != ACPI_TYPE_BUFFER) 371 if (out_obj->type != ACPI_TYPE_BUFFER) {
372 return -ENODEV; 372 ret = -ENODEV;
373 goto out_free;
374 }
373 375
374 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); 376 errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0);
375 if (errors) 377 if (errors) {
376 return -ENODEV; 378 ret = -ENODEV;
379 goto out_free;
380 }
377 381
378 supported = *((u32 *)(out_obj->buffer.pointer + 4)); 382 supported = *((u32 *)(out_obj->buffer.pointer + 4));
379 if (!(supported & 0x1)) 383 if (!(supported & 0x1)) {
380 return -ENODEV; 384 ret = -ENODEV;
385 goto out_free;
386 }
381 387
382out_free: 388out_free:
383 kfree(output.pointer); 389 kfree(output.pointer);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 85f69cdeae10..b4389441efbb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
39 misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; 39 misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
40 wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); 40 wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
41 c->cpuid_level = cpuid_eax(0); 41 c->cpuid_level = cpuid_eax(0);
42 get_cpu_cap(c);
42 } 43 }
43 } 44 }
44 45
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..e2513f26ba8b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scehduled with existing events. 969 * but only if it can be scehduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
980 * skip the schedulability test here, it will be peformed 992 * skip the schedulability test here, it will be peformed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
1093 * Drain the remaining delta count out of a event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
1111 * Drain the remaining delta count out of a event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1388,7 +1404,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1404 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1405 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1406 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1407
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1408 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1409 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1439,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1439 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1440 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1441
1442 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1443 perf_cpu_notifier(x86_pmu_notifier);
1428} 1444}
1429 1445
@@ -1437,10 +1453,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1453 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1454 * schedulability test, it will be performed at commit time
1439 */ 1455 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1456static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1457{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1458 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1459
1460 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1461 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1462 cpuc->n_txn = 0;
1446} 1463}
@@ -1450,7 +1467,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1467 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1468 * schedulability test.
1452 */ 1469 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1470static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1471{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1473
@@ -1460,6 +1477,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1477 */
1461 cpuc->n_added -= cpuc->n_txn; 1478 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1479 cpuc->n_events -= cpuc->n_txn;
1480 perf_pmu_enable(pmu);
1463} 1481}
1464 1482
1465/* 1483/*
@@ -1467,7 +1485,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1485 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1486 * Return 0 if success
1469 */ 1487 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1488static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1489{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1490 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1491 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1507,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1507 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1508
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1509 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1510 perf_pmu_enable(pmu);
1493 return 0; 1511 return 0;
1494} 1512}
1495 1513
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1514/*
1509 * validate that we can schedule this event 1515 * validate that we can schedule this event
1510 */ 1516 */
@@ -1579,12 +1585,22 @@ out:
1579 return ret; 1585 return ret;
1580} 1586}
1581 1587
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1588int x86_pmu_event_init(struct perf_event *event)
1583{ 1589{
1584 const struct pmu *tmp; 1590 struct pmu *tmp;
1585 int err; 1591 int err;
1586 1592
1587 err = __hw_perf_event_init(event); 1593 switch (event->attr.type) {
1594 case PERF_TYPE_RAW:
1595 case PERF_TYPE_HARDWARE:
1596 case PERF_TYPE_HW_CACHE:
1597 break;
1598
1599 default:
1600 return -ENOENT;
1601 }
1602
1603 err = __x86_pmu_event_init(event);
1588 if (!err) { 1604 if (!err) {
1589 /* 1605 /*
1590 * we temporarily connect event to its pmu 1606 * we temporarily connect event to its pmu
@@ -1604,26 +1620,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1620 if (err) {
1605 if (event->destroy) 1621 if (event->destroy)
1606 event->destroy(event); 1622 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1623 }
1609 1624
1610 return &pmu; 1625 return err;
1611} 1626}
1612 1627
1613/* 1628static struct pmu pmu = {
1614 * callchain support 1629 .pmu_enable = x86_pmu_enable,
1615 */ 1630 .pmu_disable = x86_pmu_disable,
1616 1631
1617static inline 1632 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1633
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1634 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1635 .del = x86_pmu_del,
1636 .start = x86_pmu_start,
1637 .stop = x86_pmu_stop,
1638 .read = x86_pmu_read,
1626 1639
1640 .start_txn = x86_pmu_start_txn,
1641 .cancel_txn = x86_pmu_cancel_txn,
1642 .commit_txn = x86_pmu_commit_txn,
1643};
1644
1645/*
1646 * callchain support
1647 */
1627 1648
1628static void 1649static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1650backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1666,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1666{
1646 struct perf_callchain_entry *entry = data; 1667 struct perf_callchain_entry *entry = data;
1647 1668
1648 callchain_store(entry, addr); 1669 perf_callchain_store(entry, addr);
1649} 1670}
1650 1671
1651static const struct stacktrace_ops backtrace_ops = { 1672static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1677,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1677 .walk_stack = print_context_stack_bp,
1657}; 1678};
1658 1679
1659static void 1680void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1681perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1682{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1683 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1684 /* TODO: We don't support guest os callchain now */
1685 return;
1686 }
1687
1688 perf_callchain_store(entry, regs->ip);
1664 1689
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1690 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1691}
@@ -1689,7 +1714,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1714 if (fp < compat_ptr(regs->sp))
1690 break; 1715 break;
1691 1716
1692 callchain_store(entry, frame.return_address); 1717 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1718 fp = compat_ptr(frame.next_frame);
1694 } 1719 }
1695 return 1; 1720 return 1;
@@ -1702,19 +1727,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1727}
1703#endif 1728#endif
1704 1729
1705static void 1730void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1731perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1732{
1708 struct stack_frame frame; 1733 struct stack_frame frame;
1709 const void __user *fp; 1734 const void __user *fp;
1710 1735
1711 if (!user_mode(regs)) 1736 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1737 /* TODO: We don't support guest os callchain now */
1738 return;
1739 }
1713 1740
1714 fp = (void __user *)regs->bp; 1741 fp = (void __user *)regs->bp;
1715 1742
1716 callchain_store(entry, PERF_CONTEXT_USER); 1743 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1744
1719 if (perf_callchain_user32(regs, entry)) 1745 if (perf_callchain_user32(regs, entry))
1720 return; 1746 return;
@@ -1731,52 +1757,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1757 if ((unsigned long)fp < regs->sp)
1732 break; 1758 break;
1733 1759
1734 callchain_store(entry, frame.return_address); 1760 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1761 fp = frame.next_frame;
1736 } 1762 }
1737} 1763}
1738 1764
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1765unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1766{
1782 unsigned long ip; 1767 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index b560db3305be..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
640 /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
649 * For info
650 * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have out-of-bound event index */ 660 /* User data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
670 * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as
671 * in Architectural Performance Monitoring, it means not
672 * on _which_ logical cpu to count but rather _when_, ie it
673 * depends on logical cpu state -- count event if one cpu active,
674 * none, both or any, so we just allow user to pass any value
675 * desired.
676 *
677 * In turn we always set Tx_OS/Tx_USR bits bound to logical
678 * cpu without their propagation to another cpu
679 */
680
681 /*
682 * if an event is shared accross the logical threads
683 * the user needs special permissions to be able to use it
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
732 * Clear bits we reserve to be managed by kernel itself
733 * and never allowed from a user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc) 743 * bits since we keep additional info here (for cache events and etc)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
@@ -660,8 +904,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
660 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 904 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
661 int overflow; 905 int overflow;
662 906
663 if (!test_bit(idx, cpuc->active_mask)) 907 if (!test_bit(idx, cpuc->active_mask)) {
908 /* catch in-flight IRQs */
909 if (__test_and_clear_bit(idx, cpuc->running))
910 handled++;
664 continue; 911 continue;
912 }
665 913
666 event = cpuc->events[idx]; 914 event = cpuc->events[idx];
667 hwc = &event->hw; 915 hwc = &event->hw;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 34b4dad6f0b8..d49079515122 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
31 const struct cpuid_bit *cb; 31 const struct cpuid_bit *cb;
32 32
33 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { 33 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
34 { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
34 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, 35 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
35 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, 36 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
36 { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, 37 { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
351 * go to a lesser efficient 5 byte nop. If that fails
352 * we then just use a jmp as our nop. This isn't the most
353 * efficient nop, but we can not use a multi part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
397 /* The return code is retured via data */ 336 /* The return code is retured via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 410fdb3f1939..7494999141b3 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -506,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
506{ 506{
507 unsigned int irq; 507 unsigned int irq;
508 508
509 irq = create_irq(); 509 irq = create_irq_nr(0, -1);
510 if (!irq) 510 if (!irq)
511 return -EINVAL; 511 return -EINVAL;
512 512
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e0bc186d7501..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,11 +239,13 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 return module_bug_finalize(hdr, sechdrs, me); 242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
245 return 0;
243} 246}
244 247
245void module_arch_cleanup(struct module *mod) 248void module_arch_cleanup(struct module *mod)
246{ 249{
247 alternatives_smp_module_del(mod); 250 alternatives_smp_module_del(mod);
248 module_bug_cleanup(mod);
249} 251}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..00e167870f71 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
112#include <asm/numa_64.h> 112#include <asm/numa_64.h>
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h>
115 116
116/* 117/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p)
726{ 727{
727 int acpi = 0; 728 int acpi = 0;
728 int k8 = 0; 729 int k8 = 0;
730 unsigned long flags;
729 731
730#ifdef CONFIG_X86_32 732#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 733 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1073 x86_init.oem.banner();
1072 1074
1073 mcheck_init(); 1075 mcheck_init();
1076
1077 local_irq_save(flags);
1078 arch_init_ideal_nop5();
1079 local_irq_restore(flags);
1074} 1080}
1075 1081
1076#ifdef CONFIG_X86_32 1082#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..a24c6cfdccc4 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
251 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 251 if (!(address >= VMALLOC_START && address < VMALLOC_END))
252 return -1; 252 return -1;
253 253
254 WARN_ON_ONCE(in_nmi());
255
254 /* 256 /*
255 * Synchronize this task's top level page-table 257 * Synchronize this task's top level page-table
256 * with the 'reference' page table. 258 * with the 'reference' page table.
@@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
369 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 371 if (!(address >= VMALLOC_START && address < VMALLOC_END))
370 return -1; 372 return -1;
371 373
374 WARN_ON_ONCE(in_nmi());
375
372 /* 376 /*
373 * Copy kernel mappings over when needed. This can also 377 * Copy kernel mappings over when needed. This can also
374 * happen within a race in page table update. In the later 378 * happen within a race in page table update. In the later
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
631 if (!pte) 631 if (!pte)
632 return false; 632 return false;
633 633
634 WARN_ON_ONCE(in_nmi());
635
634 if (error_code & 2) 636 if (error_code & 2)
635 kmemcheck_access(regs, address, KMEMCHECK_WRITE); 637 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
636 else 638 else
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 1a5353a753fc..b2bb5aa3b054 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -489,8 +489,9 @@ static void xen_hvm_setup_cpu_clockevents(void)
489__init void xen_hvm_init_time_ops(void) 489__init void xen_hvm_init_time_ops(void)
490{ 490{
491 /* vector callback is needed otherwise we cannot receive interrupts 491 /* vector callback is needed otherwise we cannot receive interrupts
492 * on cpu > 0 */ 492 * on cpu > 0 and at this point we don't know how many cpus are
493 if (!xen_have_vector_callback && num_present_cpus() > 1) 493 * available */
494 if (!xen_have_vector_callback)
494 return; 495 return;
495 if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { 496 if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
496 printk(KERN_INFO "Xen doesn't support pvclock on HVM," 497 printk(KERN_INFO "Xen doesn't support pvclock on HVM,"