36 files changed, 2988 insertions, 424 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 2f9115c0ae62..61c291cddf18 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -165,8 +165,8 @@ the user entry_handler invocation is also skipped. | |||
165 | 165 | ||
166 | 1.4 How Does Jump Optimization Work? | 166 | 1.4 How Does Jump Optimization Work? |
167 | 167 | ||
168 | If you configured your kernel with CONFIG_OPTPROBES=y (currently | 168 | If your kernel is built with CONFIG_OPTPROBES=y (currently this flag |
169 | this option is supported on x86/x86-64, non-preemptive kernel) and | 169 | is automatically set to 'y' on x86/x86-64 with a non-preemptive kernel) and |
170 | the "debug.kprobes_optimization" kernel parameter is set to 1 (see | 170 | the "debug.kprobes_optimization" kernel parameter is set to 1 (see |
171 | sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump | 171 | sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump |
172 | instruction instead of a breakpoint instruction at each probepoint. | 172 | instruction instead of a breakpoint instruction at each probepoint. |
@@ -271,8 +271,6 @@ tweak the kernel's execution path, you need to suppress optimization, | |||
271 | using one of the following techniques: | 271 | using one of the following techniques: |
272 | - Specify an empty function for the kprobe's post_handler or break_handler. | 272 | - Specify an empty function for the kprobe's post_handler or break_handler. |
273 | or | 273 | or |
274 | - Config CONFIG_OPTPROBES=n. | ||
275 | or | ||
276 | - Execute 'sysctl -w debug.kprobes_optimization=n' | 274 | - Execute 'sysctl -w debug.kprobes_optimization=n' |
277 | 275 | ||
278 | 2. Architectures Supported | 276 | 2. Architectures Supported |
@@ -307,10 +305,6 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO), | |||
307 | so you can use "objdump -d -l vmlinux" to see the source-to-object | 305 | so you can use "objdump -d -l vmlinux" to see the source-to-object |
308 | code mapping. | 306 | code mapping. |
309 | 307 | ||
310 | If you want to reduce probing overhead, set "Kprobes jump optimization | ||
311 | support" (CONFIG_OPTPROBES) to "y". You can find this option under the | ||
312 | "Kprobes" line. | ||
313 | |||
314 | 4. API Reference | 308 | 4. API Reference |
315 | 309 | ||
316 | The Kprobes API includes a "register" function and an "unregister" | 310 | The Kprobes API includes a "register" function and an "unregister" |
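The suppression technique documented above can be made concrete with a minimal sketch (not part of this patch): registering a kprobe with an empty post_handler keeps the probe as a breakpoint instead of letting Kprobes optimize it into a jump. The probed symbol "do_fork" and the module boilerplate are illustrative only.

    /*
     * Sketch only: a module registering a kprobe with an empty
     * post_handler, which (per the documentation above) suppresses
     * jump optimization for this probe.
     */
    #include <linux/module.h>
    #include <linux/kprobes.h>

    static int my_pre(struct kprobe *p, struct pt_regs *regs)
    {
            pr_info("hit %s, ip=%lx\n", p->symbol_name, regs->ip);
            return 0;
    }

    /* Empty post_handler: its mere presence keeps the probe unoptimized. */
    static void my_post(struct kprobe *p, struct pt_regs *regs,
                        unsigned long flags)
    {
    }

    static struct kprobe my_kp = {
            .symbol_name  = "do_fork",      /* arbitrary example symbol */
            .pre_handler  = my_pre,
            .post_handler = my_post,
    };

    static int __init my_init(void)  { return register_kprobe(&my_kp); }
    static void __exit my_exit(void) { unregister_kprobe(&my_kp); }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");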
diff --git a/arch/Kconfig b/arch/Kconfig
index e5eb1337a537..f06010fb4838 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,15 +42,10 @@ config KPROBES | |||
42 | If in doubt, say "N". | 42 | If in doubt, say "N". |
43 | 43 | ||
44 | config OPTPROBES | 44 | config OPTPROBES |
45 | bool "Kprobes jump optimization support (EXPERIMENTAL)" | 45 | def_bool y |
46 | default y | 46 | depends on KPROBES && HAVE_OPTPROBES |
47 | depends on KPROBES | ||
48 | depends on !PREEMPT | 47 | depends on !PREEMPT |
49 | depends on HAVE_OPTPROBES | ||
50 | select KALLSYMS_ALL | 48 | select KALLSYMS_ALL |
51 | help | ||
52 | This option will allow kprobes to optimize breakpoint to | ||
53 | a jump for reducing its overhead. | ||
54 | 49 | ||
55 | config HAVE_EFFICIENT_UNALIGNED_ACCESS | 50 | config HAVE_EFFICIENT_UNALIGNED_ACCESS |
56 | bool | 51 | bool |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e98440371525..e1240f652a9b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,6 +58,9 @@ config X86 | |||
58 | select HAVE_ARCH_KMEMCHECK | 58 | select HAVE_ARCH_KMEMCHECK |
59 | select HAVE_USER_RETURN_NOTIFIER | 59 | select HAVE_USER_RETURN_NOTIFIER |
60 | 60 | ||
61 | config INSTRUCTION_DECODER | ||
62 | def_bool (KPROBES || PERF_EVENTS) | ||
63 | |||
61 | config OUTPUT_FORMAT | 64 | config OUTPUT_FORMAT |
62 | string | 65 | string |
63 | default "elf32-i386" if X86_32 | 66 | default "elf32-i386" if X86_32 |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 96c2e0ad04ca..88c765e16410 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -68,6 +68,8 @@ struct insn { | |||
68 | const insn_byte_t *next_byte; | 68 | const insn_byte_t *next_byte; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | #define MAX_INSN_SIZE 16 | ||
72 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 73 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 74 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | 75 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) |
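With MAX_INSN_SIZE now defined next to struct insn, both kprobes and perf can size copy buffers against the decoder itself. A rough sketch of typical decoder usage follows (assuming the insn_init()/insn_get_length() helpers as declared by this version of <asm/insn.h>; the copy step is simplified for illustration):

    /* Sketch: measure the length of one kernel instruction. */
    #include <linux/types.h>
    #include <linux/string.h>
    #include <asm/insn.h>

    static int example_insn_length(const void *kaddr)
    {
            struct insn insn;
            u8 buf[MAX_INSN_SIZE];

            /* real code would use probe_kernel_read() or similar */
            memcpy(buf, kaddr, MAX_INSN_SIZE);

            insn_init(&insn, buf, 1);       /* 1 = decode in x86-64 mode */
            insn_get_length(&insn);         /* fills insn.length */

            return insn.length;
    }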
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4ffa345a8ccb..547882539157 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/ptrace.h> | 25 | #include <linux/ptrace.h> |
26 | #include <linux/percpu.h> | 26 | #include <linux/percpu.h> |
27 | #include <asm/insn.h> | ||
27 | 28 | ||
28 | #define __ARCH_WANT_KPROBES_INSN_SLOT | 29 | #define __ARCH_WANT_KPROBES_INSN_SLOT |
29 | 30 | ||
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; | |||
36 | #define RELATIVEJUMP_SIZE 5 | 37 | #define RELATIVEJUMP_SIZE 5 |
37 | #define RELATIVECALL_OPCODE 0xe8 | 38 | #define RELATIVECALL_OPCODE 0xe8 |
38 | #define RELATIVE_ADDR_SIZE 4 | 39 | #define RELATIVE_ADDR_SIZE 4 |
39 | #define MAX_INSN_SIZE 16 | ||
40 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
41 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ | 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..124dddd598f3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,7 +5,7 @@ | |||
5 | * Performance event hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 32 |
9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define X86_PMC_MAX_FIXED 3 |
10 | 10 | ||
11 | #define X86_PMC_IDX_GENERIC 0 | 11 | #define X86_PMC_IDX_GENERIC 0 |
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void); | |||
136 | 136 | ||
137 | #define PERF_EVENT_INDEX_OFFSET 0 | 137 | #define PERF_EVENT_INDEX_OFFSET 0 |
138 | 138 | ||
139 | /* | ||
140 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | ||
141 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | ||
142 | * care what we do with it. | ||
143 | */ | ||
144 | #define PERF_EFLAGS_EXACT (1UL << 3) | ||
145 | |||
146 | #define perf_misc_flags(regs) \ | ||
147 | ({ int misc = 0; \ | ||
148 | if (user_mode(regs)) \ | ||
149 | misc |= PERF_RECORD_MISC_USER; \ | ||
150 | else \ | ||
151 | misc |= PERF_RECORD_MISC_KERNEL; \ | ||
152 | if (regs->flags & PERF_EFLAGS_EXACT) \ | ||
153 | misc |= PERF_RECORD_MISC_EXACT; \ | ||
154 | misc; }) | ||
155 | |||
156 | #define perf_instruction_pointer(regs) ((regs)->ip) | ||
157 | |||
139 | #else | 158 | #else |
140 | static inline void init_hw_perf_events(void) { } | 159 | static inline void init_hw_perf_events(void) { } |
141 | static inline void perf_events_lapic_init(void) { } | 160 | static inline void perf_events_lapic_init(void) { } |
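The PERF_EFLAGS_EXACT plumbing above surfaces to consumers as an "exact" bit in each sample's misc field. A hedged sketch of how a user-space tool might test it (assuming the PERF_RECORD_MISC_EXACT flag used by the macro above is exported through linux/perf_event.h):

    /* Sketch: did PEBS give us a precisely fixed-up instruction pointer? */
    #include <linux/perf_event.h>
    #include <stdbool.h>

    static bool sample_ip_is_exact(const struct perf_event_header *hdr)
    {
            return (hdr->misc & PERF_RECORD_MISC_EXACT) != 0;
    }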
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
new file mode 100644
index 000000000000..b842b3238e46
--- /dev/null
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -0,0 +1,708 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | */ | ||
4 | |||
5 | #ifndef PERF_EVENT_P4_H | ||
6 | #define PERF_EVENT_P4_H | ||
7 | |||
8 | #include <linux/cpu.h> | ||
9 | #include <linux/bitops.h> | ||
10 | |||
11 | /* | ||
12 | * NetBurst has performance MSRs shared between | ||
13 | * threads if HT is turned on, i.e. for both logical | ||
14 | * processors (note that in Atom with HT support the | ||
15 | * perf-MSRs are not shared and every thread has its | ||
16 | * own perf-MSRs set) | ||
17 | */ | ||
18 | #define ARCH_P4_TOTAL_ESCR (46) | ||
19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | ||
20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | ||
21 | #define ARCH_P4_MAX_CCCR (18) | ||
22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
23 | |||
24 | #define P4_EVNTSEL_EVENT_MASK 0x7e000000U | ||
25 | #define P4_EVNTSEL_EVENT_SHIFT 25 | ||
26 | #define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U | ||
27 | #define P4_EVNTSEL_EVENTMASK_SHIFT 9 | ||
28 | #define P4_EVNTSEL_TAG_MASK 0x000001e0U | ||
29 | #define P4_EVNTSEL_TAG_SHIFT 5 | ||
30 | #define P4_EVNTSEL_TAG_ENABLE 0x00000010U | ||
31 | #define P4_EVNTSEL_T0_OS 0x00000008U | ||
32 | #define P4_EVNTSEL_T0_USR 0x00000004U | ||
33 | #define P4_EVNTSEL_T1_OS 0x00000002U | ||
34 | #define P4_EVNTSEL_T1_USR 0x00000001U | ||
35 | |||
36 | /* Non HT mask */ | ||
37 | #define P4_EVNTSEL_MASK \ | ||
38 | (P4_EVNTSEL_EVENT_MASK | \ | ||
39 | P4_EVNTSEL_EVENTMASK_MASK | \ | ||
40 | P4_EVNTSEL_TAG_MASK | \ | ||
41 | P4_EVNTSEL_TAG_ENABLE | \ | ||
42 | P4_EVNTSEL_T0_OS | \ | ||
43 | P4_EVNTSEL_T0_USR) | ||
44 | |||
45 | /* HT mask */ | ||
46 | #define P4_EVNTSEL_MASK_HT \ | ||
47 | (P4_EVNTSEL_MASK | \ | ||
48 | P4_EVNTSEL_T1_OS | \ | ||
49 | P4_EVNTSEL_T1_USR) | ||
50 | |||
51 | #define P4_CCCR_OVF 0x80000000U | ||
52 | #define P4_CCCR_CASCADE 0x40000000U | ||
53 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | ||
54 | #define P4_CCCR_OVF_PMI_T1 0x08000000U | ||
55 | #define P4_CCCR_FORCE_OVF 0x02000000U | ||
56 | #define P4_CCCR_EDGE 0x01000000U | ||
57 | #define P4_CCCR_THRESHOLD_MASK 0x00f00000U | ||
58 | #define P4_CCCR_THRESHOLD_SHIFT 20 | ||
59 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | ||
60 | #define P4_CCCR_COMPLEMENT 0x00080000U | ||
61 | #define P4_CCCR_COMPARE 0x00040000U | ||
62 | #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U | ||
63 | #define P4_CCCR_ESCR_SELECT_SHIFT 13 | ||
64 | #define P4_CCCR_ENABLE 0x00001000U | ||
65 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | ||
66 | #define P4_CCCR_THREAD_BOTH 0x00020000U | ||
67 | #define P4_CCCR_THREAD_ANY 0x00030000U | ||
68 | |||
69 | /* Non HT mask */ | ||
70 | #define P4_CCCR_MASK \ | ||
71 | (P4_CCCR_OVF | \ | ||
72 | P4_CCCR_CASCADE | \ | ||
73 | P4_CCCR_OVF_PMI_T0 | \ | ||
74 | P4_CCCR_FORCE_OVF | \ | ||
75 | P4_CCCR_EDGE | \ | ||
76 | P4_CCCR_THRESHOLD_MASK | \ | ||
77 | P4_CCCR_COMPLEMENT | \ | ||
78 | P4_CCCR_COMPARE | \ | ||
79 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
80 | P4_CCCR_ENABLE) | ||
81 | |||
82 | /* HT mask */ | ||
83 | #define P4_CCCR_MASK_HT \ | ||
84 | (P4_CCCR_MASK | \ | ||
85 | P4_CCCR_THREAD_ANY) | ||
86 | |||
87 | /* | ||
88 | * format is 32 bit: ee ss aa aa | ||
89 | * where | ||
90 | * ee - 8 bit event | ||
91 | * ss - 8 bit selector | ||
92 | * aa aa - 16 bits reserved for tags/attributes | ||
93 | */ | ||
94 | #define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) | ||
95 | #define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) | ||
96 | #define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) | ||
97 | #define P4_EVENT_PACK_ATTR(attr) ((attr)) | ||
98 | #define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) | ||
99 | #define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) | ||
100 | #define P4_EVENT_ATTR(class, name) class##_##name | ||
101 | #define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) | ||
102 | |||
103 | /* | ||
104 | * the config field is 64 bits wide and consists of | ||
105 | * HT << 63 | ESCR << 32 | CCCR | ||
106 | * where HT is the HyperThreading bit (since ESCR | ||
107 | * has it reserved we may use it for our own purpose) | ||
108 | * | ||
109 | * note that these are NOT the addresses of the respective | ||
110 | * ESCR and CCCR but only packed values, which should | ||
111 | * be unpacked and written to the proper addresses | ||
112 | * | ||
113 | * the basic idea is to pack as much info as | ||
114 | * possible | ||
115 | */ | ||
116 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | ||
117 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
118 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | ||
119 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
120 | |||
121 | #define p4_config_unpack_emask(v) \ | ||
122 | ({ \ | ||
123 | u32 t = p4_config_unpack_escr((v)); \ | ||
124 | t &= P4_EVNTSEL_EVENTMASK_MASK; \ | ||
125 | t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ | ||
126 | t; \ | ||
127 | }) | ||
128 | |||
129 | #define P4_CONFIG_HT_SHIFT 63 | ||
130 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | ||
131 | |||
132 | static inline u32 p4_config_unpack_opcode(u64 config) | ||
133 | { | ||
134 | u32 e, s; | ||
135 | |||
136 | /* | ||
137 | * we don't care about HT presence here since | ||
138 | * event opcode doesn't depend on it | ||
139 | */ | ||
140 | e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; | ||
141 | s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; | ||
142 | |||
143 | return P4_EVENT_PACK(e, s); | ||
144 | } | ||
145 | |||
146 | static inline bool p4_is_event_cascaded(u64 config) | ||
147 | { | ||
148 | u32 cccr = p4_config_unpack_cccr(config); | ||
149 | return !!(cccr & P4_CCCR_CASCADE); | ||
150 | } | ||
151 | |||
152 | static inline int p4_ht_config_thread(u64 config) | ||
153 | { | ||
154 | return !!(config & P4_CONFIG_HT); | ||
155 | } | ||
156 | |||
157 | static inline u64 p4_set_ht_bit(u64 config) | ||
158 | { | ||
159 | return config | P4_CONFIG_HT; | ||
160 | } | ||
161 | |||
162 | static inline u64 p4_clear_ht_bit(u64 config) | ||
163 | { | ||
164 | return config & ~P4_CONFIG_HT; | ||
165 | } | ||
166 | |||
167 | static inline int p4_ht_active(void) | ||
168 | { | ||
169 | #ifdef CONFIG_SMP | ||
170 | return smp_num_siblings > 1; | ||
171 | #endif | ||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | static inline int p4_ht_thread(int cpu) | ||
176 | { | ||
177 | #ifdef CONFIG_SMP | ||
178 | if (smp_num_siblings == 2) | ||
179 | return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); | ||
180 | #endif | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static inline int p4_should_swap_ts(u64 config, int cpu) | ||
185 | { | ||
186 | return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); | ||
187 | } | ||
188 | |||
189 | static inline u32 p4_default_cccr_conf(int cpu) | ||
190 | { | ||
191 | /* | ||
192 | * Note that P4_CCCR_THREAD_ANY is "required" on | ||
193 | * non-HT machines (on HT machines we count TS events | ||
194 | * regardless of the state of the second logical processor) | ||
195 | */ | ||
196 | u32 cccr = P4_CCCR_THREAD_ANY; | ||
197 | |||
198 | if (!p4_ht_thread(cpu)) | ||
199 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
200 | else | ||
201 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
202 | |||
203 | return cccr; | ||
204 | } | ||
205 | |||
206 | static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | ||
207 | { | ||
208 | u32 escr = 0; | ||
209 | |||
210 | if (!p4_ht_thread(cpu)) { | ||
211 | if (!exclude_os) | ||
212 | escr |= P4_EVNTSEL_T0_OS; | ||
213 | if (!exclude_usr) | ||
214 | escr |= P4_EVNTSEL_T0_USR; | ||
215 | } else { | ||
216 | if (!exclude_os) | ||
217 | escr |= P4_EVNTSEL_T1_OS; | ||
218 | if (!exclude_usr) | ||
219 | escr |= P4_EVNTSEL_T1_USR; | ||
220 | } | ||
221 | |||
222 | return escr; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Comments below the event represent ESCR restriction | ||
227 | * for this event and counter index per ESCR | ||
228 | * | ||
229 | * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early | ||
230 | * processor builds (family 0FH, models 01H-02H). These MSRs | ||
231 | * are not available on later versions, so we don't | ||
232 | * use them at all | ||
233 | * | ||
234 | * Also note that CCCR1 does not have a properly working | ||
235 | * P4_CCCR_ENABLE bit, so we should not use this CCCR or | ||
236 | * its respective counter as a result | ||
237 | */ | ||
238 | #define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) | ||
239 | /* | ||
240 | * MSR_P4_TC_ESCR0: 4, 5 | ||
241 | * MSR_P4_TC_ESCR1: 6, 7 | ||
242 | */ | ||
243 | |||
244 | #define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) | ||
245 | /* | ||
246 | * MSR_P4_BPU_ESCR0: 0, 1 | ||
247 | * MSR_P4_BPU_ESCR1: 2, 3 | ||
248 | */ | ||
249 | |||
250 | #define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) | ||
251 | /* | ||
252 | * MSR_P4_ITLB_ESCR0: 0, 1 | ||
253 | * MSR_P4_ITLB_ESCR1: 2, 3 | ||
254 | */ | ||
255 | |||
256 | #define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) | ||
257 | /* | ||
258 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
259 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
260 | */ | ||
261 | |||
262 | #define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) | ||
263 | /* | ||
264 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
265 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
266 | */ | ||
267 | |||
268 | #define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) | ||
269 | /* | ||
270 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
271 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
272 | */ | ||
273 | |||
274 | #define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) | ||
275 | /* | ||
276 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
277 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
278 | */ | ||
279 | |||
280 | #define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) | ||
281 | /* | ||
282 | * MSR_P4_MOB_ESCR0: 0, 1 | ||
283 | * MSR_P4_MOB_ESCR1: 2, 3 | ||
284 | */ | ||
285 | |||
286 | #define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) | ||
287 | /* | ||
288 | * MSR_P4_PMH_ESCR0: 0, 1 | ||
289 | * MSR_P4_PMH_ESCR1: 2, 3 | ||
290 | */ | ||
291 | |||
292 | #define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) | ||
293 | /* | ||
294 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
295 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
296 | */ | ||
297 | |||
298 | #define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) | ||
299 | /* | ||
300 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
301 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
302 | */ | ||
303 | |||
304 | #define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) | ||
305 | /* | ||
306 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
307 | */ | ||
308 | |||
309 | #define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) | ||
310 | /* | ||
311 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
312 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
313 | */ | ||
314 | |||
315 | #define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) | ||
316 | /* | ||
317 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
318 | */ | ||
319 | |||
320 | #define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) | ||
321 | /* | ||
322 | * NOTE: no ESCR name in docs, it's guessed | ||
323 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
324 | */ | ||
325 | |||
326 | #define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) | ||
327 | /* | ||
328 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
329 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
330 | */ | ||
331 | |||
332 | #define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) | ||
333 | /* | ||
334 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
335 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
336 | */ | ||
337 | |||
338 | #define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) | ||
339 | /* | ||
340 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
341 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
342 | */ | ||
343 | |||
344 | #define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) | ||
345 | /* | ||
346 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
347 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
348 | */ | ||
349 | |||
350 | #define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) | ||
351 | /* | ||
352 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
353 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
354 | */ | ||
355 | |||
356 | #define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) | ||
357 | /* | ||
358 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
359 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
360 | */ | ||
361 | |||
362 | #define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) | ||
363 | /* | ||
364 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
365 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
366 | */ | ||
367 | |||
368 | #define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) | ||
369 | /* | ||
370 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
371 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
372 | */ | ||
373 | |||
374 | #define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) | ||
375 | /* | ||
376 | * MSR_P4_TC_ESCR0: 4, 5 | ||
377 | * MSR_P4_TC_ESCR1: 6, 7 | ||
378 | */ | ||
379 | |||
380 | #define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) | ||
381 | /* | ||
382 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
383 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
384 | */ | ||
385 | |||
386 | #define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) | ||
387 | /* | ||
388 | * MSR_P4_MS_ESCR0: 4, 5 | ||
389 | * MSR_P4_MS_ESCR1: 6, 7 | ||
390 | */ | ||
391 | |||
392 | #define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) | ||
393 | /* | ||
394 | * MSR_P4_MS_ESCR0: 4, 5 | ||
395 | * MSR_P4_MS_ESCR1: 6, 7 | ||
396 | */ | ||
397 | |||
398 | #define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) | ||
399 | /* | ||
400 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
401 | * MSR_P4_TBPU_ESCR0: 6, 7 | ||
402 | */ | ||
403 | |||
404 | #define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) | ||
405 | /* | ||
406 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
407 | * MSR_P4_TBPU_ESCR0: 6, 7 | ||
408 | */ | ||
409 | |||
410 | #define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) | ||
411 | /* | ||
412 | * MSR_P4_ALF_ESCR0: 12, 13, 16 | ||
413 | * MSR_P4_ALF_ESCR1: 14, 15, 17 | ||
414 | */ | ||
415 | |||
416 | #define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) | ||
417 | /* | ||
418 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
419 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
420 | */ | ||
421 | |||
422 | #define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) | ||
423 | /* | ||
424 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
425 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
426 | */ | ||
427 | |||
428 | #define P4_BNR P4_EVENT_PACK(0x08, 0x03) | ||
429 | /* | ||
430 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
431 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
432 | */ | ||
433 | |||
434 | #define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) | ||
435 | /* | ||
436 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
437 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
438 | */ | ||
439 | |||
440 | #define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) | ||
441 | /* | ||
442 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
443 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
444 | */ | ||
445 | |||
446 | #define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) | ||
447 | /* | ||
448 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
449 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
450 | */ | ||
451 | |||
452 | #define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) | ||
453 | /* | ||
454 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
455 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
456 | */ | ||
457 | |||
458 | #define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) | ||
459 | /* | ||
460 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
461 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
462 | */ | ||
463 | |||
464 | #define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) | ||
465 | /* | ||
466 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
467 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
468 | */ | ||
469 | |||
470 | #define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) | ||
471 | /* | ||
472 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
473 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
474 | */ | ||
475 | |||
476 | #define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) | ||
477 | /* | ||
478 | * MSR_P4_RAT_ESCR0: 12, 13, 16 | ||
479 | * MSR_P4_RAT_ESCR1: 14, 15, 17 | ||
480 | */ | ||
481 | |||
482 | #define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) | ||
483 | /* | ||
484 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
485 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
486 | */ | ||
487 | |||
488 | #define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) | ||
489 | /* | ||
490 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
491 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
492 | */ | ||
493 | |||
494 | #define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) | ||
495 | /* | ||
496 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
497 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
498 | */ | ||
499 | |||
500 | #define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 0x05) | ||
501 | /* | ||
502 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
503 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
504 | */ | ||
505 | |||
506 | #define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) | ||
507 | /* | ||
508 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
509 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
510 | */ | ||
511 | |||
512 | /* | ||
513 | * a caller should use the P4_EVENT_ATTR helper to | ||
514 | * pick the attribute needed, for example | ||
515 | * | ||
516 | * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) | ||
517 | */ | ||
518 | enum P4_EVENTS_ATTR { | ||
519 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), | ||
520 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), | ||
521 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), | ||
522 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), | ||
523 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), | ||
524 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), | ||
525 | P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), | ||
526 | |||
527 | P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), | ||
528 | |||
529 | P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), | ||
530 | P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), | ||
531 | P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), | ||
532 | |||
533 | P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), | ||
534 | P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), | ||
535 | |||
536 | P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), | ||
537 | P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), | ||
538 | |||
539 | P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), | ||
540 | |||
541 | P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), | ||
542 | |||
543 | P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), | ||
544 | P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), | ||
545 | P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), | ||
546 | P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), | ||
547 | |||
548 | P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), | ||
549 | P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), | ||
550 | |||
551 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), | ||
552 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), | ||
553 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), | ||
554 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), | ||
555 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), | ||
556 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), | ||
557 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), | ||
558 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), | ||
559 | P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), | ||
560 | |||
561 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), | ||
562 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), | ||
563 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), | ||
564 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), | ||
565 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), | ||
566 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), | ||
567 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), | ||
568 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), | ||
569 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), | ||
570 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), | ||
571 | P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), | ||
572 | |||
573 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), | ||
574 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), | ||
575 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), | ||
576 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), | ||
577 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), | ||
578 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), | ||
579 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), | ||
580 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), | ||
581 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), | ||
582 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), | ||
583 | P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), | ||
584 | |||
585 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), | ||
586 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), | ||
587 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), | ||
588 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), | ||
589 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), | ||
590 | P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), | ||
591 | |||
592 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), | ||
593 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), | ||
594 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), | ||
595 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), | ||
596 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), | ||
597 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), | ||
598 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), | ||
599 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), | ||
600 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), | ||
601 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), | ||
602 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), | ||
603 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), | ||
604 | P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), | ||
605 | |||
606 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), | ||
607 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), | ||
608 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), | ||
609 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), | ||
610 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), | ||
611 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), | ||
612 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), | ||
613 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), | ||
614 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), | ||
615 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), | ||
616 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), | ||
617 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), | ||
618 | P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), | ||
619 | |||
620 | P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), | ||
621 | |||
622 | P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), | ||
623 | |||
624 | P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), | ||
625 | |||
626 | P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), | ||
627 | |||
628 | P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), | ||
629 | |||
630 | P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), | ||
631 | |||
632 | P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), | ||
633 | |||
634 | P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), | ||
635 | |||
636 | P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), | ||
637 | |||
638 | P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), | ||
639 | |||
640 | P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), | ||
641 | |||
642 | P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), | ||
643 | P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), | ||
644 | P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), | ||
645 | |||
646 | P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
647 | P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), | ||
648 | P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), | ||
649 | P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), | ||
650 | |||
651 | P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
652 | P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), | ||
653 | P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), | ||
654 | P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), | ||
655 | |||
656 | P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), | ||
657 | |||
658 | P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), | ||
659 | P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), | ||
660 | |||
661 | P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), | ||
662 | P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), | ||
663 | |||
664 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), | ||
665 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), | ||
666 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), | ||
667 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), | ||
668 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), | ||
669 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), | ||
670 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), | ||
671 | P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), | ||
672 | |||
673 | P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), | ||
674 | P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), | ||
675 | |||
676 | P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), | ||
677 | P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), | ||
678 | P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), | ||
679 | P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), | ||
680 | |||
681 | P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), | ||
682 | P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), | ||
683 | |||
684 | P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), | ||
685 | P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), | ||
686 | |||
687 | P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), | ||
688 | P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), | ||
689 | P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), | ||
690 | P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), | ||
691 | |||
692 | P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), | ||
693 | |||
694 | P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), | ||
695 | P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), | ||
696 | P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), | ||
697 | P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), | ||
698 | P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), | ||
699 | |||
700 | P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), | ||
701 | P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), | ||
702 | P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), | ||
703 | |||
704 | P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), | ||
705 | P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), | ||
706 | }; | ||
707 | |||
708 | #endif /* PERF_EVENT_P4_H */ | ||
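To make the packing scheme above concrete, here is a small worked sketch (not part of the header) that builds a 64-bit config value for P4_BSQ_CACHE_REFERENCE using only the macros defined in this file; the chosen event mask and CCCR bits are illustrative, not a complete ESCR/CCCR programming recipe.

    /* Sketch: pack ESCR+CCCR for BSQ_CACHE_REFERENCE, 2nd-level read hits. */
    #include <linux/types.h>
    #include <asm/perf_event_p4.h>

    static u64 example_p4_config(void)
    {
            u32 escr, cccr;
            u64 config;

            escr  = 0x0c << P4_EVNTSEL_EVENT_SHIFT;         /* event code  */
            escr |= P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)
                            << P4_EVNTSEL_EVENTMASK_SHIFT;  /* event mask  */

            cccr  = (0x07 << P4_CCCR_ESCR_SELECT_SHIFT)     /* ESCR select */
                  | P4_CCCR_ENABLE;

            config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);

            /* round-trips: p4_config_unpack_opcode(config) == P4_BSQ_CACHE_REFERENCE */
            return config;
    }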
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 60398a0d947c..5dacf63f913e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -29,46 +29,53 @@ | |||
29 | #include <asm/stacktrace.h> | 29 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
31 | 31 | ||
32 | static u64 perf_event_mask __read_mostly; | 32 | #if 0 |
33 | #undef wrmsrl | ||
34 | #define wrmsrl(msr, val) \ | ||
35 | do { \ | ||
36 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
37 | (unsigned long)(val)); \ | ||
38 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
39 | (u32)((u64)(val) >> 32)); \ | ||
40 | } while (0) | ||
41 | #endif | ||
33 | 42 | ||
34 | /* The maximal number of PEBS events: */ | 43 | /* |
35 | #define MAX_PEBS_EVENTS 4 | 44 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
45 | */ | ||
46 | static unsigned long | ||
47 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
48 | { | ||
49 | unsigned long offset, addr = (unsigned long)from; | ||
50 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
51 | unsigned long size, len = 0; | ||
52 | struct page *page; | ||
53 | void *map; | ||
54 | int ret; | ||
36 | 55 | ||
37 | /* The size of a BTS record in bytes: */ | 56 | do { |
38 | #define BTS_RECORD_SIZE 24 | 57 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
58 | if (!ret) | ||
59 | break; | ||
39 | 60 | ||
40 | /* The size of a per-cpu BTS buffer in bytes: */ | 61 | offset = addr & (PAGE_SIZE - 1); |
41 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 62 | size = min(PAGE_SIZE - offset, n - len); |
42 | 63 | ||
43 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 64 | map = kmap_atomic(page, type); |
44 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 65 | memcpy(to, map+offset, size); |
66 | kunmap_atomic(map, type); | ||
67 | put_page(page); | ||
45 | 68 | ||
69 | len += size; | ||
70 | to += size; | ||
71 | addr += size; | ||
46 | 72 | ||
47 | /* | 73 | } while (len < n); |
48 | * Bits in the debugctlmsr controlling branch tracing. | ||
49 | */ | ||
50 | #define X86_DEBUGCTL_TR (1 << 6) | ||
51 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
52 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
53 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
54 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
55 | 74 | ||
56 | /* | 75 | return len; |
57 | * A debug store configuration. | 76 | } |
58 | * | 77 | |
59 | * We only support architectures that use 64bit fields. | 78 | static u64 perf_event_mask __read_mostly; |
60 | */ | ||
61 | struct debug_store { | ||
62 | u64 bts_buffer_base; | ||
63 | u64 bts_index; | ||
64 | u64 bts_absolute_maximum; | ||
65 | u64 bts_interrupt_threshold; | ||
66 | u64 pebs_buffer_base; | ||
67 | u64 pebs_index; | ||
68 | u64 pebs_absolute_maximum; | ||
69 | u64 pebs_interrupt_threshold; | ||
70 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
71 | }; | ||
72 | 79 | ||
73 | struct event_constraint { | 80 | struct event_constraint { |
74 | union { | 81 | union { |
@@ -87,18 +94,40 @@ struct amd_nb { | |||
87 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
88 | }; | 95 | }; |
89 | 96 | ||
97 | #define MAX_LBR_ENTRIES 16 | ||
98 | |||
90 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
100 | /* | ||
101 | * Generic x86 PMC bits | ||
102 | */ | ||
91 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
92 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
93 | unsigned long interrupts; | 105 | unsigned long interrupts; |
94 | int enabled; | 106 | int enabled; |
95 | struct debug_store *ds; | ||
96 | 107 | ||
97 | int n_events; | 108 | int n_events; |
98 | int n_added; | 109 | int n_added; |
99 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 110 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
100 | u64 tags[X86_PMC_IDX_MAX]; | 111 | u64 tags[X86_PMC_IDX_MAX]; |
101 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 112 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
113 | |||
114 | /* | ||
115 | * Intel DebugStore bits | ||
116 | */ | ||
117 | struct debug_store *ds; | ||
118 | u64 pebs_enabled; | ||
119 | |||
120 | /* | ||
121 | * Intel LBR bits | ||
122 | */ | ||
123 | int lbr_users; | ||
124 | void *lbr_context; | ||
125 | struct perf_branch_stack lbr_stack; | ||
126 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
127 | |||
128 | /* | ||
129 | * AMD specific bits | ||
130 | */ | ||
102 | struct amd_nb *amd_nb; | 131 | struct amd_nb *amd_nb; |
103 | }; | 132 | }; |
104 | 133 | ||
@@ -112,22 +141,48 @@ struct cpu_hw_events { | |||
112 | #define EVENT_CONSTRAINT(c, n, m) \ | 141 | #define EVENT_CONSTRAINT(c, n, m) \ |
113 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 142 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
114 | 143 | ||
144 | /* | ||
145 | * Constraint on the Event code. | ||
146 | */ | ||
115 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 147 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
116 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 148 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) |
117 | 149 | ||
150 | /* | ||
151 | * Constraint on the Event code + UMask + fixed-mask | ||
152 | */ | ||
118 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 153 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
119 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 154 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) |
120 | 155 | ||
156 | /* | ||
157 | * Constraint on the Event code + UMask | ||
158 | */ | ||
159 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
160 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
161 | |||
121 | #define EVENT_CONSTRAINT_END \ | 162 | #define EVENT_CONSTRAINT_END \ |
122 | EVENT_CONSTRAINT(0, 0, 0) | 163 | EVENT_CONSTRAINT(0, 0, 0) |
123 | 164 | ||
124 | #define for_each_event_constraint(e, c) \ | 165 | #define for_each_event_constraint(e, c) \ |
125 | for ((e) = (c); (e)->cmask; (e)++) | 166 | for ((e) = (c); (e)->cmask; (e)++) |
126 | 167 | ||
168 | union perf_capabilities { | ||
169 | struct { | ||
170 | u64 lbr_format : 6; | ||
171 | u64 pebs_trap : 1; | ||
172 | u64 pebs_arch_reg : 1; | ||
173 | u64 pebs_format : 4; | ||
174 | u64 smm_freeze : 1; | ||
175 | }; | ||
176 | u64 capabilities; | ||
177 | }; | ||
178 | |||
127 | /* | 179 | /* |
128 | * struct x86_pmu - generic x86 pmu | 180 | * struct x86_pmu - generic x86 pmu |
129 | */ | 181 | */ |
130 | struct x86_pmu { | 182 | struct x86_pmu { |
183 | /* | ||
184 | * Generic x86 PMC bits | ||
185 | */ | ||
131 | const char *name; | 186 | const char *name; |
132 | int version; | 187 | int version; |
133 | int (*handle_irq)(struct pt_regs *); | 188 | int (*handle_irq)(struct pt_regs *); |
@@ -135,6 +190,8 @@ struct x86_pmu { | |||
135 | void (*enable_all)(void); | 190 | void (*enable_all)(void); |
136 | void (*enable)(struct perf_event *); | 191 | void (*enable)(struct perf_event *); |
137 | void (*disable)(struct perf_event *); | 192 | void (*disable)(struct perf_event *); |
193 | int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); | ||
194 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
138 | unsigned eventsel; | 195 | unsigned eventsel; |
139 | unsigned perfctr; | 196 | unsigned perfctr; |
140 | u64 (*event_map)(int); | 197 | u64 (*event_map)(int); |
@@ -146,10 +203,6 @@ struct x86_pmu { | |||
146 | u64 event_mask; | 203 | u64 event_mask; |
147 | int apic; | 204 | int apic; |
148 | u64 max_period; | 205 | u64 max_period; |
149 | u64 intel_ctrl; | ||
150 | void (*enable_bts)(u64 config); | ||
151 | void (*disable_bts)(void); | ||
152 | |||
153 | struct event_constraint * | 206 | struct event_constraint * |
154 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 207 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
155 | struct perf_event *event); | 208 | struct perf_event *event); |
@@ -157,11 +210,32 @@ struct x86_pmu { | |||
157 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 210 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
158 | struct perf_event *event); | 211 | struct perf_event *event); |
159 | struct event_constraint *event_constraints; | 212 | struct event_constraint *event_constraints; |
213 | void (*quirks)(void); | ||
160 | 214 | ||
161 | void (*cpu_prepare)(int cpu); | 215 | void (*cpu_prepare)(int cpu); |
162 | void (*cpu_starting)(int cpu); | 216 | void (*cpu_starting)(int cpu); |
163 | void (*cpu_dying)(int cpu); | 217 | void (*cpu_dying)(int cpu); |
164 | void (*cpu_dead)(int cpu); | 218 | void (*cpu_dead)(int cpu); |
219 | |||
220 | /* | ||
221 | * Intel Arch Perfmon v2+ | ||
222 | */ | ||
223 | u64 intel_ctrl; | ||
224 | union perf_capabilities intel_cap; | ||
225 | |||
226 | /* | ||
227 | * Intel DebugStore bits | ||
228 | */ | ||
229 | int bts, pebs; | ||
230 | int pebs_record_size; | ||
231 | void (*drain_pebs)(struct pt_regs *regs); | ||
232 | struct event_constraint *pebs_constraints; | ||
233 | |||
234 | /* | ||
235 | * Intel LBR | ||
236 | */ | ||
237 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
238 | int lbr_nr; /* hardware stack size */ | ||
165 | }; | 239 | }; |
166 | 240 | ||
167 | static struct x86_pmu x86_pmu __read_mostly; | 241 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -293,110 +367,14 @@ static void release_pmc_hardware(void) | |||
293 | #endif | 367 | #endif |
294 | } | 368 | } |
295 | 369 | ||
296 | static inline bool bts_available(void) | 370 | static int reserve_ds_buffers(void); |
297 | { | 371 | static void release_ds_buffers(void); |
298 | return x86_pmu.enable_bts != NULL; | ||
299 | } | ||
300 | |||
301 | static void init_debug_store_on_cpu(int cpu) | ||
302 | { | ||
303 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
304 | |||
305 | if (!ds) | ||
306 | return; | ||
307 | |||
308 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
309 | (u32)((u64)(unsigned long)ds), | ||
310 | (u32)((u64)(unsigned long)ds >> 32)); | ||
311 | } | ||
312 | |||
313 | static void fini_debug_store_on_cpu(int cpu) | ||
314 | { | ||
315 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
316 | return; | ||
317 | |||
318 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
319 | } | ||
320 | |||
321 | static void release_bts_hardware(void) | ||
322 | { | ||
323 | int cpu; | ||
324 | |||
325 | if (!bts_available()) | ||
326 | return; | ||
327 | |||
328 | get_online_cpus(); | ||
329 | |||
330 | for_each_online_cpu(cpu) | ||
331 | fini_debug_store_on_cpu(cpu); | ||
332 | |||
333 | for_each_possible_cpu(cpu) { | ||
334 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
335 | |||
336 | if (!ds) | ||
337 | continue; | ||
338 | |||
339 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
340 | |||
341 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
342 | kfree(ds); | ||
343 | } | ||
344 | |||
345 | put_online_cpus(); | ||
346 | } | ||
347 | |||
348 | static int reserve_bts_hardware(void) | ||
349 | { | ||
350 | int cpu, err = 0; | ||
351 | |||
352 | if (!bts_available()) | ||
353 | return 0; | ||
354 | |||
355 | get_online_cpus(); | ||
356 | |||
357 | for_each_possible_cpu(cpu) { | ||
358 | struct debug_store *ds; | ||
359 | void *buffer; | ||
360 | |||
361 | err = -ENOMEM; | ||
362 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
363 | if (unlikely(!buffer)) | ||
364 | break; | ||
365 | |||
366 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
367 | if (unlikely(!ds)) { | ||
368 | kfree(buffer); | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
373 | ds->bts_index = ds->bts_buffer_base; | ||
374 | ds->bts_absolute_maximum = | ||
375 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
376 | ds->bts_interrupt_threshold = | ||
377 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
378 | |||
379 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
380 | err = 0; | ||
381 | } | ||
382 | |||
383 | if (err) | ||
384 | release_bts_hardware(); | ||
385 | else { | ||
386 | for_each_online_cpu(cpu) | ||
387 | init_debug_store_on_cpu(cpu); | ||
388 | } | ||
389 | |||
390 | put_online_cpus(); | ||
391 | |||
392 | return err; | ||
393 | } | ||
394 | 372 | ||
395 | static void hw_perf_event_destroy(struct perf_event *event) | 373 | static void hw_perf_event_destroy(struct perf_event *event) |
396 | { | 374 | { |
397 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 375 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
398 | release_pmc_hardware(); | 376 | release_pmc_hardware(); |
399 | release_bts_hardware(); | 377 | release_ds_buffers(); |
400 | mutex_unlock(&pmc_reserve_mutex); | 378 | mutex_unlock(&pmc_reserve_mutex); |
401 | } | 379 | } |
402 | } | 380 | } |
@@ -439,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
439 | return 0; | 417 | return 0; |
440 | } | 418 | } |
441 | 419 | ||
420 | static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) | ||
421 | { | ||
422 | /* | ||
423 | * Generate PMC IRQs: | ||
424 | * (keep 'enabled' bit clear for now) | ||
425 | */ | ||
426 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
427 | |||
428 | /* | ||
429 | * Count user and OS events unless requested not to | ||
430 | */ | ||
431 | if (!attr->exclude_user) | ||
432 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
433 | if (!attr->exclude_kernel) | ||
434 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
435 | |||
436 | return 0; | ||
437 | } | ||
438 | |||
442 | /* | 439 | /* |
443 | * Setup the hardware configuration for a given attr_type | 440 | * Setup the hardware configuration for a given attr_type |
444 | */ | 441 | */ |
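The x86_hw_config() helper added above becomes the generic implementation behind the new x86_pmu.hw_config() callback, so model-specific drivers can extend it instead of duplicating it. A hypothetical override is sketched below; the extra config bit and its condition are invented purely to show the pattern.

    /* Sketch: a model-specific hw_config built on the generic helper. */
    static int my_model_hw_config(struct perf_event_attr *attr,
                                  struct hw_perf_event *hwc)
    {
            int ret = x86_hw_config(attr, hwc);     /* generic bits first */

            if (ret)
                    return ret;

            /* then layer on model-specific bits (MY_MODEL_EXTRA_BIT is invented) */
            if (!attr->exclude_hv)
                    hwc->config |= MY_MODEL_EXTRA_BIT;

            return 0;
    }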
@@ -459,7 +456,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
459 | if (!reserve_pmc_hardware()) | 456 | if (!reserve_pmc_hardware()) |
460 | err = -EBUSY; | 457 | err = -EBUSY; |
461 | else | 458 | else |
462 | err = reserve_bts_hardware(); | 459 | err = reserve_ds_buffers(); |
463 | } | 460 | } |
464 | if (!err) | 461 | if (!err) |
465 | atomic_inc(&active_events); | 462 | atomic_inc(&active_events); |
@@ -470,23 +467,14 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
470 | 467 | ||
471 | event->destroy = hw_perf_event_destroy; | 468 | event->destroy = hw_perf_event_destroy; |
472 | 469 | ||
473 | /* | ||
474 | * Generate PMC IRQs: | ||
475 | * (keep 'enabled' bit clear for now) | ||
476 | */ | ||
477 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
478 | |||
479 | hwc->idx = -1; | 470 | hwc->idx = -1; |
480 | hwc->last_cpu = -1; | 471 | hwc->last_cpu = -1; |
481 | hwc->last_tag = ~0ULL; | 472 | hwc->last_tag = ~0ULL; |
482 | 473 | ||
483 | /* | 474 | /* Processor specifics */ |
484 | * Count user and OS events unless requested not to. | 475 | err = x86_pmu.hw_config(attr, hwc); |
485 | */ | 476 | if (err) |
486 | if (!attr->exclude_user) | 477 | return err; |
487 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
488 | if (!attr->exclude_kernel) | ||
489 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
490 | 478 | ||
491 | if (!hwc->sample_period) { | 479 | if (!hwc->sample_period) { |
492 | hwc->sample_period = x86_pmu.max_period; | 480 | hwc->sample_period = x86_pmu.max_period; |
@@ -537,11 +525,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
537 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 525 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
538 | (hwc->sample_period == 1)) { | 526 | (hwc->sample_period == 1)) { |
539 | /* BTS is not supported by this architecture. */ | 527 | /* BTS is not supported by this architecture. */ |
540 | if (!bts_available()) | 528 | if (!x86_pmu.bts) |
541 | return -EOPNOTSUPP; | 529 | return -EOPNOTSUPP; |
542 | 530 | ||
543 | /* BTS is currently only allowed for user-mode. */ | 531 | /* BTS is currently only allowed for user-mode. */ |
544 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 532 | if (!attr->exclude_kernel) |
545 | return -EOPNOTSUPP; | 533 | return -EOPNOTSUPP; |
546 | } | 534 | } |
547 | 535 | ||
@@ -850,14 +838,15 @@ void hw_perf_enable(void) | |||
850 | 838 | ||
851 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 839 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) |
852 | { | 840 | { |
853 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 841 | wrmsrl(hwc->config_base + hwc->idx, |
854 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | 842 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); |
855 | } | 843 | } |
856 | 844 | ||
857 | static inline void x86_pmu_disable_event(struct perf_event *event) | 845 | static inline void x86_pmu_disable_event(struct perf_event *event) |
858 | { | 846 | { |
859 | struct hw_perf_event *hwc = &event->hw; | 847 | struct hw_perf_event *hwc = &event->hw; |
860 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 848 | |
849 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
861 | } | 850 | } |
862 | 851 | ||
863 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 852 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -872,7 +861,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
872 | struct hw_perf_event *hwc = &event->hw; | 861 | struct hw_perf_event *hwc = &event->hw; |
873 | s64 left = atomic64_read(&hwc->period_left); | 862 | s64 left = atomic64_read(&hwc->period_left); |
874 | s64 period = hwc->sample_period; | 863 | s64 period = hwc->sample_period; |
875 | int err, ret = 0, idx = hwc->idx; | 864 | int ret = 0, idx = hwc->idx; |
876 | 865 | ||
877 | if (idx == X86_PMC_IDX_FIXED_BTS) | 866 | if (idx == X86_PMC_IDX_FIXED_BTS) |
878 | return 0; | 867 | return 0; |
@@ -910,8 +899,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
910 | */ | 899 | */ |
911 | atomic64_set(&hwc->prev_count, (u64)-left); | 900 | atomic64_set(&hwc->prev_count, (u64)-left); |
912 | 901 | ||
913 | err = checking_wrmsrl(hwc->event_base + idx, | 902 | wrmsrl(hwc->event_base + idx, |
914 | (u64)(-left) & x86_pmu.event_mask); | 903 | (u64)(-left) & x86_pmu.event_mask); |
915 | 904 | ||
916 | perf_event_update_userpage(event); | 905 | perf_event_update_userpage(event); |
917 | 906 | ||
@@ -948,7 +937,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
948 | if (n < 0) | 937 | if (n < 0) |
949 | return n; | 938 | return n; |
950 | 939 | ||
951 | ret = x86_schedule_events(cpuc, n, assign); | 940 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
952 | if (ret) | 941 | if (ret) |
953 | return ret; | 942 | return ret; |
954 | /* | 943 | /* |
@@ -989,6 +978,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
989 | void perf_event_print_debug(void) | 978 | void perf_event_print_debug(void) |
990 | { | 979 | { |
991 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 980 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
981 | u64 pebs; | ||
992 | struct cpu_hw_events *cpuc; | 982 | struct cpu_hw_events *cpuc; |
993 | unsigned long flags; | 983 | unsigned long flags; |
994 | int cpu, idx; | 984 | int cpu, idx; |
@@ -1006,14 +996,16 @@ void perf_event_print_debug(void) | |||
1006 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 996 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1007 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 997 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1008 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 998 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
999 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1009 | 1000 | ||
1010 | pr_info("\n"); | 1001 | pr_info("\n"); |
1011 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1002 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1012 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1003 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1013 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1004 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1014 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1005 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1006 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1015 | } | 1007 | } |
1016 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1008 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1017 | 1009 | ||
1018 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1010 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1019 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1011 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
@@ -1272,12 +1264,15 @@ int hw_perf_group_sched_in(struct perf_event *leader, | |||
1272 | int assign[X86_PMC_IDX_MAX]; | 1264 | int assign[X86_PMC_IDX_MAX]; |
1273 | int n0, n1, ret; | 1265 | int n0, n1, ret; |
1274 | 1266 | ||
1267 | if (!x86_pmu_initialized()) | ||
1268 | return 0; | ||
1269 | |||
1275 | /* n0 = total number of events */ | 1270 | /* n0 = total number of events */ |
1276 | n0 = collect_events(cpuc, leader, true); | 1271 | n0 = collect_events(cpuc, leader, true); |
1277 | if (n0 < 0) | 1272 | if (n0 < 0) |
1278 | return n0; | 1273 | return n0; |
1279 | 1274 | ||
1280 | ret = x86_schedule_events(cpuc, n0, assign); | 1275 | ret = x86_pmu.schedule_events(cpuc, n0, assign); |
1281 | if (ret) | 1276 | if (ret) |
1282 | return ret; | 1277 | return ret; |
1283 | 1278 | ||
@@ -1327,6 +1322,9 @@ undo: | |||
1327 | 1322 | ||
1328 | #include "perf_event_amd.c" | 1323 | #include "perf_event_amd.c" |
1329 | #include "perf_event_p6.c" | 1324 | #include "perf_event_p6.c" |
1325 | #include "perf_event_p4.c" | ||
1326 | #include "perf_event_intel_lbr.c" | ||
1327 | #include "perf_event_intel_ds.c" | ||
1330 | #include "perf_event_intel.c" | 1328 | #include "perf_event_intel.c" |
1331 | 1329 | ||
1332 | static int __cpuinit | 1330 | static int __cpuinit |
@@ -1398,6 +1396,9 @@ void __init init_hw_perf_events(void) | |||
1398 | 1396 | ||
1399 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1397 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1400 | 1398 | ||
1399 | if (x86_pmu.quirks) | ||
1400 | x86_pmu.quirks(); | ||
1401 | |||
1401 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1402 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { |
1402 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1403 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
1403 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1404 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); |
@@ -1459,6 +1460,32 @@ static const struct pmu pmu = { | |||
1459 | }; | 1460 | }; |
1460 | 1461 | ||
1461 | /* | 1462 | /* |
1463 | * validate that we can schedule this event | ||
1464 | */ | ||
1465 | static int validate_event(struct perf_event *event) | ||
1466 | { | ||
1467 | struct cpu_hw_events *fake_cpuc; | ||
1468 | struct event_constraint *c; | ||
1469 | int ret = 0; | ||
1470 | |||
1471 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1472 | if (!fake_cpuc) | ||
1473 | return -ENOMEM; | ||
1474 | |||
1475 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1476 | |||
1477 | if (!c || !c->weight) | ||
1478 | ret = -ENOSPC; | ||
1479 | |||
1480 | if (x86_pmu.put_event_constraints) | ||
1481 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1482 | |||
1483 | kfree(fake_cpuc); | ||
1484 | |||
1485 | return ret; | ||
1486 | } | ||
1487 | |||
1488 | /* | ||
1462 | * validate a single event group | 1489 | * validate a single event group |
1463 | * | 1490 | * |
1464 | * validation includes: | 1491 |
@@ -1498,7 +1525,7 @@ static int validate_group(struct perf_event *event) | |||
1498 | 1525 | ||
1499 | fake_cpuc->n_events = n; | 1526 | fake_cpuc->n_events = n; |
1500 | 1527 | ||
1501 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1528 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1502 | 1529 | ||
1503 | out_free: | 1530 | out_free: |
1504 | kfree(fake_cpuc); | 1531 | kfree(fake_cpuc); |
@@ -1523,6 +1550,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1523 | 1550 | ||
1524 | if (event->group_leader != event) | 1551 | if (event->group_leader != event) |
1525 | err = validate_group(event); | 1552 | err = validate_group(event); |
1553 | else | ||
1554 | err = validate_event(event); | ||
1526 | 1555 | ||
1527 | event->pmu = tmp; | 1556 | event->pmu = tmp; |
1528 | } | 1557 | } |
@@ -1593,41 +1622,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1593 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1622 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1594 | } | 1623 | } |
1595 | 1624 | ||
1596 | /* | ||
1597 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1598 | */ | ||
1599 | static unsigned long | ||
1600 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1601 | { | ||
1602 | unsigned long offset, addr = (unsigned long)from; | ||
1603 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1604 | unsigned long size, len = 0; | ||
1605 | struct page *page; | ||
1606 | void *map; | ||
1607 | int ret; | ||
1608 | |||
1609 | do { | ||
1610 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1611 | if (!ret) | ||
1612 | break; | ||
1613 | |||
1614 | offset = addr & (PAGE_SIZE - 1); | ||
1615 | size = min(PAGE_SIZE - offset, n - len); | ||
1616 | |||
1617 | map = kmap_atomic(page, type); | ||
1618 | memcpy(to, map+offset, size); | ||
1619 | kunmap_atomic(map, type); | ||
1620 | put_page(page); | ||
1621 | |||
1622 | len += size; | ||
1623 | to += size; | ||
1624 | addr += size; | ||
1625 | |||
1626 | } while (len < n); | ||
1627 | |||
1628 | return len; | ||
1629 | } | ||
1630 | |||
1631 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1625 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) |
1632 | { | 1626 | { |
1633 | unsigned long bytes; | 1627 | unsigned long bytes; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 573458f1caf2..358a8e3d05f8 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = { | |||
363 | .enable_all = x86_pmu_enable_all, | 363 | .enable_all = x86_pmu_enable_all, |
364 | .enable = x86_pmu_enable_event, | 364 | .enable = x86_pmu_enable_event, |
365 | .disable = x86_pmu_disable_event, | 365 | .disable = x86_pmu_disable_event, |
366 | .hw_config = x86_hw_config, | ||
367 | .schedule_events = x86_schedule_events, | ||
366 | .eventsel = MSR_K7_EVNTSEL0, | 368 | .eventsel = MSR_K7_EVNTSEL0, |
367 | .perfctr = MSR_K7_PERFCTR0, | 369 | .perfctr = MSR_K7_PERFCTR0, |
368 | .event_map = amd_pmu_event_map, | 370 | .event_map = amd_pmu_event_map, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 84bfde64a337..044b8436b19d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | 470 | return hw_event & CORE_EVNTSEL_MASK; |
471 | } | 471 | } |
472 | 472 | ||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 473 | static void intel_pmu_disable_all(void) |
510 | { | 474 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 475 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,12 +478,17 @@ static void intel_pmu_disable_all(void) | |||
514 | 478 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 479 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 480 | intel_pmu_disable_bts(); |
481 | |||
482 | intel_pmu_pebs_disable_all(); | ||
483 | intel_pmu_lbr_disable_all(); | ||
517 | } | 484 | } |
518 | 485 | ||
519 | static void intel_pmu_enable_all(void) | 486 | static void intel_pmu_enable_all(void) |
520 | { | 487 | { |
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 488 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
522 | 489 | ||
490 | intel_pmu_pebs_enable_all(); | ||
491 | intel_pmu_lbr_enable_all(); | ||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 492 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
524 | 493 | ||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 494 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
@@ -547,8 +516,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 516 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 517 | } |
549 | 518 | ||
550 | static inline void | 519 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 520 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 521 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 522 | u64 ctrl_val, mask; |
@@ -557,71 +525,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
557 | 525 | ||
558 | rdmsrl(hwc->config_base, ctrl_val); | 526 | rdmsrl(hwc->config_base, ctrl_val); |
559 | ctrl_val &= ~mask; | 527 | ctrl_val &= ~mask; |
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 528 | wrmsrl(hwc->config_base, ctrl_val); |
561 | } | 529 | } |
562 | 530 | ||
563 | static void intel_pmu_drain_bts_buffer(void) | 531 | static void intel_pmu_disable_event(struct perf_event *event) |
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | ||
622 | |||
623 | static inline void | ||
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 532 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 533 | struct hw_perf_event *hwc = &event->hw; |
627 | 534 | ||
@@ -637,14 +544,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 544 | } |
638 | 545 | ||
639 | x86_pmu_disable_event(event); | 546 | x86_pmu_disable_event(event); |
547 | |||
548 | if (unlikely(event->attr.precise)) | ||
549 | intel_pmu_pebs_disable(event); | ||
640 | } | 550 | } |
641 | 551 | ||
642 | static inline void | 552 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 553 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 554 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 555 | u64 ctrl_val, bits, mask; |
647 | int err; | ||
648 | 556 | ||
649 | /* | 557 | /* |
650 | * Enable IRQ generation (0x8), | 558 | * Enable IRQ generation (0x8), |
@@ -669,7 +577,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | 577 | rdmsrl(hwc->config_base, ctrl_val); |
670 | ctrl_val &= ~mask; | 578 | ctrl_val &= ~mask; |
671 | ctrl_val |= bits; | 579 | ctrl_val |= bits; |
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 580 | wrmsrl(hwc->config_base, ctrl_val); |
673 | } | 581 | } |
674 | 582 | ||
675 | static void intel_pmu_enable_event(struct perf_event *event) | 583 | static void intel_pmu_enable_event(struct perf_event *event) |
@@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 597 | return; |
690 | } | 598 | } |
691 | 599 | ||
600 | if (unlikely(event->attr.precise)) | ||
601 | intel_pmu_pebs_enable(event); | ||
602 | |||
692 | __x86_pmu_enable_event(hwc); | 603 | __x86_pmu_enable_event(hwc); |
693 | } | 604 | } |
694 | 605 | ||
@@ -762,6 +673,15 @@ again: | |||
762 | 673 | ||
763 | inc_irq_stat(apic_perf_irqs); | 674 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 675 | ack = status; |
676 | |||
677 | intel_pmu_lbr_read(); | ||
678 | |||
679 | /* | ||
680 | * PEBS overflow sets bit 62 in the global status register | ||
681 | */ | ||
682 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
683 | x86_pmu.drain_pebs(regs); | ||
684 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 685 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 686 | struct perf_event *event = cpuc->events[bit]; |
767 | 687 | ||
@@ -791,22 +711,18 @@ done: | |||
791 | return 1; | 711 | return 1; |
792 | } | 712 | } |
793 | 713 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 714 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 715 | intel_bts_constraints(struct perf_event *event) |
799 | { | 716 | { |
800 | unsigned int hw_event; | 717 | struct hw_perf_event *hwc = &event->hw; |
801 | 718 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 719 | ||
804 | if (unlikely((hw_event == | 720 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 721 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 722 | ||
723 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 724 | return &bts_constraint; |
809 | } | 725 | |
810 | return NULL; | 726 | return NULL; |
811 | } | 727 | } |
812 | 728 | ||
@@ -815,7 +731,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 731 | { |
816 | struct event_constraint *c; | 732 | struct event_constraint *c; |
817 | 733 | ||
818 | c = intel_special_constraints(event); | 734 | c = intel_bts_constraints(event); |
735 | if (c) | ||
736 | return c; | ||
737 | |||
738 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 739 | if (c) |
820 | return c; | 740 | return c; |
821 | 741 | ||
@@ -829,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = { | |||
829 | .enable_all = x86_pmu_enable_all, | 749 | .enable_all = x86_pmu_enable_all, |
830 | .enable = x86_pmu_enable_event, | 750 | .enable = x86_pmu_enable_event, |
831 | .disable = x86_pmu_disable_event, | 751 | .disable = x86_pmu_disable_event, |
752 | .hw_config = x86_hw_config, | ||
753 | .schedule_events = x86_schedule_events, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 754 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 755 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
834 | .event_map = intel_pmu_event_map, | 756 | .event_map = intel_pmu_event_map, |
@@ -845,6 +767,20 @@ static __initconst struct x86_pmu core_pmu = { | |||
845 | .event_constraints = intel_core_event_constraints, | 767 | .event_constraints = intel_core_event_constraints, |
846 | }; | 768 | }; |
847 | 769 | ||
770 | static void intel_pmu_cpu_starting(int cpu) | ||
771 | { | ||
772 | init_debug_store_on_cpu(cpu); | ||
773 | /* | ||
774 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
775 | */ | ||
776 | intel_pmu_lbr_reset(); | ||
777 | } | ||
778 | |||
779 | static void intel_pmu_cpu_dying(int cpu) | ||
780 | { | ||
781 | fini_debug_store_on_cpu(cpu); | ||
782 | } | ||
783 | |||
848 | static __initconst struct x86_pmu intel_pmu = { | 784 | static __initconst struct x86_pmu intel_pmu = { |
849 | .name = "Intel", | 785 | .name = "Intel", |
850 | .handle_irq = intel_pmu_handle_irq, | 786 | .handle_irq = intel_pmu_handle_irq, |
@@ -852,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = { | |||
852 | .enable_all = intel_pmu_enable_all, | 788 | .enable_all = intel_pmu_enable_all, |
853 | .enable = intel_pmu_enable_event, | 789 | .enable = intel_pmu_enable_event, |
854 | .disable = intel_pmu_disable_event, | 790 | .disable = intel_pmu_disable_event, |
791 | .hw_config = x86_hw_config, | ||
792 | .schedule_events = x86_schedule_events, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 793 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 794 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
857 | .event_map = intel_pmu_event_map, | 795 | .event_map = intel_pmu_event_map, |
@@ -864,14 +802,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 802 | * the generic event period: |
865 | */ | 803 | */ |
866 | .max_period = (1ULL << 31) - 1, | 804 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 805 | .get_event_constraints = intel_get_event_constraints, |
870 | 806 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 807 | .cpu_starting = intel_pmu_cpu_starting, |
872 | .cpu_dying = fini_debug_store_on_cpu, | 808 | .cpu_dying = intel_pmu_cpu_dying, |
873 | }; | 809 | }; |
874 | 810 | ||
811 | static void intel_clovertown_quirks(void) | ||
812 | { | ||
813 | /* | ||
814 | * PEBS is unreliable due to: | ||
815 | * | ||
816 | * AJ67 - PEBS may experience CPL leaks | ||
817 | * AJ68 - PEBS PMI may be delayed by one event | ||
818 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
819 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
820 | * | ||
821 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
822 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
823 | * | ||
824 | * AJ106 could possibly be worked around by not allowing LBR | ||
825 | * usage from PEBS, including the fixup. | ||
826 | * AJ68 could possibly be worked around by always programming | ||
827 | * a pebs_event_reset[0] value and coping with the lost events. | ||
828 | * | ||
829 | * But taken together it might just make sense to not enable PEBS on | ||
830 | * these chips. | ||
831 | */ | ||
832 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
833 | x86_pmu.pebs = 0; | ||
834 | x86_pmu.pebs_constraints = NULL; | ||
835 | } | ||
836 | |||
875 | static __init int intel_pmu_init(void) | 837 | static __init int intel_pmu_init(void) |
876 | { | 838 | { |
877 | union cpuid10_edx edx; | 839 | union cpuid10_edx edx; |
@@ -881,12 +843,13 @@ static __init int intel_pmu_init(void) | |||
881 | int version; | 843 | int version; |
882 | 844 | ||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 845 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
884 | /* check for P6 processor family */ | 846 | switch (boot_cpu_data.x86) { |
885 | if (boot_cpu_data.x86 == 6) { | 847 | case 0x6: |
886 | return p6_pmu_init(); | 848 | return p6_pmu_init(); |
887 | } else { | 849 | case 0xf: |
850 | return p4_pmu_init(); | ||
851 | } | ||
888 | return -ENODEV; | 852 | return -ENODEV; |
889 | } | ||
890 | } | 853 | } |
891 | 854 | ||
892 | /* | 855 | /* |
@@ -916,6 +879,18 @@ static __init int intel_pmu_init(void) | |||
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 879 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); |
917 | 880 | ||
918 | /* | 881 | /* |
882 | * v2 and above have a perf capabilities MSR | ||
883 | */ | ||
884 | if (version > 1) { | ||
885 | u64 capabilities; | ||
886 | |||
887 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
888 | x86_pmu.intel_cap.capabilities = capabilities; | ||
889 | } | ||
890 | |||
891 | intel_ds_init(); | ||
892 | |||
893 | /* | ||
919 | * Install the hw-cache-events table: | 894 | * Install the hw-cache-events table: |
920 | */ | 895 | */ |
921 | switch (boot_cpu_data.x86_model) { | 896 | switch (boot_cpu_data.x86_model) { |
@@ -924,12 +899,15 @@ static __init int intel_pmu_init(void) | |||
924 | break; | 899 | break; |
925 | 900 | ||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 901 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
902 | x86_pmu.quirks = intel_clovertown_quirks; | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 903 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 904 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 905 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 906 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
931 | sizeof(hw_cache_event_ids)); | 907 | sizeof(hw_cache_event_ids)); |
932 | 908 | ||
909 | intel_pmu_lbr_init_core(); | ||
910 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 911 | x86_pmu.event_constraints = intel_core2_event_constraints; |
934 | pr_cont("Core2 events, "); | 912 | pr_cont("Core2 events, "); |
935 | break; | 913 | break; |
@@ -939,13 +917,18 @@ static __init int intel_pmu_init(void) | |||
939 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 917 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
940 | sizeof(hw_cache_event_ids)); | 918 | sizeof(hw_cache_event_ids)); |
941 | 919 | ||
920 | intel_pmu_lbr_init_nhm(); | ||
921 | |||
942 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 922 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
943 | pr_cont("Nehalem/Corei7 events, "); | 923 | pr_cont("Nehalem/Corei7 events, "); |
944 | break; | 924 | break; |
925 | |||
945 | case 28: /* Atom */ | 926 | case 28: /* Atom */ |
946 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 927 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
947 | sizeof(hw_cache_event_ids)); | 928 | sizeof(hw_cache_event_ids)); |
948 | 929 | ||
930 | intel_pmu_lbr_init_atom(); | ||
931 | |||
949 | x86_pmu.event_constraints = intel_gen_event_constraints; | 932 | x86_pmu.event_constraints = intel_gen_event_constraints; |
950 | pr_cont("Atom events, "); | 933 | pr_cont("Atom events, "); |
951 | break; | 934 | break; |
@@ -955,6 +938,8 @@ static __init int intel_pmu_init(void) | |||
955 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 938 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
956 | sizeof(hw_cache_event_ids)); | 939 | sizeof(hw_cache_event_ids)); |
957 | 940 | ||
941 | intel_pmu_lbr_init_nhm(); | ||
942 | |||
958 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 943 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
959 | pr_cont("Westmere events, "); | 944 | pr_cont("Westmere events, "); |
960 | break; | 945 | break; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..c59678a14a2e --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,673 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core is not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bx, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * Bits in the debugctlmsr controlling branch tracing. | ||
42 | */ | ||
43 | #define X86_DEBUGCTL_TR (1 << 6) | ||
44 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
45 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
46 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
47 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
48 | |||
49 | /* | ||
50 | * A debug store configuration. | ||
51 | * | ||
52 | * We only support architectures that use 64bit fields. | ||
53 | */ | ||
54 | struct debug_store { | ||
55 | u64 bts_buffer_base; | ||
56 | u64 bts_index; | ||
57 | u64 bts_absolute_maximum; | ||
58 | u64 bts_interrupt_threshold; | ||
59 | u64 pebs_buffer_base; | ||
60 | u64 pebs_index; | ||
61 | u64 pebs_absolute_maximum; | ||
62 | u64 pebs_interrupt_threshold; | ||
63 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
64 | }; | ||
65 | |||
66 | static void init_debug_store_on_cpu(int cpu) | ||
67 | { | ||
68 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
69 | |||
70 | if (!ds) | ||
71 | return; | ||
72 | |||
73 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
74 | (u32)((u64)(unsigned long)ds), | ||
75 | (u32)((u64)(unsigned long)ds >> 32)); | ||
76 | } | ||
77 | |||
78 | static void fini_debug_store_on_cpu(int cpu) | ||
79 | { | ||
80 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
81 | return; | ||
82 | |||
83 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
84 | } | ||
85 | |||
86 | static void release_ds_buffers(void) | ||
87 | { | ||
88 | int cpu; | ||
89 | |||
90 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
91 | return; | ||
92 | |||
93 | get_online_cpus(); | ||
94 | |||
95 | for_each_online_cpu(cpu) | ||
96 | fini_debug_store_on_cpu(cpu); | ||
97 | |||
98 | for_each_possible_cpu(cpu) { | ||
99 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
100 | |||
101 | if (!ds) | ||
102 | continue; | ||
103 | |||
104 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
105 | |||
106 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
107 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
108 | kfree(ds); | ||
109 | } | ||
110 | |||
111 | put_online_cpus(); | ||
112 | } | ||
113 | |||
114 | static int reserve_ds_buffers(void) | ||
115 | { | ||
116 | int cpu, err = 0; | ||
117 | |||
118 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
119 | return 0; | ||
120 | |||
121 | get_online_cpus(); | ||
122 | |||
123 | for_each_possible_cpu(cpu) { | ||
124 | struct debug_store *ds; | ||
125 | void *buffer; | ||
126 | int max, thresh; | ||
127 | |||
128 | err = -ENOMEM; | ||
129 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
130 | if (unlikely(!ds)) | ||
131 | break; | ||
132 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
133 | |||
134 | if (x86_pmu.bts) { | ||
135 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
136 | if (unlikely(!buffer)) | ||
137 | break; | ||
138 | |||
139 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
140 | thresh = max / 16; | ||
141 | |||
142 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
143 | ds->bts_index = ds->bts_buffer_base; | ||
144 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
145 | max * BTS_RECORD_SIZE; | ||
146 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
147 | thresh * BTS_RECORD_SIZE; | ||
148 | } | ||
149 | |||
150 | if (x86_pmu.pebs) { | ||
151 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
152 | if (unlikely(!buffer)) | ||
153 | break; | ||
154 | |||
155 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
156 | |||
157 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
158 | ds->pebs_index = ds->pebs_buffer_base; | ||
159 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
160 | max * x86_pmu.pebs_record_size; | ||
161 | /* | ||
162 | * Always use single record PEBS | ||
163 | */ | ||
164 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
165 | x86_pmu.pebs_record_size; | ||
166 | } | ||
167 | |||
168 | err = 0; | ||
169 | } | ||
170 | |||
171 | if (err) | ||
172 | release_ds_buffers(); | ||
173 | else { | ||
174 | for_each_online_cpu(cpu) | ||
175 | init_debug_store_on_cpu(cpu); | ||
176 | } | ||
177 | |||
178 | put_online_cpus(); | ||
179 | |||
180 | return err; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * BTS | ||
185 | */ | ||
186 | |||
187 | static struct event_constraint bts_constraint = | ||
188 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
189 | |||
190 | static void intel_pmu_enable_bts(u64 config) | ||
191 | { | ||
192 | unsigned long debugctlmsr; | ||
193 | |||
194 | debugctlmsr = get_debugctlmsr(); | ||
195 | |||
196 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
197 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
198 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
199 | |||
200 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
201 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
202 | |||
203 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
204 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
205 | |||
206 | update_debugctlmsr(debugctlmsr); | ||
207 | } | ||
208 | |||
209 | static void intel_pmu_disable_bts(void) | ||
210 | { | ||
211 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
212 | unsigned long debugctlmsr; | ||
213 | |||
214 | if (!cpuc->ds) | ||
215 | return; | ||
216 | |||
217 | debugctlmsr = get_debugctlmsr(); | ||
218 | |||
219 | debugctlmsr &= | ||
220 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
221 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
222 | |||
223 | update_debugctlmsr(debugctlmsr); | ||
224 | } | ||
225 | |||
226 | static void intel_pmu_drain_bts_buffer(void) | ||
227 | { | ||
228 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
229 | struct debug_store *ds = cpuc->ds; | ||
230 | struct bts_record { | ||
231 | u64 from; | ||
232 | u64 to; | ||
233 | u64 flags; | ||
234 | }; | ||
235 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
236 | struct bts_record *at, *top; | ||
237 | struct perf_output_handle handle; | ||
238 | struct perf_event_header header; | ||
239 | struct perf_sample_data data; | ||
240 | struct pt_regs regs; | ||
241 | |||
242 | if (!event) | ||
243 | return; | ||
244 | |||
245 | if (!ds) | ||
246 | return; | ||
247 | |||
248 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
249 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
250 | |||
251 | if (top <= at) | ||
252 | return; | ||
253 | |||
254 | ds->bts_index = ds->bts_buffer_base; | ||
255 | |||
256 | perf_sample_data_init(&data, 0); | ||
257 | data.period = event->hw.last_period; | ||
258 | regs.ip = 0; | ||
259 | |||
260 | /* | ||
261 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
262 | * We will overwrite the from and to address before we output | ||
263 | * the sample. | ||
264 | */ | ||
265 | perf_prepare_sample(&header, &data, event, ®s); | ||
266 | |||
267 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
268 | return; | ||
269 | |||
270 | for (; at < top; at++) { | ||
271 | data.ip = at->from; | ||
272 | data.addr = at->to; | ||
273 | |||
274 | perf_output_sample(&handle, &header, &data, event); | ||
275 | } | ||
276 | |||
277 | perf_output_end(&handle); | ||
278 | |||
279 | /* There's new data available. */ | ||
280 | event->hw.interrupts++; | ||
281 | event->pending_kill = POLL_IN; | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * PEBS | ||
286 | */ | ||
287 | |||
288 | static struct event_constraint intel_core_pebs_events[] = { | ||
289 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
290 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
291 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
292 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
293 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
294 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
298 | EVENT_CONSTRAINT_END | ||
299 | }; | ||
300 | |||
301 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
302 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
303 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
304 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
305 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
306 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
307 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
308 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
309 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
310 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
311 | EVENT_CONSTRAINT_END | ||
312 | }; | ||
313 | |||
314 | static struct event_constraint * | ||
315 | intel_pebs_constraints(struct perf_event *event) | ||
316 | { | ||
317 | struct event_constraint *c; | ||
318 | |||
319 | if (!event->attr.precise) | ||
320 | return NULL; | ||
321 | |||
322 | if (x86_pmu.pebs_constraints) { | ||
323 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
324 | if ((event->hw.config & c->cmask) == c->code) | ||
325 | return c; | ||
326 | } | ||
327 | } | ||
328 | |||
329 | return &emptyconstraint; | ||
330 | } | ||
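
intel_pebs_constraints() above walks the per-PMU table and returns the first entry whose masked event code matches the event's config. A standalone sketch of that match follows; the mask value and the configs are hypothetical illustrations, not the ones used by the patch.

#include <stdio.h>
#include <stdint.h>

/* Editorial sketch of the "(config & cmask) == code" test. */
struct constraint {
	uint64_t code;
	uint64_t cmask;
};

static int constraint_matches(uint64_t config, const struct constraint *c)
{
	return (config & c->cmask) == c->code;
}

int main(void)
{
	/* INSTR_RETIRED.ANY: event 0xc0, umask 0x00; mask value is hypothetical */
	struct constraint instr_retired = { 0x00c0, 0xffff };

	printf("%d\n", constraint_matches(0x004300c0, &instr_retired)); /* 1: control bits ignored */
	printf("%d\n", constraint_matches(0x004301c4, &instr_retired)); /* 0: different event */
	return 0;
}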
331 | |||
332 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
333 | { | ||
334 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
335 | struct hw_perf_event *hwc = &event->hw; | ||
336 | |||
337 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
338 | |||
339 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
340 | WARN_ON_ONCE(cpuc->enabled); | ||
341 | |||
342 | if (x86_pmu.intel_cap.pebs_trap) | ||
343 | intel_pmu_lbr_enable(event); | ||
344 | } | ||
345 | |||
346 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
347 | { | ||
348 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
349 | struct hw_perf_event *hwc = &event->hw; | ||
350 | |||
351 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
352 | if (cpuc->enabled) | ||
353 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
354 | |||
355 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
356 | |||
357 | if (x86_pmu.intel_cap.pebs_trap) | ||
358 | intel_pmu_lbr_disable(event); | ||
359 | } | ||
360 | |||
361 | static void intel_pmu_pebs_enable_all(void) | ||
362 | { | ||
363 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
364 | |||
365 | if (cpuc->pebs_enabled) | ||
366 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
367 | } | ||
368 | |||
369 | static void intel_pmu_pebs_disable_all(void) | ||
370 | { | ||
371 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
372 | |||
373 | if (cpuc->pebs_enabled) | ||
374 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
375 | } | ||
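
The enable/disable paths above keep a per-CPU pebs_enabled mask with one bit per counter index and mirror it into MSR_IA32_PEBS_ENABLE. A standalone sketch of the bit bookkeeping; in the patch the enable side only sets the bit and the MSR write happens from the enable_all path, so a printf stands in for the wrmsrl here.

#include <stdio.h>
#include <stdint.h>

/* Editorial sketch of the pebs_enabled bookkeeping: one bit per counter. */
static uint64_t pebs_enabled;

static void pebs_enable(int idx)
{
	pebs_enabled |= 1ULL << idx;
}

static void pebs_disable(int idx)
{
	pebs_enabled &= ~(1ULL << idx);
	printf("wrmsrl(MSR_IA32_PEBS_ENABLE, %#llx)\n",
	       (unsigned long long)pebs_enabled);
}

int main(void)
{
	pebs_enable(0);		/* counter 0 armed for PEBS */
	pebs_enable(2);		/* counter 2 as well */
	pebs_disable(0);	/* prints mask 0x4: only counter 2 left */
	return 0;
}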
376 | |||
377 | #include <asm/insn.h> | ||
378 | |||
379 | static inline bool kernel_ip(unsigned long ip) | ||
380 | { | ||
381 | #ifdef CONFIG_X86_32 | ||
382 | return ip > PAGE_OFFSET; | ||
383 | #else | ||
384 | return (long)ip < 0; | ||
385 | #endif | ||
386 | } | ||
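
kernel_ip() above separates kernel from user addresses; on 64-bit x86 the kernel owns the upper canonical half, so treating the pointer as a signed value and testing its sign is enough. A standalone illustration with hypothetical example addresses:

#include <stdio.h>
#include <stdint.h>

/* Editorial sketch of the 64-bit kernel_ip() test: kernel text lives in the
 * upper canonical half, where bit 63 is set, so the signed value is negative. */
static int is_kernel_ip(uint64_t ip)
{
	return (int64_t)ip < 0;
}

int main(void)
{
	printf("%d\n", is_kernel_ip(0xffffffff81000000ULL));	/* kernel text -> 1 */
	printf("%d\n", is_kernel_ip(0x00007f1234567000ULL));	/* user mapping -> 0 */
	return 0;
}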
387 | |||
388 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
389 | { | ||
390 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
391 | unsigned long from = cpuc->lbr_entries[0].from; | ||
392 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
393 | unsigned long ip = regs->ip; | ||
394 | |||
395 | /* | ||
396 | * We don't need to fix up if the PEBS assist is fault-like | ||
397 | */ | ||
398 | if (!x86_pmu.intel_cap.pebs_trap) | ||
399 | return 1; | ||
400 | |||
401 | /* | ||
402 | * No LBR entry, no basic block, no rewinding | ||
403 | */ | ||
404 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
405 | return 0; | ||
406 | |||
407 | /* | ||
408 | * Basic blocks should never cross user/kernel boundaries | ||
409 | */ | ||
410 | if (kernel_ip(ip) != kernel_ip(to)) | ||
411 | return 0; | ||
412 | |||
413 | /* | ||
414 | * unsigned math, either ip is before the start (impossible) or | ||
415 | * the basic block is larger than 1 page (sanity) | ||
416 | */ | ||
417 | if ((ip - to) > PAGE_SIZE) | ||
418 | return 0; | ||
419 | |||
420 | /* | ||
421 | * We sampled a branch insn, rewind using the LBR stack | ||
422 | */ | ||
423 | if (ip == to) { | ||
424 | regs->ip = from; | ||
425 | return 1; | ||
426 | } | ||
427 | |||
428 | do { | ||
429 | struct insn insn; | ||
430 | u8 buf[MAX_INSN_SIZE]; | ||
431 | void *kaddr; | ||
432 | |||
433 | old_to = to; | ||
434 | if (!kernel_ip(ip)) { | ||
435 | int bytes, size = MAX_INSN_SIZE; | ||
436 | |||
437 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
438 | if (bytes != size) | ||
439 | return 0; | ||
440 | |||
441 | kaddr = buf; | ||
442 | } else | ||
443 | kaddr = (void *)to; | ||
444 | |||
445 | kernel_insn_init(&insn, kaddr); | ||
446 | insn_get_length(&insn); | ||
447 | to += insn.length; | ||
448 | } while (to < ip); | ||
449 | |||
450 | if (to == ip) { | ||
451 | regs->ip = old_to; | ||
452 | return 1; | ||
453 | } | ||
454 | |||
455 | /* | ||
456 | * Even though we decoded the basic block, the instruction stream | ||
457 | * never matched the given IP, either the TO or the IP got corrupted. | ||
458 | */ | ||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
463 | |||
464 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
465 | { | ||
466 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
467 | struct debug_store *ds = cpuc->ds; | ||
468 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
469 | struct pebs_record_core *at, *top; | ||
470 | struct perf_sample_data data; | ||
471 | struct perf_raw_record raw; | ||
472 | struct pt_regs regs; | ||
473 | int n; | ||
474 | |||
475 | if (!ds || !x86_pmu.pebs) | ||
476 | return; | ||
477 | |||
478 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
479 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
480 | |||
481 | /* | ||
482 | * Whatever else happens, drain the thing | ||
483 | */ | ||
484 | ds->pebs_index = ds->pebs_buffer_base; | ||
485 | |||
486 | if (!test_bit(0, cpuc->active_mask)) | ||
487 | return; | ||
488 | |||
489 | WARN_ON_ONCE(!event); | ||
490 | |||
491 | if (!event->attr.precise) | ||
492 | return; | ||
493 | |||
494 | n = top - at; | ||
495 | if (n <= 0) | ||
496 | return; | ||
497 | |||
498 | if (!intel_pmu_save_and_restart(event)) | ||
499 | return; | ||
500 | |||
501 | /* | ||
502 | * Should not happen, we program the threshold at 1 and do not | ||
503 | * set a reset value. | ||
504 | */ | ||
505 | WARN_ON_ONCE(n > 1); | ||
506 | at += n - 1; | ||
507 | |||
508 | perf_sample_data_init(&data, 0); | ||
509 | data.period = event->hw.last_period; | ||
510 | |||
511 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { | ||
512 | raw.size = x86_pmu.pebs_record_size; | ||
513 | raw.data = at; | ||
514 | data.raw = &raw; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * We use the interrupt regs as a base because the PEBS record | ||
519 | * does not contain a full regs set, specifically it seems to | ||
520 | * lack segment descriptors, which get used by things like | ||
521 | * user_mode(). | ||
522 | * | ||
523 | * In the simple case fix up only the IP and BP,SP regs, for | ||
524 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
525 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
526 | */ | ||
527 | regs = *iregs; | ||
528 | regs.ip = at->ip; | ||
529 | regs.bp = at->bp; | ||
530 | regs.sp = at->sp; | ||
531 | |||
532 | if (intel_pmu_pebs_fixup_ip(®s)) | ||
533 | regs.flags |= PERF_EFLAGS_EXACT; | ||
534 | else | ||
535 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
536 | |||
537 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
538 | x86_pmu_stop(event); | ||
539 | } | ||
540 | |||
541 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
542 | { | ||
543 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
544 | struct debug_store *ds = cpuc->ds; | ||
545 | struct pebs_record_nhm *at, *top; | ||
546 | struct perf_sample_data data; | ||
547 | struct perf_event *event = NULL; | ||
548 | struct perf_raw_record raw; | ||
549 | struct pt_regs regs; | ||
550 | u64 status = 0; | ||
551 | int bit, n; | ||
552 | |||
553 | if (!ds || !x86_pmu.pebs) | ||
554 | return; | ||
555 | |||
556 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
557 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
558 | |||
559 | ds->pebs_index = ds->pebs_buffer_base; | ||
560 | |||
561 | n = top - at; | ||
562 | if (n <= 0) | ||
563 | return; | ||
564 | |||
565 | /* | ||
566 | * Should not happen, we program the threshold at 1 and do not | ||
567 | * set a reset value. | ||
568 | */ | ||
569 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
570 | |||
571 | for ( ; at < top; at++) { | ||
572 | for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
573 | event = cpuc->events[bit]; | ||
574 | if (!test_bit(bit, cpuc->active_mask)) | ||
575 | continue; | ||
576 | |||
577 | WARN_ON_ONCE(!event); | ||
578 | |||
579 | if (!event->attr.precise) | ||
580 | continue; | ||
581 | |||
582 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
583 | continue; | ||
584 | |||
585 | break; | ||
586 | } | ||
587 | |||
588 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
589 | continue; | ||
590 | |||
591 | if (!intel_pmu_save_and_restart(event)) | ||
592 | continue; | ||
593 | |||
594 | perf_sample_data_init(&data, 0); | ||
595 | data.period = event->hw.last_period; | ||
596 | |||
597 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { | ||
598 | raw.size = x86_pmu.pebs_record_size; | ||
599 | raw.data = at; | ||
600 | data.raw = &raw; | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * See the comment in intel_pmu_drain_pebs_core() | ||
605 | */ | ||
606 | regs = *iregs; | ||
607 | regs.ip = at->ip; | ||
608 | regs.bp = at->bp; | ||
609 | regs.sp = at->sp; | ||
610 | |||
611 | if (intel_pmu_pebs_fixup_ip(®s)) | ||
612 | regs.flags |= PERF_EFLAGS_EXACT; | ||
613 | else | ||
614 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
615 | |||
616 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
617 | x86_pmu_stop(event); | ||
618 | } | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * BTS, PEBS probe and setup | ||
623 | */ | ||
624 | |||
625 | static void intel_ds_init(void) | ||
626 | { | ||
627 | /* | ||
628 | * No support for 32bit formats | ||
629 | */ | ||
630 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
631 | return; | ||
632 | |||
633 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
634 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
635 | if (x86_pmu.pebs) { | ||
636 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
637 | int format = x86_pmu.intel_cap.pebs_format; | ||
638 | |||
639 | switch (format) { | ||
640 | case 0: | ||
641 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
642 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
643 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
644 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
645 | break; | ||
646 | |||
647 | case 1: | ||
648 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
649 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
650 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
651 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
652 | break; | ||
653 | |||
654 | default: | ||
655 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
656 | x86_pmu.pebs = 0; | ||
657 | break; | ||
658 | } | ||
659 | } | ||
660 | } | ||
661 | |||
662 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
663 | |||
664 | static int reserve_ds_buffers(void) | ||
665 | { | ||
666 | return 0; | ||
667 | } | ||
668 | |||
669 | static void release_ds_buffers(void) | ||
670 | { | ||
671 | } | ||
672 | |||
673 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..df4c98e26c5b --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,221 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | ||
12 | * otherwise it becomes near impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | #define X86_DEBUGCTL_LBR (1 << 0) | ||
16 | #define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11) | ||
17 | |||
18 | static void __intel_pmu_lbr_enable(void) | ||
19 | { | ||
20 | u64 debugctl; | ||
21 | |||
22 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
23 | debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); | ||
24 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
25 | } | ||
26 | |||
27 | static void __intel_pmu_lbr_disable(void) | ||
28 | { | ||
29 | u64 debugctl; | ||
30 | |||
31 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
32 | debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); | ||
33 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
34 | } | ||
35 | |||
36 | static void intel_pmu_lbr_reset_32(void) | ||
37 | { | ||
38 | int i; | ||
39 | |||
40 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
41 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
42 | } | ||
43 | |||
44 | static void intel_pmu_lbr_reset_64(void) | ||
45 | { | ||
46 | int i; | ||
47 | |||
48 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
49 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
50 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | static void intel_pmu_lbr_reset(void) | ||
55 | { | ||
56 | if (!x86_pmu.lbr_nr) | ||
57 | return; | ||
58 | |||
59 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
60 | intel_pmu_lbr_reset_32(); | ||
61 | else | ||
62 | intel_pmu_lbr_reset_64(); | ||
63 | } | ||
64 | |||
65 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
66 | { | ||
67 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
68 | |||
69 | if (!x86_pmu.lbr_nr) | ||
70 | return; | ||
71 | |||
72 | WARN_ON_ONCE(cpuc->enabled); | ||
73 | |||
74 | /* | ||
75 | * Reset the LBR stack if we changed task context to | ||
76 | * avoid data leaks. | ||
77 | */ | ||
78 | |||
79 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
80 | intel_pmu_lbr_reset(); | ||
81 | cpuc->lbr_context = event->ctx; | ||
82 | } | ||
83 | |||
84 | cpuc->lbr_users++; | ||
85 | } | ||
86 | |||
87 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
88 | { | ||
89 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
90 | |||
91 | if (!x86_pmu.lbr_nr) | ||
92 | return; | ||
93 | |||
94 | cpuc->lbr_users--; | ||
95 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
96 | |||
97 | if (cpuc->enabled && !cpuc->lbr_users) | ||
98 | __intel_pmu_lbr_disable(); | ||
99 | } | ||
100 | |||
101 | static void intel_pmu_lbr_enable_all(void) | ||
102 | { | ||
103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
104 | |||
105 | if (cpuc->lbr_users) | ||
106 | __intel_pmu_lbr_enable(); | ||
107 | } | ||
108 | |||
109 | static void intel_pmu_lbr_disable_all(void) | ||
110 | { | ||
111 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
112 | |||
113 | if (cpuc->lbr_users) | ||
114 | __intel_pmu_lbr_disable(); | ||
115 | } | ||
116 | |||
117 | static inline u64 intel_pmu_lbr_tos(void) | ||
118 | { | ||
119 | u64 tos; | ||
120 | |||
121 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
122 | |||
123 | return tos; | ||
124 | } | ||
125 | |||
126 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
127 | { | ||
128 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
129 | u64 tos = intel_pmu_lbr_tos(); | ||
130 | int i; | ||
131 | |||
132 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
133 | unsigned long lbr_idx = (tos - i) & mask; | ||
134 | union { | ||
135 | struct { | ||
136 | u32 from; | ||
137 | u32 to; | ||
138 | }; | ||
139 | u64 lbr; | ||
140 | } msr_lastbranch; | ||
141 | |||
142 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
143 | |||
144 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
145 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
146 | cpuc->lbr_entries[i].flags = 0; | ||
147 | } | ||
148 | cpuc->lbr_stack.nr = i; | ||
149 | } | ||
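
The read routine above (and the 64-bit variant that follows) walks the LBR ring from the top-of-stack entry backwards; because lbr_nr is a power of two, (tos - i) & mask wraps around cleanly. A standalone sketch with a hypothetical TOS value:

#include <stdio.h>

/* Editorial sketch of the (tos - i) & mask ring walk; lbr_nr and tos are
 * hypothetical values, e.g. a Core2-style 4-deep LBR. */
int main(void)
{
	unsigned int lbr_nr = 4;
	unsigned int mask = lbr_nr - 1;
	unsigned int tos = 1;			/* hypothetical top-of-stack */
	unsigned int i;

	for (i = 0; i < lbr_nr; i++)
		printf("entry %u reads LBR slot %u\n", i, (tos - i) & mask);
	return 0;
}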
150 | |||
151 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
152 | |||
153 | /* | ||
154 | * Due to lack of segmentation in Linux the effective address (offset) | ||
155 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
156 | * LBR formats. | ||
157 | */ | ||
158 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
159 | { | ||
160 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
161 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
162 | u64 tos = intel_pmu_lbr_tos(); | ||
163 | int i; | ||
164 | |||
165 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
166 | unsigned long lbr_idx = (tos - i) & mask; | ||
167 | u64 from, to, flags = 0; | ||
168 | |||
169 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
170 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
171 | |||
172 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
173 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
174 | from = (u64)((((s64)from) << 1) >> 1); | ||
175 | } | ||
176 | |||
177 | cpuc->lbr_entries[i].from = from; | ||
178 | cpuc->lbr_entries[i].to = to; | ||
179 | cpuc->lbr_entries[i].flags = flags; | ||
180 | } | ||
181 | cpuc->lbr_stack.nr = i; | ||
182 | } | ||
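
In the EIP_FLAGS format the FROM MSR carries the mispredict flag in bit 63, and the shift-left-then-arithmetic-shift-right pair above sign-extends bit 62 to recover the canonical address. A standalone sketch of that decode; the sample address is hypothetical.

#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

/* Editorial sketch of the EIP_FLAGS decode: strip the flag from bit 63 and
 * sign-extend bit 62 so a canonical kernel address comes back intact. */
static uint64_t lbr_from_strip_flag(uint64_t from, int *mispred)
{
	*mispred = !!(from & LBR_FROM_FLAG_MISPRED);
	return (uint64_t)((int64_t)(from << 1) >> 1);
}

int main(void)
{
	uint64_t addr = 0xffffffff81234567ULL;	/* hypothetical kernel address */
	uint64_t decoded;
	int mispred;

	decoded = lbr_from_strip_flag(addr & ~LBR_FROM_FLAG_MISPRED, &mispred);
	printf("%#llx mispred=%d\n", (unsigned long long)decoded, mispred);

	decoded = lbr_from_strip_flag(addr | LBR_FROM_FLAG_MISPRED, &mispred);
	printf("%#llx mispred=%d\n", (unsigned long long)decoded, mispred);
	return 0;
}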
183 | |||
184 | static void intel_pmu_lbr_read(void) | ||
185 | { | ||
186 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
187 | |||
188 | if (!cpuc->lbr_users) | ||
189 | return; | ||
190 | |||
191 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
192 | intel_pmu_lbr_read_32(cpuc); | ||
193 | else | ||
194 | intel_pmu_lbr_read_64(cpuc); | ||
195 | } | ||
196 | |||
197 | static void intel_pmu_lbr_init_core(void) | ||
198 | { | ||
199 | x86_pmu.lbr_nr = 4; | ||
200 | x86_pmu.lbr_tos = 0x01c9; | ||
201 | x86_pmu.lbr_from = 0x40; | ||
202 | x86_pmu.lbr_to = 0x60; | ||
203 | } | ||
204 | |||
205 | static void intel_pmu_lbr_init_nhm(void) | ||
206 | { | ||
207 | x86_pmu.lbr_nr = 16; | ||
208 | x86_pmu.lbr_tos = 0x01c9; | ||
209 | x86_pmu.lbr_from = 0x680; | ||
210 | x86_pmu.lbr_to = 0x6c0; | ||
211 | } | ||
212 | |||
213 | static void intel_pmu_lbr_init_atom(void) | ||
214 | { | ||
215 | x86_pmu.lbr_nr = 8; | ||
216 | x86_pmu.lbr_tos = 0x01c9; | ||
217 | x86_pmu.lbr_from = 0x40; | ||
218 | x86_pmu.lbr_to = 0x60; | ||
219 | } | ||
220 | |||
221 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..a11ce73a93c9 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,607 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licencing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | /* | ||
15 | * array indices: 0,1 - HT threads, used with HT-enabled CPUs | ||
16 | */ | ||
17 | struct p4_event_template { | ||
18 | u32 opcode; /* ESCR event + CCCR selector */ | ||
19 | u64 config; /* packed predefined bits */ | ||
20 | int dep; /* upstream dependency event index */ | ||
21 | unsigned int emask; /* ESCR EventMask */ | ||
22 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
23 | unsigned int cntr[2]; /* counter index (offset) */ | ||
24 | }; | ||
25 | |||
26 | struct p4_pmu_res { | ||
27 | /* maps hw_conf::idx into template for ESCR sake */ | ||
28 | struct p4_event_template *tpl[ARCH_P4_MAX_CCCR]; | ||
29 | }; | ||
30 | |||
31 | static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); | ||
32 | |||
33 | /* | ||
34 | * WARN: CCCR1 doesn't have a working enable bit so try to not | ||
35 | * use it if possible | ||
36 | * | ||
37 | * Also, once we start to support raw events we will need to | ||
38 | * append _all_ P4_EVENT_PACK'ed events here | ||
39 | */ | ||
40 | struct p4_event_template p4_templates[] = { | ||
41 | [0] = { | ||
42 | .opcode = P4_UOP_TYPE, | ||
43 | .config = 0, | ||
44 | .dep = -1, | ||
45 | .emask = | ||
46 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | | ||
47 | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), | ||
48 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
49 | .cntr = { 16, 17 }, | ||
50 | }, | ||
51 | [1] = { | ||
52 | .opcode = P4_GLOBAL_POWER_EVENTS, | ||
53 | .config = 0, | ||
54 | .dep = -1, | ||
55 | .emask = | ||
56 | P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), | ||
57 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
58 | .cntr = { 0, 2 }, | ||
59 | }, | ||
60 | [2] = { | ||
61 | .opcode = P4_INSTR_RETIRED, | ||
62 | .config = 0, | ||
63 | .dep = -1, /* needs front-end tagging */ | ||
64 | .emask = | ||
65 | P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | | ||
66 | P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), | ||
67 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
68 | .cntr = { 12, 14 }, | ||
69 | }, | ||
70 | [3] = { | ||
71 | .opcode = P4_BSQ_CACHE_REFERENCE, | ||
72 | .config = 0, | ||
73 | .dep = -1, | ||
74 | .emask = | ||
75 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
76 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
77 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
78 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
79 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
80 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM), | ||
81 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
82 | .cntr = { 0, 2 }, | ||
83 | }, | ||
84 | [4] = { | ||
85 | .opcode = P4_BSQ_CACHE_REFERENCE, | ||
86 | .config = 0, | ||
87 | .dep = -1, | ||
88 | .emask = | ||
89 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
90 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
91 | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), | ||
92 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
93 | .cntr = { 0, 3 }, | ||
94 | }, | ||
95 | [5] = { | ||
96 | .opcode = P4_RETIRED_BRANCH_TYPE, | ||
97 | .config = 0, | ||
98 | .dep = -1, | ||
99 | .emask = | ||
100 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
101 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | | ||
102 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) | | ||
103 | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT), | ||
104 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, | ||
105 | .cntr = { 4, 6 }, | ||
106 | }, | ||
107 | [6] = { | ||
108 | .opcode = P4_MISPRED_BRANCH_RETIRED, | ||
109 | .config = 0, | ||
110 | .dep = -1, | ||
111 | .emask = | ||
112 | P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), | ||
113 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
114 | .cntr = { 12, 14 }, | ||
115 | }, | ||
116 | [7] = { | ||
117 | .opcode = P4_FSB_DATA_ACTIVITY, | ||
118 | .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
119 | .dep = -1, | ||
120 | .emask = | ||
121 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
122 | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), | ||
123 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
124 | .cntr = { 0, 2 }, | ||
125 | }, | ||
126 | }; | ||
127 | |||
128 | static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { | ||
129 | /* non-halted CPU clocks */ | ||
130 | [PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], | ||
131 | |||
132 | /* retired instructions: dep on front-end tagging */ | ||
133 | [PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], | ||
134 | |||
135 | /* cache hits */ | ||
136 | [PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], | ||
137 | |||
138 | /* cache misses */ | ||
139 | [PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4], | ||
140 | |||
141 | /* branch instructions retired */ | ||
142 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5], | ||
143 | |||
144 | /* mispredicted branches retired */ | ||
145 | [PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6], | ||
146 | |||
147 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
148 | [PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7], | ||
149 | }; | ||
150 | |||
151 | static u64 p4_pmu_event_map(int hw_event) | ||
152 | { | ||
153 | struct p4_event_template *tpl; | ||
154 | u64 config; | ||
155 | |||
156 | if (hw_event >= ARRAY_SIZE(p4_event_map)) { | ||
157 | printk_once(KERN_ERR "PMU: Incorrect event index\n"); | ||
158 | return 0; | ||
159 | } | ||
160 | tpl = p4_event_map[hw_event]; | ||
161 | |||
162 | /* | ||
163 | * fill config up according to | ||
164 | * a predefined event template | ||
165 | */ | ||
166 | config = tpl->config; | ||
167 | config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); | ||
168 | config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); | ||
169 | config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); | ||
170 | |||
171 | /* on HT machine we need a special bit */ | ||
172 | if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) | ||
173 | config = p4_set_ht_bit(config); | ||
174 | |||
175 | return config; | ||
176 | } | ||
177 | |||
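Editorial aside, not part of the patch: the p4_config_pack_escr()/p4_config_pack_cccr() helpers used throughout this file are presumably defined in the P4 header that is not shown in this hunk, so the exact bit layout below is an assumption. The sketch only illustrates the idea that a single u64 config carries both the ESCR and the CCCR halves that p4_pmu_event_map() ORs together.

	/*
	 * Hedged sketch of the assumed config layout: one 32-bit half for
	 * the ESCR value, the other for the CCCR value. The real shifts
	 * live in the P4 header and may differ.
	 */
	static inline u64 sketch_p4_pack_config(u32 escr_bits, u32 cccr_bits)
	{
		return ((u64)escr_bits << 32) | cccr_bits;
	}

With a layout of this kind the unpack helpers are plain shifts and masks, which is why p4_pmu_template_lookup() below can compare opcode and emask straight out of a raw config value.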
178 | /* | ||
179 | * Note that we still have 5 events (from global events SDM list) | ||
180 | * intersected in opcode+emask bits so we will need another | ||
181 | * scheme there to distinguish templates. | ||
182 | */ | ||
183 | static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) | ||
184 | { | ||
185 | return dst & src; | ||
186 | } | ||
187 | |||
188 | static struct p4_event_template *p4_pmu_template_lookup(u64 config) | ||
189 | { | ||
190 | u32 opcode = p4_config_unpack_opcode(config); | ||
191 | unsigned int emask = p4_config_unpack_emask(config); | ||
192 | unsigned int i; | ||
193 | |||
194 | for (i = 0; i < ARRAY_SIZE(p4_templates); i++) { | ||
195 | if (opcode == p4_templates[i].opcode && | ||
196 | p4_pmu_emask_match(emask, p4_templates[i].emask)) | ||
197 | return &p4_templates[i]; | ||
198 | } | ||
199 | |||
200 | return NULL; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * We don't control raw events so it's up to the caller | ||
205 | * to pass sane values (and we don't count the thread number | ||
206 | * on an HT machine but allow HT-compatible specifics to be | ||
207 | * passed on) | ||
208 | */ | ||
209 | static u64 p4_pmu_raw_event(u64 hw_event) | ||
210 | { | ||
211 | return hw_event & | ||
212 | (p4_config_pack_escr(P4_EVNTSEL_MASK_HT) | | ||
213 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
214 | } | ||
215 | |||
216 | static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) | ||
217 | { | ||
218 | int cpu = raw_smp_processor_id(); | ||
219 | |||
220 | /* | ||
221 | * the reason we use the cpu this early is that if we get scheduled | ||
222 | * for the first time on the same cpu -- we will not need to swap thread | ||
223 | * specific flags in config (and will save some cpu cycles) | ||
224 | */ | ||
225 | |||
226 | /* CCCR by default */ | ||
227 | hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu)); | ||
228 | |||
229 | /* Count user and OS events unless exclusion was requested */ | ||
230 | hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, | ||
231 | attr->exclude_user)); | ||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
236 | { | ||
237 | unsigned long dummy; | ||
238 | |||
239 | rdmsrl(hwc->config_base + hwc->idx, dummy); | ||
240 | if (dummy & P4_CCCR_OVF) { | ||
241 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
242 | ((u64)dummy) & ~P4_CCCR_OVF); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
247 | { | ||
248 | struct hw_perf_event *hwc = &event->hw; | ||
249 | |||
250 | /* | ||
251 | * If the event gets disabled while the counter is in an overflowed | ||
252 | * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets | ||
253 | * asserted again and again | ||
254 | */ | ||
255 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
256 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
257 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF); | ||
258 | } | ||
259 | |||
260 | static void p4_pmu_disable_all(void) | ||
261 | { | ||
262 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
263 | int idx; | ||
264 | |||
265 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
266 | struct perf_event *event = cpuc->events[idx]; | ||
267 | if (!test_bit(idx, cpuc->active_mask)) | ||
268 | continue; | ||
269 | p4_pmu_disable_event(event); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | static void p4_pmu_enable_event(struct perf_event *event) | ||
274 | { | ||
275 | struct hw_perf_event *hwc = &event->hw; | ||
276 | int thread = p4_ht_config_thread(hwc->config); | ||
277 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
278 | u64 escr_base; | ||
279 | struct p4_event_template *tpl; | ||
280 | struct p4_pmu_res *c; | ||
281 | |||
282 | /* | ||
283 | * some preparation work using the per-cpu private fields | ||
284 | * since we need to find out which ESCR to use | ||
285 | */ | ||
286 | c = &__get_cpu_var(p4_pmu_config); | ||
287 | tpl = c->tpl[hwc->idx]; | ||
288 | if (!tpl) { | ||
289 | pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); | ||
290 | return; | ||
291 | } | ||
292 | escr_base = (u64)tpl->escr_msr[thread]; | ||
293 | |||
294 | /* | ||
295 | * - we don't support cascaded counters yet | ||
296 | * - and counter 1 is broken (erratum) | ||
297 | */ | ||
298 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
299 | WARN_ON_ONCE(hwc->idx == 1); | ||
300 | |||
301 | (void)checking_wrmsrl(escr_base, escr_conf); | ||
302 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
303 | (u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE); | ||
304 | } | ||
305 | |||
306 | static void p4_pmu_enable_all(void) | ||
307 | { | ||
308 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
309 | int idx; | ||
310 | |||
311 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
312 | struct perf_event *event = cpuc->events[idx]; | ||
313 | if (!test_bit(idx, cpuc->active_mask)) | ||
314 | continue; | ||
315 | p4_pmu_enable_event(event); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
320 | { | ||
321 | struct perf_sample_data data; | ||
322 | struct cpu_hw_events *cpuc; | ||
323 | struct perf_event *event; | ||
324 | struct hw_perf_event *hwc; | ||
325 | int idx, handled = 0; | ||
326 | u64 val; | ||
327 | |||
328 | data.addr = 0; | ||
329 | data.raw = NULL; | ||
330 | |||
331 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
332 | |||
333 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
334 | |||
335 | if (!test_bit(idx, cpuc->active_mask)) | ||
336 | continue; | ||
337 | |||
338 | event = cpuc->events[idx]; | ||
339 | hwc = &event->hw; | ||
340 | |||
341 | WARN_ON_ONCE(hwc->idx != idx); | ||
342 | |||
343 | /* | ||
344 | * FIXME: Redundant call, actually not needed | ||
345 | * but just to check if we're screwed | ||
346 | */ | ||
347 | p4_pmu_clear_cccr_ovf(hwc); | ||
348 | |||
349 | val = x86_perf_event_update(event); | ||
350 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
351 | continue; | ||
352 | |||
353 | /* | ||
354 | * event overflow | ||
355 | */ | ||
356 | handled = 1; | ||
357 | data.period = event->hw.last_period; | ||
358 | |||
359 | if (!x86_perf_event_set_period(event)) | ||
360 | continue; | ||
361 | if (perf_event_overflow(event, 1, &data, regs)) | ||
362 | p4_pmu_disable_event(event); | ||
363 | } | ||
364 | |||
365 | if (handled) { | ||
366 | #ifdef CONFIG_X86_LOCAL_APIC | ||
367 | /* p4 quirk: unmask it again */ | ||
368 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
369 | #endif | ||
370 | inc_irq_stat(apic_perf_irqs); | ||
371 | } | ||
372 | |||
373 | return handled; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * swap thread specific fields according to the thread | ||
378 | * we are going to run on | ||
379 | */ | ||
380 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
381 | { | ||
382 | u32 escr, cccr; | ||
383 | |||
384 | /* | ||
385 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
386 | */ | ||
387 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
388 | return; | ||
389 | |||
390 | /* | ||
391 | * the event is migrated from another logical | ||
392 | * cpu, so we need to swap thread specific flags | ||
393 | */ | ||
394 | |||
395 | escr = p4_config_unpack_escr(hwc->config); | ||
396 | cccr = p4_config_unpack_cccr(hwc->config); | ||
397 | |||
398 | if (p4_ht_thread(cpu)) { | ||
399 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
400 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
401 | if (escr & P4_EVNTSEL_T0_OS) { | ||
402 | escr &= ~P4_EVNTSEL_T0_OS; | ||
403 | escr |= P4_EVNTSEL_T1_OS; | ||
404 | } | ||
405 | if (escr & P4_EVNTSEL_T0_USR) { | ||
406 | escr &= ~P4_EVNTSEL_T0_USR; | ||
407 | escr |= P4_EVNTSEL_T1_USR; | ||
408 | } | ||
409 | hwc->config = p4_config_pack_escr(escr); | ||
410 | hwc->config |= p4_config_pack_cccr(cccr); | ||
411 | hwc->config |= P4_CONFIG_HT; | ||
412 | } else { | ||
413 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
414 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
415 | if (escr & P4_EVNTSEL_T1_OS) { | ||
416 | escr &= ~P4_EVNTSEL_T1_OS; | ||
417 | escr |= P4_EVNTSEL_T0_OS; | ||
418 | } | ||
419 | if (escr & P4_EVNTSEL_T1_USR) { | ||
420 | escr &= ~P4_EVNTSEL_T1_USR; | ||
421 | escr |= P4_EVNTSEL_T0_USR; | ||
422 | } | ||
423 | hwc->config = p4_config_pack_escr(escr); | ||
424 | hwc->config |= p4_config_pack_cccr(cccr); | ||
425 | hwc->config &= ~P4_CONFIG_HT; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | /* ESCRs are not sequential in memory so we need a map */ | ||
430 | static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { | ||
431 | MSR_P4_ALF_ESCR0, /* 0 */ | ||
432 | MSR_P4_ALF_ESCR1, /* 1 */ | ||
433 | MSR_P4_BPU_ESCR0, /* 2 */ | ||
434 | MSR_P4_BPU_ESCR1, /* 3 */ | ||
435 | MSR_P4_BSU_ESCR0, /* 4 */ | ||
436 | MSR_P4_BSU_ESCR1, /* 5 */ | ||
437 | MSR_P4_CRU_ESCR0, /* 6 */ | ||
438 | MSR_P4_CRU_ESCR1, /* 7 */ | ||
439 | MSR_P4_CRU_ESCR2, /* 8 */ | ||
440 | MSR_P4_CRU_ESCR3, /* 9 */ | ||
441 | MSR_P4_CRU_ESCR4, /* 10 */ | ||
442 | MSR_P4_CRU_ESCR5, /* 11 */ | ||
443 | MSR_P4_DAC_ESCR0, /* 12 */ | ||
444 | MSR_P4_DAC_ESCR1, /* 13 */ | ||
445 | MSR_P4_FIRM_ESCR0, /* 14 */ | ||
446 | MSR_P4_FIRM_ESCR1, /* 15 */ | ||
447 | MSR_P4_FLAME_ESCR0, /* 16 */ | ||
448 | MSR_P4_FLAME_ESCR1, /* 17 */ | ||
449 | MSR_P4_FSB_ESCR0, /* 18 */ | ||
450 | MSR_P4_FSB_ESCR1, /* 19 */ | ||
451 | MSR_P4_IQ_ESCR0, /* 20 */ | ||
452 | MSR_P4_IQ_ESCR1, /* 21 */ | ||
453 | MSR_P4_IS_ESCR0, /* 22 */ | ||
454 | MSR_P4_IS_ESCR1, /* 23 */ | ||
455 | MSR_P4_ITLB_ESCR0, /* 24 */ | ||
456 | MSR_P4_ITLB_ESCR1, /* 25 */ | ||
457 | MSR_P4_IX_ESCR0, /* 26 */ | ||
458 | MSR_P4_IX_ESCR1, /* 27 */ | ||
459 | MSR_P4_MOB_ESCR0, /* 28 */ | ||
460 | MSR_P4_MOB_ESCR1, /* 29 */ | ||
461 | MSR_P4_MS_ESCR0, /* 30 */ | ||
462 | MSR_P4_MS_ESCR1, /* 31 */ | ||
463 | MSR_P4_PMH_ESCR0, /* 32 */ | ||
464 | MSR_P4_PMH_ESCR1, /* 33 */ | ||
465 | MSR_P4_RAT_ESCR0, /* 34 */ | ||
466 | MSR_P4_RAT_ESCR1, /* 35 */ | ||
467 | MSR_P4_SAAT_ESCR0, /* 36 */ | ||
468 | MSR_P4_SAAT_ESCR1, /* 37 */ | ||
469 | MSR_P4_SSU_ESCR0, /* 38 */ | ||
470 | MSR_P4_SSU_ESCR1, /* 39 */ | ||
471 | MSR_P4_TBPU_ESCR0, /* 40 */ | ||
472 | MSR_P4_TBPU_ESCR1, /* 41 */ | ||
473 | MSR_P4_TC_ESCR0, /* 42 */ | ||
474 | MSR_P4_TC_ESCR1, /* 43 */ | ||
475 | MSR_P4_U2L_ESCR0, /* 44 */ | ||
476 | MSR_P4_U2L_ESCR1, /* 45 */ | ||
477 | }; | ||
478 | |||
479 | static int p4_get_escr_idx(unsigned int addr) | ||
480 | { | ||
481 | unsigned int i; | ||
482 | |||
483 | for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) { | ||
484 | if (addr == p4_escr_map[i]) | ||
485 | return i; | ||
486 | } | ||
487 | |||
488 | return -1; | ||
489 | } | ||
490 | |||
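A quick worked example against the map above (editorial, not part of the patch): MSR_P4_FSB_ESCR0 sits in slot 18 of p4_escr_map[], so the linear scan resolves it as shown below, and that index is what the scheduler sets in its escr_mask bitmap.

	/* per the table above this yields 18 */
	int idx = p4_get_escr_idx(MSR_P4_FSB_ESCR0);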
491 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
492 | { | ||
493 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
494 | unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)]; | ||
495 | |||
496 | struct hw_perf_event *hwc; | ||
497 | struct p4_event_template *tpl; | ||
498 | struct p4_pmu_res *c; | ||
499 | int cpu = raw_smp_processor_id(); | ||
500 | int escr_idx, thread, i, num; | ||
501 | |||
502 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
503 | bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR); | ||
504 | |||
505 | c = &__get_cpu_var(p4_pmu_config); | ||
506 | /* | ||
507 | * First find out which resources the events are going | ||
508 | * to use; if the ESCR+CCCR tuple is already borrowed | ||
509 | * then get out of here | ||
510 | */ | ||
511 | for (i = 0, num = n; i < n; i++, num--) { | ||
512 | hwc = &cpuc->event_list[i]->hw; | ||
513 | tpl = p4_pmu_template_lookup(hwc->config); | ||
514 | if (!tpl) | ||
515 | goto done; | ||
516 | thread = p4_ht_thread(cpu); | ||
517 | escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]); | ||
518 | if (escr_idx == -1) | ||
519 | goto done; | ||
520 | |||
521 | /* already allocated and remains on the same cpu */ | ||
522 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
523 | if (assign) | ||
524 | assign[i] = hwc->idx; | ||
525 | /* upstream dependent event */ | ||
526 | if (unlikely(tpl->dep != -1)) | ||
527 | printk_once(KERN_WARNING "PMU: Dep events are " | ||
528 | "not implemented yet\n"); | ||
529 | goto reserve; | ||
530 | } | ||
531 | |||
532 | /* it may be already borrowed */ | ||
533 | if (test_bit(tpl->cntr[thread], used_mask) || | ||
534 | test_bit(escr_idx, escr_mask)) | ||
535 | goto done; | ||
536 | |||
537 | /* | ||
538 | * ESCR+CCCR+COUNTER are available to use, so let's swap | ||
539 | * the thread specific bits, push the assigned bits | ||
540 | * back and save the template into the per-cpu | ||
541 | * area (which will allow us to find out which ESCR | ||
542 | * to use at the moment of "enable event via real MSR") | ||
543 | */ | ||
544 | p4_pmu_swap_config_ts(hwc, cpu); | ||
545 | if (assign) { | ||
546 | assign[i] = tpl->cntr[thread]; | ||
547 | c->tpl[assign[i]] = tpl; | ||
548 | } | ||
549 | reserve: | ||
550 | set_bit(tpl->cntr[thread], used_mask); | ||
551 | set_bit(escr_idx, escr_mask); | ||
552 | } | ||
553 | |||
554 | done: | ||
555 | return num ? -ENOSPC : 0; | ||
556 | } | ||
557 | |||
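Editorial sketch, not part of the patch: the loop above reserves one counter slot plus one ESCR per event for the current HT thread and reports the chosen counter through the optional assign array. A caller in the generic x86 code would consume the result roughly as below; cpuc, n and X86_PMC_IDX_MAX come from the surrounding perf code, everything else is illustrative.

	int assign[X86_PMC_IDX_MAX];

	/* 0 when every event got an ESCR+CCCR pair, -ENOSPC otherwise */
	if (!p4_pmu_schedule_events(cpuc, n, assign)) {
		/* assign[i] is the counter picked for cpuc->event_list[i] */
	}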
558 | static __initconst struct x86_pmu p4_pmu = { | ||
559 | .name = "Netburst P4/Xeon", | ||
560 | .handle_irq = p4_pmu_handle_irq, | ||
561 | .disable_all = p4_pmu_disable_all, | ||
562 | .enable_all = p4_pmu_enable_all, | ||
563 | .enable = p4_pmu_enable_event, | ||
564 | .disable = p4_pmu_disable_event, | ||
565 | .eventsel = MSR_P4_BPU_CCCR0, | ||
566 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
567 | .event_map = p4_pmu_event_map, | ||
568 | .raw_event = p4_pmu_raw_event, | ||
569 | .max_events = ARRAY_SIZE(p4_event_map), | ||
570 | .get_event_constraints = x86_get_event_constraints, | ||
571 | /* | ||
572 | * If HT is disabled we may need to use all | ||
573 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
574 | * though leave it restricted for the moment assuming | ||
575 | * HT is on | ||
576 | */ | ||
577 | .num_events = ARCH_P4_MAX_CCCR, | ||
578 | .apic = 1, | ||
579 | .event_bits = 40, | ||
580 | .event_mask = (1ULL << 40) - 1, | ||
581 | .max_period = (1ULL << 39) - 1, | ||
582 | .hw_config = p4_hw_config, | ||
583 | .schedule_events = p4_pmu_schedule_events, | ||
584 | }; | ||
585 | |||
586 | static __init int p4_pmu_init(void) | ||
587 | { | ||
588 | unsigned int low, high; | ||
589 | |||
590 | /* If we get stripped -- indexing fails */ | ||
591 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
592 | |||
593 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
594 | if (!(low & (1 << 7))) { | ||
595 | pr_cont("unsupported Netburst CPU model %d ", | ||
596 | boot_cpu_data.x86_model); | ||
597 | return -ENODEV; | ||
598 | } | ||
599 | |||
600 | pr_cont("Netburst events, "); | ||
601 | |||
602 | x86_pmu = p4_pmu; | ||
603 | |||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..6ff4d01d880f 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
109 | .enable_all = p6_pmu_enable_all, | 109 | .enable_all = p6_pmu_enable_all, |
110 | .enable = p6_pmu_enable_event, | 110 | .enable = p6_pmu_enable_event, |
111 | .disable = p6_pmu_disable_event, | 111 | .disable = p6_pmu_disable_event, |
112 | .hw_config = x86_hw_config, | ||
113 | .schedule_events = x86_schedule_events, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | 114 | .eventsel = MSR_P6_EVNTSEL0, |
113 | .perfctr = MSR_P6_PERFCTR0, | 115 | .perfctr = MSR_P6_PERFCTR0, |
114 | .event_map = p6_pmu_event_map, | 116 | .event_map = p6_pmu_event_map, |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b82..cbaf8f2b83df 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -20,7 +20,7 @@ lib-y := delay.o | |||
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | 23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
24 | 24 | ||
25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
26 | 26 | ||
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 95477038a72a..2bccb7b9da2d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -203,8 +203,9 @@ struct perf_event_attr { | |||
203 | enable_on_exec : 1, /* next exec enables */ | 203 | enable_on_exec : 1, /* next exec enables */ |
204 | task : 1, /* trace fork/exit */ | 204 | task : 1, /* trace fork/exit */ |
205 | watermark : 1, /* wakeup_watermark */ | 205 | watermark : 1, /* wakeup_watermark */ |
206 | precise : 1, /* OoO invariant counter */ | ||
206 | 207 | ||
207 | __reserved_1 : 49; | 208 | __reserved_1 : 48; |
208 | 209 | ||
209 | union { | 210 | union { |
210 | __u32 wakeup_events; /* wakeup every n events */ | 211 | __u32 wakeup_events; /* wakeup every n events */ |
@@ -293,6 +294,12 @@ struct perf_event_mmap_page { | |||
293 | #define PERF_RECORD_MISC_USER (2 << 0) | 294 | #define PERF_RECORD_MISC_USER (2 << 0) |
294 | #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) | 295 | #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) |
295 | 296 | ||
297 | #define PERF_RECORD_MISC_EXACT (1 << 14) | ||
298 | /* | ||
299 | * Reserve the last bit to indicate some extended misc field | ||
300 | */ | ||
301 | #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) | ||
302 | |||
296 | struct perf_event_header { | 303 | struct perf_event_header { |
297 | __u32 type; | 304 | __u32 type; |
298 | __u16 misc; | 305 | __u16 misc; |
@@ -468,6 +475,17 @@ struct perf_raw_record { | |||
468 | void *data; | 475 | void *data; |
469 | }; | 476 | }; |
470 | 477 | ||
478 | struct perf_branch_entry { | ||
479 | __u64 from; | ||
480 | __u64 to; | ||
481 | __u64 flags; | ||
482 | }; | ||
483 | |||
484 | struct perf_branch_stack { | ||
485 | __u64 nr; | ||
486 | struct perf_branch_entry entries[0]; | ||
487 | }; | ||
488 | |||
471 | struct task_struct; | 489 | struct task_struct; |
472 | 490 | ||
473 | /** | 491 | /** |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 574ee58a3046..455393e71cab 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -1368,6 +1368,8 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
1368 | if (cpuctx->task_ctx == ctx) | 1368 | if (cpuctx->task_ctx == ctx) |
1369 | return; | 1369 | return; |
1370 | 1370 | ||
1371 | perf_disable(); | ||
1372 | |||
1371 | /* | 1373 | /* |
1372 | * We want to keep the following priority order: | 1374 | * We want to keep the following priority order: |
1373 | * cpu pinned (that don't need to move), task pinned, | 1375 | * cpu pinned (that don't need to move), task pinned, |
@@ -1380,6 +1382,8 @@ void perf_event_task_sched_in(struct task_struct *task) | |||
1380 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | 1382 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); |
1381 | 1383 | ||
1382 | cpuctx->task_ctx = ctx; | 1384 | cpuctx->task_ctx = ctx; |
1385 | |||
1386 | perf_enable(); | ||
1383 | } | 1387 | } |
1384 | 1388 | ||
1385 | #define MAX_INTERRUPTS (~0ULL) | 1389 | #define MAX_INTERRUPTS (~0ULL) |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8a8f52db7e38..0abd25ee595f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -513,6 +513,14 @@ else | |||
513 | LIB_OBJS += util/probe-finder.o | 513 | LIB_OBJS += util/probe-finder.o |
514 | endif | 514 | endif |
515 | 515 | ||
516 | ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) | ||
517 | msg := $(warning newt not found, disabling TUI support. Please install newt-devel or libnewt-dev); | ||
518 | BASIC_CFLAGS += -DNO_NEWT_SUPPORT | ||
519 | else | ||
520 | EXTLIBS += -lnewt | ||
521 | LIB_OBJS += util/newt.o | ||
522 | endif | ||
523 | |||
516 | ifndef NO_LIBPERL | 524 | ifndef NO_LIBPERL |
517 | PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` | 525 | PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` |
518 | PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` | 526 | PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6ad7148451c5..45d14660d53d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -452,6 +452,16 @@ static void annotate_sym(struct hist_entry *he) | |||
452 | if (!filename) | 452 | if (!filename) |
453 | return; | 453 | return; |
454 | 454 | ||
455 | if (dso->origin == DSO__ORIG_KERNEL) { | ||
456 | if (dso->annotate_warned) | ||
457 | return; | ||
458 | dso->annotate_warned = 1; | ||
459 | pr_err("Can't annotate %s: No vmlinux file was found in the " | ||
460 | "path:\n", sym->name); | ||
461 | vmlinux_path__fprintf(stderr); | ||
462 | return; | ||
463 | } | ||
464 | |||
455 | pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__, | 465 | pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__, |
456 | filename, sym->name, map->unmap_ip(map, sym->start), | 466 | filename, sym->name, map->unmap_ip(map, sym->start), |
457 | map->unmap_ip(map, sym->end)); | 467 | map->unmap_ip(map, sym->end)); |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3b8b6387c47c..962cdbf44ae9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -225,7 +225,7 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n | |||
225 | return h_attr; | 225 | return h_attr; |
226 | } | 226 | } |
227 | 227 | ||
228 | static void create_counter(int counter, int cpu, pid_t pid) | 228 | static void create_counter(int counter, int cpu, pid_t pid, bool forks) |
229 | { | 229 | { |
230 | char *filter = filters[counter]; | 230 | char *filter = filters[counter]; |
231 | struct perf_event_attr *attr = attrs + counter; | 231 | struct perf_event_attr *attr = attrs + counter; |
@@ -277,6 +277,9 @@ static void create_counter(int counter, int cpu, pid_t pid) | |||
277 | attr->inherit = inherit; | 277 | attr->inherit = inherit; |
278 | attr->disabled = 1; | 278 | attr->disabled = 1; |
279 | 279 | ||
280 | if (forks) | ||
281 | attr->enable_on_exec = 1; | ||
282 | |||
280 | try_again: | 283 | try_again: |
281 | fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); | 284 | fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); |
282 | 285 | ||
@@ -284,7 +287,8 @@ try_again: | |||
284 | int err = errno; | 287 | int err = errno; |
285 | 288 | ||
286 | if (err == EPERM || err == EACCES) | 289 | if (err == EPERM || err == EACCES) |
287 | die("Permission error - are you root?\n"); | 290 | die("Permission error - are you root?\n" |
291 | "\t Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n"); | ||
288 | else if (err == ENODEV && profile_cpu != -1) | 292 | else if (err == ENODEV && profile_cpu != -1) |
289 | die("No such device - did you specify an out-of-range profile CPU?\n"); | 293 | die("No such device - did you specify an out-of-range profile CPU?\n"); |
290 | 294 | ||
@@ -380,13 +384,13 @@ try_again: | |||
380 | ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); | 384 | ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); |
381 | } | 385 | } |
382 | 386 | ||
383 | static void open_counters(int cpu, pid_t pid) | 387 | static void open_counters(int cpu, pid_t pid, bool forks) |
384 | { | 388 | { |
385 | int counter; | 389 | int counter; |
386 | 390 | ||
387 | group_fd = -1; | 391 | group_fd = -1; |
388 | for (counter = 0; counter < nr_counters; counter++) | 392 | for (counter = 0; counter < nr_counters; counter++) |
389 | create_counter(counter, cpu, pid); | 393 | create_counter(counter, cpu, pid, forks); |
390 | 394 | ||
391 | nr_cpu++; | 395 | nr_cpu++; |
392 | } | 396 | } |
@@ -546,11 +550,11 @@ static int __cmd_record(int argc, const char **argv) | |||
546 | 550 | ||
547 | 551 | ||
548 | if ((!system_wide && !inherit) || profile_cpu != -1) { | 552 | if ((!system_wide && !inherit) || profile_cpu != -1) { |
549 | open_counters(profile_cpu, target_pid); | 553 | open_counters(profile_cpu, target_pid, forks); |
550 | } else { | 554 | } else { |
551 | nr_cpus = read_cpu_map(); | 555 | nr_cpus = read_cpu_map(); |
552 | for (i = 0; i < nr_cpus; i++) | 556 | for (i = 0; i < nr_cpus; i++) |
553 | open_counters(cpumap[i], target_pid); | 557 | open_counters(cpumap[i], target_pid, forks); |
554 | } | 558 | } |
555 | 559 | ||
556 | if (file_new) { | 560 | if (file_new) { |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f815de25d0fc..1f9f8695f055 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -267,6 +267,7 @@ static int __cmd_report(void) | |||
267 | int ret = -EINVAL; | 267 | int ret = -EINVAL; |
268 | struct perf_session *session; | 268 | struct perf_session *session; |
269 | struct rb_node *next; | 269 | struct rb_node *next; |
270 | const char *help = "For a higher level overview, try: perf report --sort comm,dso"; | ||
270 | 271 | ||
271 | session = perf_session__new(input_name, O_RDONLY, force); | 272 | session = perf_session__new(input_name, O_RDONLY, force); |
272 | if (session == NULL) | 273 | if (session == NULL) |
@@ -301,30 +302,38 @@ static int __cmd_report(void) | |||
301 | stats = rb_entry(next, struct event_stat_id, rb_node); | 302 | stats = rb_entry(next, struct event_stat_id, rb_node); |
302 | perf_session__collapse_resort(&stats->hists); | 303 | perf_session__collapse_resort(&stats->hists); |
303 | perf_session__output_resort(&stats->hists, stats->stats.total); | 304 | perf_session__output_resort(&stats->hists, stats->stats.total); |
304 | if (rb_first(&session->stats_by_id) == | ||
305 | rb_last(&session->stats_by_id)) | ||
306 | fprintf(stdout, "# Samples: %Ld\n#\n", | ||
307 | stats->stats.total); | ||
308 | else | ||
309 | fprintf(stdout, "# Samples: %Ld %s\n#\n", | ||
310 | stats->stats.total, | ||
311 | __event_name(stats->type, stats->config)); | ||
312 | 305 | ||
313 | perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, | 306 | if (use_browser) |
307 | perf_session__browse_hists(&stats->hists, | ||
308 | stats->stats.total, help); | ||
309 | else { | ||
310 | if (rb_first(&session->stats_by_id) == | ||
311 | rb_last(&session->stats_by_id)) | ||
312 | fprintf(stdout, "# Samples: %Ld\n#\n", | ||
313 | stats->stats.total); | ||
314 | else | ||
315 | fprintf(stdout, "# Samples: %Ld %s\n#\n", | ||
316 | stats->stats.total, | ||
317 | __event_name(stats->type, stats->config)); | ||
318 | |||
319 | perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, | ||
314 | stats->stats.total); | 320 | stats->stats.total); |
315 | fprintf(stdout, "\n\n"); | 321 | fprintf(stdout, "\n\n"); |
322 | } | ||
323 | |||
316 | next = rb_next(&stats->rb_node); | 324 | next = rb_next(&stats->rb_node); |
317 | } | 325 | } |
318 | 326 | ||
319 | if (sort_order == default_sort_order && | 327 | if (!use_browser && sort_order == default_sort_order && |
320 | parent_pattern == default_parent_pattern) | 328 | parent_pattern == default_parent_pattern) { |
321 | fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); | 329 | fprintf(stdout, "#\n# (%s)\n#\n", help); |
322 | 330 | ||
323 | if (show_threads) { | 331 | if (show_threads) { |
324 | bool raw_printing_style = !strcmp(pretty_printing_style, "raw"); | 332 | bool style = !strcmp(pretty_printing_style, "raw"); |
325 | perf_read_values_display(stdout, &show_threads_values, | 333 | perf_read_values_display(stdout, &show_threads_values, |
326 | raw_printing_style); | 334 | style); |
327 | perf_read_values_destroy(&show_threads_values); | 335 | perf_read_values_destroy(&show_threads_values); |
336 | } | ||
328 | } | 337 | } |
329 | out_delete: | 338 | out_delete: |
330 | perf_session__delete(session); | 339 | perf_session__delete(session); |
@@ -447,7 +456,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
447 | { | 456 | { |
448 | argc = parse_options(argc, argv, options, report_usage, 0); | 457 | argc = parse_options(argc, argv, options, report_usage, 0); |
449 | 458 | ||
450 | setup_pager(); | 459 | setup_browser(); |
451 | 460 | ||
452 | if (symbol__init() < 0) | 461 | if (symbol__init() < 0) |
453 | return -1; | 462 | return -1; |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1f529321607e..887ebbf5d1ff 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -133,7 +133,7 @@ static inline struct symbol *sym_entry__symbol(struct sym_entry *self) | |||
133 | return ((void *)self) + symbol_conf.priv_size; | 133 | return ((void *)self) + symbol_conf.priv_size; |
134 | } | 134 | } |
135 | 135 | ||
136 | static void get_term_dimensions(struct winsize *ws) | 136 | void get_term_dimensions(struct winsize *ws) |
137 | { | 137 | { |
138 | char *s = getenv("LINES"); | 138 | char *s = getenv("LINES"); |
139 | 139 | ||
@@ -169,7 +169,7 @@ static void sig_winch_handler(int sig __used) | |||
169 | update_print_entries(&winsize); | 169 | update_print_entries(&winsize); |
170 | } | 170 | } |
171 | 171 | ||
172 | static void parse_source(struct sym_entry *syme) | 172 | static int parse_source(struct sym_entry *syme) |
173 | { | 173 | { |
174 | struct symbol *sym; | 174 | struct symbol *sym; |
175 | struct sym_entry_source *source; | 175 | struct sym_entry_source *source; |
@@ -180,12 +180,21 @@ static void parse_source(struct sym_entry *syme) | |||
180 | u64 len; | 180 | u64 len; |
181 | 181 | ||
182 | if (!syme) | 182 | if (!syme) |
183 | return; | 183 | return -1; |
184 | |||
185 | sym = sym_entry__symbol(syme); | ||
186 | map = syme->map; | ||
187 | |||
188 | /* | ||
189 | * We can't annotate with just /proc/kallsyms | ||
190 | */ | ||
191 | if (map->dso->origin == DSO__ORIG_KERNEL) | ||
192 | return -1; | ||
184 | 193 | ||
185 | if (syme->src == NULL) { | 194 | if (syme->src == NULL) { |
186 | syme->src = zalloc(sizeof(*source)); | 195 | syme->src = zalloc(sizeof(*source)); |
187 | if (syme->src == NULL) | 196 | if (syme->src == NULL) |
188 | return; | 197 | return -1; |
189 | pthread_mutex_init(&syme->src->lock, NULL); | 198 | pthread_mutex_init(&syme->src->lock, NULL); |
190 | } | 199 | } |
191 | 200 | ||
@@ -195,9 +204,6 @@ static void parse_source(struct sym_entry *syme) | |||
195 | pthread_mutex_lock(&source->lock); | 204 | pthread_mutex_lock(&source->lock); |
196 | goto out_assign; | 205 | goto out_assign; |
197 | } | 206 | } |
198 | |||
199 | sym = sym_entry__symbol(syme); | ||
200 | map = syme->map; | ||
201 | path = map->dso->long_name; | 207 | path = map->dso->long_name; |
202 | 208 | ||
203 | len = sym->end - sym->start; | 209 | len = sym->end - sym->start; |
@@ -209,7 +215,7 @@ static void parse_source(struct sym_entry *syme) | |||
209 | 215 | ||
210 | file = popen(command, "r"); | 216 | file = popen(command, "r"); |
211 | if (!file) | 217 | if (!file) |
212 | return; | 218 | return -1; |
213 | 219 | ||
214 | pthread_mutex_lock(&source->lock); | 220 | pthread_mutex_lock(&source->lock); |
215 | source->lines_tail = &source->lines; | 221 | source->lines_tail = &source->lines; |
@@ -245,6 +251,7 @@ static void parse_source(struct sym_entry *syme) | |||
245 | out_assign: | 251 | out_assign: |
246 | sym_filter_entry = syme; | 252 | sym_filter_entry = syme; |
247 | pthread_mutex_unlock(&source->lock); | 253 | pthread_mutex_unlock(&source->lock); |
254 | return 0; | ||
248 | } | 255 | } |
249 | 256 | ||
250 | static void __zero_source_counters(struct sym_entry *syme) | 257 | static void __zero_source_counters(struct sym_entry *syme) |
@@ -411,6 +418,7 @@ static double sym_weight(const struct sym_entry *sym) | |||
411 | 418 | ||
412 | static long samples; | 419 | static long samples; |
413 | static long userspace_samples; | 420 | static long userspace_samples; |
421 | static long exact_samples; | ||
414 | static const char CONSOLE_CLEAR[] = "[H[2J"; | 422 | static const char CONSOLE_CLEAR[] = "[H[2J"; |
415 | 423 | ||
416 | static void __list_insert_active_sym(struct sym_entry *syme) | 424 | static void __list_insert_active_sym(struct sym_entry *syme) |
@@ -451,6 +459,7 @@ static void print_sym_table(void) | |||
451 | int counter, snap = !display_weighted ? sym_counter : 0; | 459 | int counter, snap = !display_weighted ? sym_counter : 0; |
452 | float samples_per_sec = samples/delay_secs; | 460 | float samples_per_sec = samples/delay_secs; |
453 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; | 461 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; |
462 | float esamples_percent = (100.0*exact_samples)/samples; | ||
454 | float sum_ksamples = 0.0; | 463 | float sum_ksamples = 0.0; |
455 | struct sym_entry *syme, *n; | 464 | struct sym_entry *syme, *n; |
456 | struct rb_root tmp = RB_ROOT; | 465 | struct rb_root tmp = RB_ROOT; |
@@ -458,7 +467,7 @@ static void print_sym_table(void) | |||
458 | int sym_width = 0, dso_width = 0, dso_short_width = 0; | 467 | int sym_width = 0, dso_width = 0, dso_short_width = 0; |
459 | const int win_width = winsize.ws_col - 1; | 468 | const int win_width = winsize.ws_col - 1; |
460 | 469 | ||
461 | samples = userspace_samples = 0; | 470 | samples = userspace_samples = exact_samples = 0; |
462 | 471 | ||
463 | /* Sort the active symbols */ | 472 | /* Sort the active symbols */ |
464 | pthread_mutex_lock(&active_symbols_lock); | 473 | pthread_mutex_lock(&active_symbols_lock); |
@@ -489,9 +498,10 @@ static void print_sym_table(void) | |||
489 | puts(CONSOLE_CLEAR); | 498 | puts(CONSOLE_CLEAR); |
490 | 499 | ||
491 | printf("%-*.*s\n", win_width, win_width, graph_dotted_line); | 500 | printf("%-*.*s\n", win_width, win_width, graph_dotted_line); |
492 | printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", | 501 | printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% exact: %4.1f%% [", |
493 | samples_per_sec, | 502 | samples_per_sec, |
494 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); | 503 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)), |
504 | esamples_percent); | ||
495 | 505 | ||
496 | if (nr_counters == 1 || !display_weighted) { | 506 | if (nr_counters == 1 || !display_weighted) { |
497 | printf("%Ld", (u64)attrs[0].sample_period); | 507 | printf("%Ld", (u64)attrs[0].sample_period); |
@@ -960,6 +970,9 @@ static void event__process_sample(const event_t *self, | |||
960 | return; | 970 | return; |
961 | } | 971 | } |
962 | 972 | ||
973 | if (self->header.misc & PERF_RECORD_MISC_EXACT) | ||
974 | exact_samples++; | ||
975 | |||
963 | if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || | 976 | if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || |
964 | al.filtered) | 977 | al.filtered) |
965 | return; | 978 | return; |
@@ -990,7 +1003,17 @@ static void event__process_sample(const event_t *self, | |||
990 | if (sym_filter_entry_sched) { | 1003 | if (sym_filter_entry_sched) { |
991 | sym_filter_entry = sym_filter_entry_sched; | 1004 | sym_filter_entry = sym_filter_entry_sched; |
992 | sym_filter_entry_sched = NULL; | 1005 | sym_filter_entry_sched = NULL; |
993 | parse_source(sym_filter_entry); | 1006 | if (parse_source(sym_filter_entry) < 0) { |
1007 | struct symbol *sym = sym_entry__symbol(sym_filter_entry); | ||
1008 | |||
1009 | pr_err("Can't annotate %s", sym->name); | ||
1010 | if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { | ||
1011 | pr_err(": No vmlinux file was found in the path:\n"); | ||
1012 | vmlinux_path__fprintf(stderr); | ||
1013 | } else | ||
1014 | pr_err(".\n"); | ||
1015 | exit(1); | ||
1016 | } | ||
994 | } | 1017 | } |
995 | 1018 | ||
996 | syme = symbol__priv(al.sym); | 1019 | syme = symbol__priv(al.sym); |
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 57cb107c1f13..0d4b9edfab12 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include "util/string.h" | 16 | #include "util/string.h" |
17 | #include "util/debugfs.h" | 17 | #include "util/debugfs.h" |
18 | 18 | ||
19 | bool use_browser; | ||
20 | |||
19 | const char perf_usage_string[] = | 21 | const char perf_usage_string[] = |
20 | "perf [--version] [--help] COMMAND [ARGS]"; | 22 | "perf [--version] [--help] COMMAND [ARGS]"; |
21 | 23 | ||
@@ -265,6 +267,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) | |||
265 | if (status) | 267 | if (status) |
266 | return status & 0xff; | 268 | return status & 0xff; |
267 | 269 | ||
270 | exit_browser(); | ||
271 | |||
268 | /* Somebody closed stdout? */ | 272 | /* Somebody closed stdout? */ |
269 | if (fstat(fileno(stdout), &st)) | 273 | if (fstat(fileno(stdout), &st)) |
270 | return 0; | 274 | return 0; |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 6fb379bc1d1f..aa786158b668 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -1,6 +1,10 @@ | |||
1 | #ifndef _PERF_PERF_H | 1 | #ifndef _PERF_PERF_H |
2 | #define _PERF_PERF_H | 2 | #define _PERF_PERF_H |
3 | 3 | ||
4 | struct winsize; | ||
5 | |||
6 | void get_term_dimensions(struct winsize *ws); | ||
7 | |||
4 | #if defined(__i386__) | 8 | #if defined(__i386__) |
5 | #include "../../arch/x86/include/asm/unistd.h" | 9 | #include "../../arch/x86/include/asm/unistd.h" |
6 | #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") | 10 | #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") |
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 918eb376abe3..47b12a3d11bf 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef __PERF_CACHE_H | 1 | #ifndef __PERF_CACHE_H |
2 | #define __PERF_CACHE_H | 2 | #define __PERF_CACHE_H |
3 | 3 | ||
4 | #include <stdbool.h> | ||
4 | #include "util.h" | 5 | #include "util.h" |
5 | #include "strbuf.h" | 6 | #include "strbuf.h" |
6 | #include "../perf.h" | 7 | #include "../perf.h" |
@@ -69,6 +70,19 @@ extern const char *pager_program; | |||
69 | extern int pager_in_use(void); | 70 | extern int pager_in_use(void); |
70 | extern int pager_use_color; | 71 | extern int pager_use_color; |
71 | 72 | ||
73 | extern bool use_browser; | ||
74 | |||
75 | #ifdef NO_NEWT_SUPPORT | ||
76 | static inline void setup_browser(void) | ||
77 | { | ||
78 | setup_pager(); | ||
79 | } | ||
80 | static inline void exit_browser(void) {} | ||
81 | #else | ||
82 | void setup_browser(void); | ||
83 | void exit_browser(void); | ||
84 | #endif | ||
85 | |||
72 | extern const char *editor_program; | 86 | extern const char *editor_program; |
73 | extern const char *excludes_file; | 87 | extern const char *excludes_file; |
74 | 88 | ||
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index e88bca55a599..9da01914e0af 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c | |||
@@ -203,7 +203,10 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) | |||
203 | int r; | 203 | int r; |
204 | 204 | ||
205 | va_start(args, fmt); | 205 | va_start(args, fmt); |
206 | r = color_vfprintf(fp, color, fmt, args); | 206 | if (use_browser) |
207 | r = vfprintf(fp, fmt, args); | ||
208 | else | ||
209 | r = color_vfprintf(fp, color, fmt, args); | ||
207 | va_end(args); | 210 | va_end(args); |
208 | return r; | 211 | return r; |
209 | } | 212 | } |
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 0905600c3851..033d66db863a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <stdarg.h> | 6 | #include <stdarg.h> |
7 | #include <stdio.h> | 7 | #include <stdio.h> |
8 | 8 | ||
9 | #include "cache.h" | ||
9 | #include "color.h" | 10 | #include "color.h" |
10 | #include "event.h" | 11 | #include "event.h" |
11 | #include "debug.h" | 12 | #include "debug.h" |
@@ -21,7 +22,10 @@ int eprintf(int level, const char *fmt, ...) | |||
21 | 22 | ||
22 | if (verbose >= level) { | 23 | if (verbose >= level) { |
23 | va_start(args, fmt); | 24 | va_start(args, fmt); |
24 | ret = vfprintf(stderr, fmt, args); | 25 | if (use_browser) |
26 | ret = browser__show_help(fmt, args); | ||
27 | else | ||
28 | ret = vfprintf(stderr, fmt, args); | ||
25 | va_end(args); | 29 | va_end(args); |
26 | } | 30 | } |
27 | 31 | ||
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index c6c24c522dea..0172edf3f153 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h | |||
@@ -7,9 +7,16 @@ | |||
7 | extern int verbose; | 7 | extern int verbose; |
8 | extern int dump_trace; | 8 | extern int dump_trace; |
9 | 9 | ||
10 | int eprintf(int level, | ||
11 | const char *fmt, ...) __attribute__((format(printf, 2, 3))); | ||
12 | int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); | 10 | int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); |
13 | void trace_event(event_t *event); | 11 | void trace_event(event_t *event); |
14 | 12 | ||
13 | #ifdef NO_NEWT_SUPPORT | ||
14 | static inline int browser__show_help(const char *format __used, va_list ap __used) | ||
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | #else | ||
19 | int browser__show_help(const char *format, va_list ap); | ||
20 | #endif | ||
21 | |||
15 | #endif /* __PERF_DEBUG_H */ | 22 | #endif /* __PERF_DEBUG_H */ |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index bdcfd6190b21..1a4e8376d843 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -455,11 +455,11 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, | |||
455 | return ret; | 455 | return ret; |
456 | } | 456 | } |
457 | 457 | ||
458 | static size_t hist_entry__fprintf(struct hist_entry *self, | 458 | size_t hist_entry__fprintf(struct hist_entry *self, |
459 | struct perf_session *pair_session, | 459 | struct perf_session *pair_session, |
460 | bool show_displacement, | 460 | bool show_displacement, |
461 | long displacement, FILE *fp, | 461 | long displacement, FILE *fp, |
462 | u64 session_total) | 462 | u64 session_total) |
463 | { | 463 | { |
464 | struct sort_entry *se; | 464 | struct sort_entry *se; |
465 | u64 count, total; | 465 | u64 count, total; |
@@ -485,9 +485,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self, | |||
485 | 485 | ||
486 | if (symbol_conf.show_nr_samples) { | 486 | if (symbol_conf.show_nr_samples) { |
487 | if (sep) | 487 | if (sep) |
488 | fprintf(fp, "%c%lld", *sep, count); | 488 | ret += fprintf(fp, "%c%lld", *sep, count); |
489 | else | 489 | else |
490 | fprintf(fp, "%11lld", count); | 490 | ret += fprintf(fp, "%11lld", count); |
491 | } | 491 | } |
492 | 492 | ||
493 | if (pair_session) { | 493 | if (pair_session) { |
@@ -518,9 +518,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self, | |||
518 | snprintf(bf, sizeof(bf), " "); | 518 | snprintf(bf, sizeof(bf), " "); |
519 | 519 | ||
520 | if (sep) | 520 | if (sep) |
521 | fprintf(fp, "%c%s", *sep, bf); | 521 | ret += fprintf(fp, "%c%s", *sep, bf); |
522 | else | 522 | else |
523 | fprintf(fp, "%6.6s", bf); | 523 | ret += fprintf(fp, "%6.6s", bf); |
524 | } | 524 | } |
525 | } | 525 | } |
526 | 526 | ||
@@ -528,27 +528,27 @@ static size_t hist_entry__fprintf(struct hist_entry *self, | |||
528 | if (se->elide) | 528 | if (se->elide) |
529 | continue; | 529 | continue; |
530 | 530 | ||
531 | fprintf(fp, "%s", sep ?: " "); | 531 | ret += fprintf(fp, "%s", sep ?: " "); |
532 | ret += se->print(fp, self, se->width ? *se->width : 0); | 532 | ret += se->print(fp, self, se->width ? *se->width : 0); |
533 | } | 533 | } |
534 | 534 | ||
535 | ret += fprintf(fp, "\n"); | 535 | return ret + fprintf(fp, "\n"); |
536 | 536 | } | |
537 | if (symbol_conf.use_callchain) { | ||
538 | int left_margin = 0; | ||
539 | 537 | ||
540 | if (sort__first_dimension == SORT_COMM) { | 538 | static size_t hist_entry__fprintf_callchain(struct hist_entry *self, FILE *fp, |
541 | se = list_first_entry(&hist_entry__sort_list, typeof(*se), | 539 | u64 session_total) |
542 | list); | 540 | { |
543 | left_margin = se->width ? *se->width : 0; | 541 | int left_margin = 0; |
544 | left_margin -= thread__comm_len(self->thread); | ||
545 | } | ||
546 | 542 | ||
547 | hist_entry_callchain__fprintf(fp, self, session_total, | 543 | if (sort__first_dimension == SORT_COMM) { |
548 | left_margin); | 544 | struct sort_entry *se = list_first_entry(&hist_entry__sort_list, |
545 | typeof(*se), list); | ||
546 | left_margin = se->width ? *se->width : 0; | ||
547 | left_margin -= thread__comm_len(self->thread); | ||
549 | } | 548 | } |
550 | 549 | ||
551 | return ret; | 550 | return hist_entry_callchain__fprintf(fp, self, session_total, |
551 | left_margin); | ||
552 | } | 552 | } |
553 | 553 | ||
554 | size_t perf_session__fprintf_hists(struct rb_root *hists, | 554 | size_t perf_session__fprintf_hists(struct rb_root *hists, |
@@ -655,6 +655,10 @@ print_entries: | |||
655 | } | 655 | } |
656 | ret += hist_entry__fprintf(h, pair, show_displacement, | 656 | ret += hist_entry__fprintf(h, pair, show_displacement, |
657 | displacement, fp, session_total); | 657 | displacement, fp, session_total); |
658 | |||
659 | if (symbol_conf.use_callchain) | ||
660 | ret += hist_entry__fprintf_callchain(h, fp, session_total); | ||
661 | |||
658 | if (h->map == NULL && verbose > 1) { | 662 | if (h->map == NULL && verbose > 1) { |
659 | __map_groups__fprintf_maps(&h->thread->mg, | 663 | __map_groups__fprintf_maps(&h->thread->mg, |
660 | MAP__FUNCTION, fp); | 664 | MAP__FUNCTION, fp); |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 16f360cce5bf..fe366ce5db45 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -18,6 +18,11 @@ struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, | |||
18 | u64 count, bool *hit); | 18 | u64 count, bool *hit); |
19 | extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); | 19 | extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); |
20 | extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); | 20 | extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); |
21 | size_t hist_entry__fprintf(struct hist_entry *self, | ||
22 | struct perf_session *pair_session, | ||
23 | bool show_displacement, | ||
24 | long displacement, FILE *fp, | ||
25 | u64 session_total); | ||
21 | void hist_entry__free(struct hist_entry *); | 26 | void hist_entry__free(struct hist_entry *); |
22 | 27 | ||
23 | void perf_session__output_resort(struct rb_root *hists, u64 total_samples); | 28 | void perf_session__output_resort(struct rb_root *hists, u64 total_samples); |
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index f2611655ab51..388ab1bfd114 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h | |||
@@ -85,16 +85,19 @@ simple_strtoul(const char *nptr, char **endptr, int base) | |||
85 | return strtoul(nptr, endptr, base); | 85 | return strtoul(nptr, endptr, base); |
86 | } | 86 | } |
87 | 87 | ||
88 | int eprintf(int level, | ||
89 | const char *fmt, ...) __attribute__((format(printf, 2, 3))); | ||
90 | |||
88 | #ifndef pr_fmt | 91 | #ifndef pr_fmt |
89 | #define pr_fmt(fmt) fmt | 92 | #define pr_fmt(fmt) fmt |
90 | #endif | 93 | #endif |
91 | 94 | ||
92 | #define pr_err(fmt, ...) \ | 95 | #define pr_err(fmt, ...) \ |
93 | do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) | 96 | eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) |
94 | #define pr_warning(fmt, ...) \ | 97 | #define pr_warning(fmt, ...) \ |
95 | do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) | 98 | eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) |
96 | #define pr_info(fmt, ...) \ | 99 | #define pr_info(fmt, ...) \ |
97 | do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) | 100 | eprintf(0, pr_fmt(fmt), ##__VA_ARGS__) |
98 | #define pr_debug(fmt, ...) \ | 101 | #define pr_debug(fmt, ...) \ |
99 | eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) | 102 | eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) |
100 | #define pr_debugN(n, fmt, ...) \ | 103 | #define pr_debugN(n, fmt, ...) \ |
diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c new file mode 100644 index 000000000000..2d19e7a3e6e8 --- /dev/null +++ b/tools/perf/util/newt.c | |||
@@ -0,0 +1,207 @@ | |||
1 | #define _GNU_SOURCE | ||
2 | #include <stdio.h> | ||
3 | #undef _GNU_SOURCE | ||
4 | |||
5 | #include <stdlib.h> | ||
6 | #include <newt.h> | ||
7 | #include <sys/ttydefaults.h> | ||
8 | |||
9 | #include "cache.h" | ||
10 | #include "hist.h" | ||
11 | #include "session.h" | ||
12 | #include "sort.h" | ||
13 | #include "symbol.h" | ||
14 | |||
15 | static void newt_form__set_exit_keys(newtComponent self) | ||
16 | { | ||
17 | newtFormAddHotKey(self, NEWT_KEY_ESCAPE); | ||
18 | newtFormAddHotKey(self, 'Q'); | ||
19 | newtFormAddHotKey(self, 'q'); | ||
20 | newtFormAddHotKey(self, CTRL('c')); | ||
21 | } | ||
22 | |||
23 | static newtComponent newt_form__new(void) | ||
24 | { | ||
25 | newtComponent self = newtForm(NULL, NULL, 0); | ||
26 | if (self) | ||
27 | newt_form__set_exit_keys(self); | ||
28 | return self; | ||
29 | } | ||
30 | |||
31 | static size_t hist_entry__append_browser(struct hist_entry *self, | ||
32 | newtComponent listbox, u64 total) | ||
33 | { | ||
34 | char bf[1024]; | ||
35 | size_t len; | ||
36 | FILE *fp; | ||
37 | |||
38 | if (symbol_conf.exclude_other && !self->parent) | ||
39 | return 0; | ||
40 | |||
41 | fp = fmemopen(bf, sizeof(bf), "w"); | ||
42 | if (fp == NULL) | ||
43 | return 0; | ||
44 | |||
45 | len = hist_entry__fprintf(self, NULL, false, 0, fp, total); | ||
46 | |||
47 | fclose(fp); | ||
48 | newtListboxAppendEntry(listbox, bf, self); | ||
49 | return len; | ||
50 | } | ||
51 | |||
52 | static void hist_entry__annotate_browser(struct hist_entry *self) | ||
53 | { | ||
54 | FILE *fp; | ||
55 | int cols, rows; | ||
56 | newtComponent form, listbox; | ||
57 | struct newtExitStruct es; | ||
58 | char *str; | ||
59 | size_t line_len, max_line_len = 0; | ||
60 | size_t max_usable_width; | ||
61 | char *line = NULL; | ||
62 | |||
63 | if (self->sym == NULL) | ||
64 | return; | ||
65 | |||
66 | if (asprintf(&str, "perf annotate %s 2>&1 | expand", self->sym->name) < 0) | ||
67 | return; | ||
68 | |||
69 | fp = popen(str, "r"); | ||
70 | if (fp == NULL) | ||
71 | goto out_free_str; | ||
72 | |||
73 | newtPushHelpLine("Press ESC to exit"); | ||
74 | newtGetScreenSize(&cols, &rows); | ||
75 | listbox = newtListbox(0, 0, rows - 5, NEWT_FLAG_SCROLL); | ||
76 | |||
77 | while (!feof(fp)) { | ||
78 | if (getline(&line, &line_len, fp) < 0 || !line_len) | ||
79 | break; | ||
80 | while (line_len != 0 && isspace(line[line_len - 1])) | ||
81 | line[--line_len] = '\0'; | ||
82 | |||
83 | if (line_len > max_line_len) | ||
84 | max_line_len = line_len; | ||
85 | newtListboxAppendEntry(listbox, line, NULL); | ||
86 | } | ||
87 | pclose(fp); | ||
88 | free(line); | ||
89 | |||
90 | max_usable_width = cols - 22; | ||
91 | if (max_line_len > max_usable_width) | ||
92 | max_line_len = max_usable_width; | ||
93 | |||
94 | newtListboxSetWidth(listbox, max_line_len); | ||
95 | |||
96 | newtCenteredWindow(max_line_len + 2, rows - 5, self->sym->name); | ||
97 | form = newt_form__new(); | ||
98 | newtFormAddComponents(form, listbox, NULL); | ||
99 | |||
100 | newtFormRun(form, &es); | ||
101 | newtFormDestroy(form); | ||
102 | newtPopWindow(); | ||
103 | newtPopHelpLine(); | ||
104 | out_free_str: | ||
105 | free(str); | ||
106 | } | ||
107 | |||
108 | void perf_session__browse_hists(struct rb_root *hists, u64 session_total, | ||
109 | const char *helpline) | ||
110 | { | ||
111 | struct sort_entry *se; | ||
112 | struct rb_node *nd; | ||
113 | unsigned int width; | ||
114 | char *col_width = symbol_conf.col_width_list_str; | ||
115 | int rows; | ||
116 | size_t max_len = 0; | ||
117 | char str[1024]; | ||
118 | newtComponent form, listbox; | ||
119 | struct newtExitStruct es; | ||
120 | |||
121 | snprintf(str, sizeof(str), "Samples: %Ld", session_total); | ||
122 | newtDrawRootText(0, 0, str); | ||
123 | newtPushHelpLine(helpline); | ||
124 | |||
125 | newtGetScreenSize(NULL, &rows); | ||
126 | |||
127 | form = newt_form__new(); | ||
128 | |||
129 | listbox = newtListbox(1, 1, rows - 2, (NEWT_FLAG_SCROLL | | ||
130 | NEWT_FLAG_BORDER | | ||
131 | NEWT_FLAG_RETURNEXIT)); | ||
132 | |||
133 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
134 | if (se->elide) | ||
135 | continue; | ||
136 | width = strlen(se->header); | ||
137 | if (se->width) { | ||
138 | if (symbol_conf.col_width_list_str) { | ||
139 | if (col_width) { | ||
140 | *se->width = atoi(col_width); | ||
141 | col_width = strchr(col_width, ','); | ||
142 | if (col_width) | ||
143 | ++col_width; | ||
144 | } | ||
145 | } | ||
146 | *se->width = max(*se->width, width); | ||
147 | } | ||
148 | } | ||
149 | |||
150 | for (nd = rb_first(hists); nd; nd = rb_next(nd)) { | ||
151 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | ||
152 | size_t len = hist_entry__append_browser(h, listbox, session_total); | ||
153 | if (len > max_len) | ||
154 | max_len = len; | ||
155 | } | ||
156 | |||
157 | newtListboxSetWidth(listbox, max_len); | ||
158 | newtFormAddComponents(form, listbox, NULL); | ||
159 | |||
160 | while (1) { | ||
161 | struct hist_entry *selection; | ||
162 | |||
163 | newtFormRun(form, &es); | ||
164 | if (es.reason == NEWT_EXIT_HOTKEY) | ||
165 | break; | ||
166 | selection = newtListboxGetCurrent(listbox); | ||
167 | hist_entry__annotate_browser(selection); | ||
168 | } | ||
169 | |||
170 | newtFormDestroy(form); | ||
171 | } | ||
172 | |||
173 | int browser__show_help(const char *format, va_list ap) | ||
174 | { | ||
175 | int ret; | ||
176 | static int backlog; | ||
177 | static char msg[1024]; | ||
178 | |||
179 | ret = vsnprintf(msg + backlog, sizeof(msg) - backlog, format, ap); | ||
180 | backlog += ret; | ||
181 | |||
182 | if (msg[backlog - 1] == '\n') { | ||
183 | newtPopHelpLine(); | ||
184 | newtPushHelpLine(msg); | ||
185 | newtRefresh(); | ||
186 | backlog = 0; | ||
187 | } | ||
188 | |||
189 | return ret; | ||
190 | } | ||
191 | |||
192 | void setup_browser(void) | ||
193 | { | ||
194 | if (!isatty(1)) | ||
195 | return; | ||
196 | |||
197 | use_browser = true; | ||
198 | newtInit(); | ||
199 | newtCls(); | ||
200 | newtPushHelpLine(" "); | ||
201 | } | ||
202 | |||
203 | void exit_browser(void) | ||
204 | { | ||
205 | if (use_browser) | ||
206 | newtFinished(); | ||
207 | } | ||
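
Editor's note: the util/newt.c code above is the whole TUI layer this series introduces. A minimal caller sketch follows, assuming only the interfaces the patch itself adds (setup_browser()/exit_browser() from util/newt.c, perf_session__browse_hists() from util/session.h); the hist tree, the total and the helpline string are placeholders for what a tool such as 'perf report' would supply, not verbatim code from the patch.

#include <linux/rbtree.h>
#include "util/session.h"

void setup_browser(void);	/* both implemented in util/newt.c above */
void exit_browser(void);

static void browse_report(struct rb_root *hists, u64 session_total)
{
	setup_browser();	/* does nothing unless stdout is a tty */
	perf_session__browse_hists(hists, session_total,
				   "Press ESC to exit, ENTER to annotate");
	exit_browser();		/* tears down newt only if it was started */
}
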
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 05d0c5c2030c..a2014459125a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -656,6 +656,10 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr) | |||
656 | return EVT_FAILED; | 656 | return EVT_FAILED; |
657 | n = hex2u64(str + 1, &config); | 657 | n = hex2u64(str + 1, &config); |
658 | if (n > 0) { | 658 | if (n > 0) { |
659 | if (str[n+1] == 'p') { | ||
660 | attr->precise = 1; | ||
661 | n++; | ||
662 | } | ||
659 | *strp = str + n + 1; | 663 | *strp = str + n + 1; |
660 | attr->type = PERF_TYPE_RAW; | 664 | attr->type = PERF_TYPE_RAW; |
661 | attr->config = config; | 665 | attr->config = config; |
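
Editor's note: the hunk above lets a raw hardware event carry a trailing 'p' modifier that sets attr->precise. The stand-alone sketch below reproduces that parsing logic outside perf so it can be compiled and run directly; strtoull() stands in for perf's hex2u64() and 'struct fake_attr' is trimmed to the two fields involved, so neither is from the patch.

#include <stdio.h>
#include <stdlib.h>

struct fake_attr {
	unsigned long long config;	/* raw event code */
	unsigned int precise;		/* set by the trailing 'p' */
};

static int parse_raw(const char *str, struct fake_attr *attr)
{
	char *end;

	if (*str != 'r')
		return -1;
	attr->config = strtoull(str + 1, &end, 16);
	if (end == str + 1)
		return -1;		/* no hex digits after 'r' */
	if (*end == 'p') {		/* optional precise modifier */
		attr->precise = 1;
		end++;
	}
	return *end == '\0' ? 0 : -1;
}

int main(void)
{
	struct fake_attr attr = { 0, 0 };

	if (!parse_raw("r1a8p", &attr))
		printf("config=%#llx precise=%u\n", attr.config, attr.precise);
	return 0;
}
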
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5c33417eebb3..34d73395baac 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -86,4 +86,13 @@ static inline struct map * | |||
86 | { | 86 | { |
87 | return map_groups__new_module(&self->kmaps, start, filename); | 87 | return map_groups__new_module(&self->kmaps, start, filename); |
88 | } | 88 | } |
89 | |||
90 | #ifdef NO_NEWT_SUPPORT | ||
91 | static inline void perf_session__browse_hists(struct rb_root *hists __used, | ||
92 | u64 session_total __used, | ||
93 | const char *helpline __used) {} | ||
94 | #else | ||
95 | void perf_session__browse_hists(struct rb_root *hists, u64 session_total, | ||
96 | const char *helpline); | ||
97 | #endif | ||
89 | #endif /* __PERF_SESSION_H */ | 98 | #endif /* __PERF_SESSION_H */ |
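
Editor's note: the session.h hunk uses the usual compile-out idiom: when perf is built with NO_NEWT_SUPPORT, an empty static inline replaces the real declaration so call sites need no #ifdefs. The fragment below shows the same idiom in isolation with made-up names ('FEATURE_DISABLED', 'frob_report'); it is a generic illustration, not perf code.

#include <stdio.h>

#define FEATURE_DISABLED 1		/* stand-in for -DNO_NEWT_SUPPORT */

#if FEATURE_DISABLED
static inline void frob_report(const char *msg __attribute__((unused))) {}
#else
void frob_report(const char *msg);	/* real implementation elsewhere */
#endif

int main(void)
{
	frob_report("hello");		/* call site stays #ifdef-free */
	return 0;
}
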
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c458c4a371d1..3eb9de4baef3 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -18,18 +18,6 @@ | |||
18 | #define NT_GNU_BUILD_ID 3 | 18 | #define NT_GNU_BUILD_ID 3 |
19 | #endif | 19 | #endif |
20 | 20 | ||
21 | enum dso_origin { | ||
22 | DSO__ORIG_KERNEL = 0, | ||
23 | DSO__ORIG_JAVA_JIT, | ||
24 | DSO__ORIG_BUILD_ID_CACHE, | ||
25 | DSO__ORIG_FEDORA, | ||
26 | DSO__ORIG_UBUNTU, | ||
27 | DSO__ORIG_BUILDID, | ||
28 | DSO__ORIG_DSO, | ||
29 | DSO__ORIG_KMODULE, | ||
30 | DSO__ORIG_NOT_FOUND, | ||
31 | }; | ||
32 | |||
33 | static void dsos__add(struct list_head *head, struct dso *dso); | 21 | static void dsos__add(struct list_head *head, struct dso *dso); |
34 | static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); | 22 | static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); |
35 | static int dso__load_kernel_sym(struct dso *self, struct map *map, | 23 | static int dso__load_kernel_sym(struct dso *self, struct map *map, |
@@ -870,8 +858,8 @@ out_close: | |||
870 | if (err == 0) | 858 | if (err == 0) |
871 | return nr; | 859 | return nr; |
872 | out: | 860 | out: |
873 | pr_warning("%s: problems reading %s PLT info.\n", | 861 | pr_debug("%s: problems reading %s PLT info.\n", |
874 | __func__, self->long_name); | 862 | __func__, self->long_name); |
875 | return 0; | 863 | return 0; |
876 | } | 864 | } |
877 | 865 | ||
@@ -1025,7 +1013,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, | |||
1025 | } | 1013 | } |
1026 | curr_map->map_ip = identity__map_ip; | 1014 | curr_map->map_ip = identity__map_ip; |
1027 | curr_map->unmap_ip = identity__map_ip; | 1015 | curr_map->unmap_ip = identity__map_ip; |
1028 | curr_dso->origin = DSO__ORIG_KERNEL; | 1016 | curr_dso->origin = self->origin; |
1029 | map_groups__insert(kmap->kmaps, curr_map); | 1017 | map_groups__insert(kmap->kmaps, curr_map); |
1030 | dsos__add(&dsos__kernel, curr_dso); | 1018 | dsos__add(&dsos__kernel, curr_dso); |
1031 | dso__set_loaded(curr_dso, map->type); | 1019 | dso__set_loaded(curr_dso, map->type); |
@@ -1895,6 +1883,17 @@ out_fail: | |||
1895 | return -1; | 1883 | return -1; |
1896 | } | 1884 | } |
1897 | 1885 | ||
1886 | size_t vmlinux_path__fprintf(FILE *fp) | ||
1887 | { | ||
1888 | int i; | ||
1889 | size_t printed = 0; | ||
1890 | |||
1891 | for (i = 0; i < vmlinux_path__nr_entries; ++i) | ||
1892 | printed += fprintf(fp, "[%d] %s\n", i, vmlinux_path[i]); | ||
1893 | |||
1894 | return printed; | ||
1895 | } | ||
1896 | |||
1898 | static int setup_list(struct strlist **list, const char *list_str, | 1897 | static int setup_list(struct strlist **list, const char *list_str, |
1899 | const char *list_name) | 1898 | const char *list_name) |
1900 | { | 1899 | { |
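
Editor's note: vmlinux_path__fprintf(), added above, dumps the list of vmlinux candidates that symbol resolution walks. A plausible use is on the error path when kernel symbols cannot be loaded, so the user can see where perf looked; the surrounding error handling below is illustrative only and not part of this patch.

#include <stdio.h>
#include "util/symbol.h"	/* declares vmlinux_path__fprintf() */

static void report_missing_vmlinux(void)
{
	fprintf(stderr, "no vmlinux found, candidates searched:\n");
	vmlinux_path__fprintf(stderr);
}
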
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f30a37428919..0da2455d5b90 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -106,6 +106,7 @@ struct dso { | |||
106 | u8 has_build_id:1; | 106 | u8 has_build_id:1; |
107 | u8 kernel:1; | 107 | u8 kernel:1; |
108 | u8 hit:1; | 108 | u8 hit:1; |
109 | u8 annotate_warned:1; | ||
109 | unsigned char origin; | 110 | unsigned char origin; |
110 | u8 sorted_by_name; | 111 | u8 sorted_by_name; |
111 | u8 loaded; | 112 | u8 loaded; |
@@ -150,6 +151,19 @@ size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); | |||
150 | 151 | ||
151 | size_t dso__fprintf_buildid(struct dso *self, FILE *fp); | 152 | size_t dso__fprintf_buildid(struct dso *self, FILE *fp); |
152 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); | 153 | size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); |
154 | |||
155 | enum dso_origin { | ||
156 | DSO__ORIG_KERNEL = 0, | ||
157 | DSO__ORIG_JAVA_JIT, | ||
158 | DSO__ORIG_BUILD_ID_CACHE, | ||
159 | DSO__ORIG_FEDORA, | ||
160 | DSO__ORIG_UBUNTU, | ||
161 | DSO__ORIG_BUILDID, | ||
162 | DSO__ORIG_DSO, | ||
163 | DSO__ORIG_KMODULE, | ||
164 | DSO__ORIG_NOT_FOUND, | ||
165 | }; | ||
166 | |||
153 | char dso__symtab_origin(const struct dso *self); | 167 | char dso__symtab_origin(const struct dso *self); |
154 | void dso__set_long_name(struct dso *self, char *name); | 168 | void dso__set_long_name(struct dso *self, char *name); |
155 | void dso__set_build_id(struct dso *self, void *build_id); | 169 | void dso__set_build_id(struct dso *self, void *build_id); |
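
Editor's note: with enum dso_origin moved from symbol.c into symbol.h, code outside symbol.c (such as the new browser) can reason about where a DSO's symbol table came from. The helper below is only a sketch of what that enables: it maps the enum to human-readable labels of my own choosing and is not dso__symtab_origin(), which returns a single character.

static const char *dso_origin__name(unsigned char origin)
{
	static const char * const names[] = {
		[DSO__ORIG_KERNEL]	   = "kernel",
		[DSO__ORIG_JAVA_JIT]	   = "java jit",
		[DSO__ORIG_BUILD_ID_CACHE] = "build-id cache",
		[DSO__ORIG_FEDORA]	   = "fedora debuginfo",
		[DSO__ORIG_UBUNTU]	   = "ubuntu debuginfo",
		[DSO__ORIG_BUILDID]	   = "build-id",
		[DSO__ORIG_DSO]		   = "dso",
		[DSO__ORIG_KMODULE]	   = "kernel module",
	};

	return origin < DSO__ORIG_NOT_FOUND ? names[origin] : "not found";
}
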
@@ -169,4 +183,6 @@ int kallsyms__parse(const char *filename, void *arg, | |||
169 | int symbol__init(void); | 183 | int symbol__init(void); |
170 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); | 184 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); |
171 | 185 | ||
186 | size_t vmlinux_path__fprintf(FILE *fp); | ||
187 | |||
172 | #endif /* __PERF_SYMBOL */ | 188 | #endif /* __PERF_SYMBOL */ |