author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-03-03 06:02:30 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-10 07:23:32 -0500
commit	caff2befffe899e63df5cc760b7ed01cfd902685 (patch)
tree	fe07f997bd67d1e5ae3122db789d7e7361ddca28
parent	69fef0d2e2c2c049ef4207a52e78b50d527bd85a (diff)
perf, x86: Implement simple LBR support
Implement simple support for the Intel Last-Branch-Record: it supports all
hardware that implements FREEZE_LBRS_ON_PMI, but does not (yet) implement
the LBR config register.

The Intel LBR is a FIFO of From,To addresses describing the last few
branches the hardware took.

This patch does not add a perf interface to the LBR, but merely provides an
interface for internal use.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.544191154@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
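For context, a minimal sketch of how an in-kernel user might walk the per-cpu
LBR state after intel_pmu_lbr_read() has filled it in from the PMI handler.
The consume_lbr_sample() helper is hypothetical and not part of this patch;
only cpu_hw_events, lbr_stack and lbr_entries come from the code below.

/*
 * Hypothetical consumer of the internal LBR interface (illustration only).
 */
static void consume_lbr_sample(struct cpu_hw_events *cpuc)
{
	u64 i;

	/* lbr_stack.nr and lbr_entries[] are filled by intel_pmu_lbr_read() */
	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		struct perf_branch_entry *br = &cpuc->lbr_entries[i];

		pr_debug("branch %llu: 0x%016llx -> 0x%016llx%s\n", i,
			 br->from, br->to,
			 br->flags ? " (mispredicted)" : "");
	}
}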
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	18
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	13
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_lbr.c	228
-rw-r--r--	include/linux/perf_event.h	11
4 files changed, 270 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0c03d5c1671f..1badff6b6b28 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -48,6 +48,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -70,6 +72,14 @@ struct cpu_hw_events {
 	u64			pebs_enabled;
 
 	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
@@ -159,6 +169,13 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
+	int		lbr_format;		   /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1237,6 +1254,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 11446412e4c7..44f6ed42a934 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
 		intel_pmu_disable_bts();
 
 	intel_pmu_pebs_disable_all();
+	intel_pmu_lbr_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
 	}
 
 	intel_pmu_pebs_enable_all();
+	intel_pmu_lbr_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -674,6 +676,8 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
+	intel_pmu_lbr_read();
+
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
@@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_core();
+
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
@@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
+
 	case 28: /* Atom */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_atom();
+
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
@@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
 		pr_cont("Westmere events, ");
 		break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..ea3e99ed82ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,228 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+enum {
+	LBR_FORMAT_32		= 0x00,
+	LBR_FORMAT_LIP		= 0x01,
+	LBR_FORMAT_EIP		= 0x02,
+	LBR_FORMAT_EIP_FLAGS	= 0x03,
+};
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
+ * otherwise it becomes near impossible to get a reliable stack.
+ */
+
+#define X86_DEBUGCTL_LBR			(1 << 0)
+#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI		(1 << 11)
+
+static void __intel_pmu_lbr_enable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++)
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+		wrmsrl(x86_pmu.lbr_to   + i, 0);
+	}
+}
+
+static void intel_pmu_lbr_reset(void)
+{
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_reset_32();
+	else
+		intel_pmu_lbr_reset_64();
+}
+
+static void intel_pmu_lbr_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	WARN_ON(cpuc->enabled);
+
+	/*
+	 * Reset the LBR stack if this is the first LBR user or
+	 * we changed task context, so as to avoid data leaks.
+	 */
+
+	if (!cpuc->lbr_users ||
+	    (event->ctx->task && cpuc->lbr_context != event->ctx)) {
+		intel_pmu_lbr_reset();
+		cpuc->lbr_context = event->ctx;
+	}
+
+	cpuc->lbr_users++;
+}
+
+static void intel_pmu_lbr_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	cpuc->lbr_users--;
+
+	BUG_ON(cpuc->lbr_users < 0);
+	WARN_ON(cpuc->enabled);
+}
+
+static void intel_pmu_lbr_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_enable();
+}
+
+static void intel_pmu_lbr_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_disable();
+}
+
+static inline u64 intel_pmu_lbr_tos(void)
+{
+	u64 tos;
+
+	rdmsrl(x86_pmu.lbr_tos, tos);
+
+	return tos;
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1;
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask;
+		union {
+			struct {
+				u32 from;
+				u32 to;
+			};
+			u64     lbr;
+		} msr_lastbranch;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+		cpuc->lbr_entries[i].flags = 0;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1;
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask;
+		u64 from, to, flags = 0;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+
+		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			from = (u64)((((s64)from) << 1) >> 1);
+		}
+
+		cpuc->lbr_entries[i].from  = from;
+		cpuc->lbr_entries[i].to    = to;
+		cpuc->lbr_entries[i].flags = flags;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_lbr_read(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!cpuc->lbr_users)
+		return;
+
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_read_32(cpuc);
+	else
+		intel_pmu_lbr_read_64(cpuc);
+}
+
+static int intel_pmu_lbr_format(void)
+{
+	u64 capabilities;
+
+	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+	return capabilities & 0x1f;
+}
+
+static void intel_pmu_lbr_init_core(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr     = 4;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+static void intel_pmu_lbr_init_nhm(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr     = 16;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x680;
+	x86_pmu.lbr_to     = 0x6c0;
+}
+
+static void intel_pmu_lbr_init_atom(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr     = 8;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
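A note on the LBR_FORMAT_EIP_FLAGS path above: the hardware packs the
mispredict flag into bit 63 of the FROM address, and the
(((s64)from) << 1) >> 1 expression shifts that flag bit out, then
arithmetically shifts back, sign-extending bit 62 into bit 63 to restore a
canonical 64-bit address. A standalone userspace sketch of the same
arithmetic, using a made-up sample value (illustration only, not part of
the patch):

#include <stdio.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

int main(void)
{
	/* made-up MSR value: mispredict flag set, kernel-text address */
	uint64_t from = LBR_FROM_FLAG_MISPRED | 0x7fffffff81000000ULL;
	uint64_t mispred = !!(from & LBR_FROM_FLAG_MISPRED);

	/* shift the flag out, then sign-extend bit 62 back into bit 63 */
	uint64_t addr = (uint64_t)((((int64_t)from) << 1) >> 1);

	/* prints: mispredicted=1 from=0xffffffff81000000 */
	printf("mispredicted=%llu from=%#llx\n",
	       (unsigned long long)mispred, (unsigned long long)addr);
	return 0;
}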
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42307b50c787..ab4fd9ede264 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,6 +467,17 @@ struct perf_raw_record {
 	void				*data;
 };
 
+struct perf_branch_entry {
+	__u64				from;
+	__u64				to;
+	__u64				flags;
+};
+
+struct perf_branch_stack {
+	__u64				nr;
+	struct perf_branch_entry	entries[0];
+};
+
 struct task_struct;
 
 /**
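The zero-length entries[0] array makes perf_branch_stack a variable-length
structure, the usual kernel idiom for a header followed by n records. The
patch itself embeds a fixed MAX_LBR_ENTRIES copy in struct cpu_hw_events,
but a dynamic allocation would be sized as in this hypothetical sketch
(alloc_branch_stack() is illustrative, not part of the patch):

static struct perf_branch_stack *alloc_branch_stack(u64 nr)
{
	struct perf_branch_stack *bs;

	/* one header plus nr trailing perf_branch_entry records */
	bs = kzalloc(sizeof(*bs) + nr * sizeof(struct perf_branch_entry),
		     GFP_KERNEL);
	if (bs)
		bs->nr = nr;

	return bs;
}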