 arch/x86/kernel/cpu/perf_event.c           |  18 ++
 arch/x86/kernel/cpu/perf_event_intel.c     |  13 ++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 228 ++++++++++++++++++++++++++++
 include/linux/perf_event.h                 |  11 +
 4 files changed, 270 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0c03d5c1671f..1badff6b6b28 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -48,6 +48,8 @@ struct amd_nb {
 	struct event_constraint	event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -70,6 +72,14 @@ struct cpu_hw_events {
 	u64			pebs_enabled;
 
 	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
@@ -159,6 +169,13 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
+	int		lbr_format;		   /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1237,6 +1254,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
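A note on the cpu_hw_events layout above: struct perf_branch_stack (added to include/linux/perf_event.h below) ends in a zero-length entries[0] array, and lbr_entries[] is placed directly behind lbr_stack, so the fixed buffer serves as that flexible tail and &cpuc->lbr_stack is a complete, self-describing branch stack. A standalone sketch of the aliasing, using hypothetical userspace mirrors of the structs (GNU C, since zero-length arrays are a GCC extension the kernel relies on):

#include <assert.h>
#include <stdint.h>

#define MAX_LBR_ENTRIES 16

struct perf_branch_entry { uint64_t from, to, flags; };

struct perf_branch_stack {
	uint64_t			nr;
	struct perf_branch_entry	entries[0];	/* flexible tail */
};

struct cpu_hw_events {
	struct perf_branch_stack	lbr_stack;
	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
};

int main(void)
{
	struct cpu_hw_events cpuc = { .lbr_stack = { .nr = 1 } };

	cpuc.lbr_entries[0].from = 0x1000;
	/* entries[] begins exactly where lbr_entries[] does */
	assert(cpuc.lbr_stack.entries[0].from == 0x1000);
	return 0;
}

sizeof(struct perf_branch_stack) is just the u64 nr, so entries[0] and lbr_entries[0] share an address and no copy is needed when handing the stack around.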
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 11446412e4c7..44f6ed42a934 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
 	intel_pmu_disable_bts();
 
 	intel_pmu_pebs_disable_all();
+	intel_pmu_lbr_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
 	}
 
 	intel_pmu_pebs_enable_all();
+	intel_pmu_lbr_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -674,6 +676,8 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
+	intel_pmu_lbr_read();
+
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
@@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_core();
+
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
@@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
+
 	case 28: /* Atom */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_atom();
+
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
@@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
 		pr_cont("Westmere events, ");
 		break;
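Nothing in this patch emits the captured records to userspace yet; intel_pmu_lbr_read(), called from the PMI path above, only snapshots the hardware stack into the per-cpu buffer. As an illustration of how that snapshot is consumed, a later user could walk it like so (hypothetical debugging helper, not part of the patch):

static void intel_pmu_lbr_dump(struct cpu_hw_events *cpuc)
{
	u64 i;

	for (i = 0; i < cpuc->lbr_stack.nr; i++)
		pr_debug("lbr %llu: %016llx -> %016llx%s\n", i,
			 cpuc->lbr_entries[i].from,
			 cpuc->lbr_entries[i].to,
			 cpuc->lbr_entries[i].flags ? " (mispredicted)" : "");
}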
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..ea3e99ed82ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,228 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+enum {
+	LBR_FORMAT_32		= 0x00,
+	LBR_FORMAT_LIP		= 0x01,
+	LBR_FORMAT_EIP		= 0x02,
+	LBR_FORMAT_EIP_FLAGS	= 0x03,
+};
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
+ * otherwise it becomes near impossible to get a reliable stack.
+ */
+
+#define X86_DEBUGCTL_LBR		(1 << 0)
+#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI	(1 << 11)
+
+static void __intel_pmu_lbr_enable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++)
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+		wrmsrl(x86_pmu.lbr_to   + i, 0);
+	}
+}
+
+static void intel_pmu_lbr_reset(void)
+{
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_reset_32();
+	else
+		intel_pmu_lbr_reset_64();
+}
+
+static void intel_pmu_lbr_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	WARN_ON(cpuc->enabled);
+
+	/*
+	 * Reset the LBR stack if this is the first LBR user or if we
+	 * changed task context, so as to avoid data leaks.
+	 */
+
+	if (!cpuc->lbr_users ||
+	    (event->ctx->task && cpuc->lbr_context != event->ctx)) {
+		intel_pmu_lbr_reset();
+		cpuc->lbr_context = event->ctx;
+	}
+
+	cpuc->lbr_users++;
+}
+
+static void intel_pmu_lbr_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	cpuc->lbr_users--;
+
+	BUG_ON(cpuc->lbr_users < 0);
+	WARN_ON(cpuc->enabled);
+}
+
+static void intel_pmu_lbr_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_enable();
+}
+
+static void intel_pmu_lbr_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_disable();
+}
+
+static inline u64 intel_pmu_lbr_tos(void)
+{
+	u64 tos;
+
+	rdmsrl(x86_pmu.lbr_tos, tos);
+
+	return tos;
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1;
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask;
+		union {
+			struct {
+				u32 from;
+				u32 to;
+			};
+			u64     lbr;
+		} msr_lastbranch;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+		cpuc->lbr_entries[i].flags = 0;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
+/*
+ * Due to the lack of segmentation in Linux, the effective address
+ * (offset) is the same as the linear address, allowing us to merge
+ * the LIP and EIP LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1;
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask;
+		u64 from, to, flags = 0;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+
+		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			from = (u64)((((s64)from) << 1) >> 1);
+		}
+
+		cpuc->lbr_entries[i].from  = from;
+		cpuc->lbr_entries[i].to    = to;
+		cpuc->lbr_entries[i].flags = flags;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_lbr_read(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!cpuc->lbr_users)
+		return;
+
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_read_32(cpuc);
+	else
+		intel_pmu_lbr_read_64(cpuc);
+}
+
+static int intel_pmu_lbr_format(void)
+{
+	u64 capabilities;
+
+	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+	return capabilities & 0x1f;
+}
+
+static void intel_pmu_lbr_init_core(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr	   = 4;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+static void intel_pmu_lbr_init_nhm(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr	   = 16;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x680;
+	x86_pmu.lbr_to     = 0x6c0;
+}
+
+static void intel_pmu_lbr_init_atom(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr	   = 8;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
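Two details in intel_pmu_lbr_read_64() are worth unpacking. The TOS MSR points at the most recently written slot of a power-of-two-sized ring, so (tos - i) & mask walks the records newest-first. And in the EIP_FLAGS format, bit 63 of the FROM value carries the mispredict flag rather than an address bit; since canonical x86-64 addresses have bits 63:47 all equal, the shift pair rebuilds bit 63 from bit 62 by sign extension. A standalone userspace check of both (the left shift of a negative s64 relies on GCC's arithmetic-shift behaviour, as the kernel code itself does):

#include <assert.h>
#include <stdint.h>

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)

int main(void)
{
	/* ring walk: lbr_nr = 16, tos = 5 reads slots 5, 4, ..., 0, 15, ... */
	unsigned long mask = 16 - 1;
	uint64_t tos = 5;

	assert(((tos - 0) & mask) ==  5);	/* newest record */
	assert(((tos - 5) & mask) ==  0);
	assert(((tos - 6) & mask) == 15);	/* wraps around the ring */

	/* mispredicted user branch: flag set, true address bit 63 is 0 */
	uint64_t from = LBR_FROM_FLAG_MISPRED | 0x0000000000400000ULL;
	uint64_t flags = !!(from & LBR_FROM_FLAG_MISPRED);
	from = (uint64_t)((((int64_t)from) << 1) >> 1);
	assert(flags == 1 && from == 0x0000000000400000ULL);

	/* predicted kernel branch: flag clear, bit 63 rebuilt from bit 62 */
	from = 0x7fffffff81000000ULL;
	flags = !!(from & LBR_FROM_FLAG_MISPRED);
	from = (uint64_t)((((int64_t)from) << 1) >> 1);
	assert(flags == 0 && from == 0xffffffff81000000ULL);

	return 0;
}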
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42307b50c787..ab4fd9ede264 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,6 +467,17 @@ struct perf_raw_record {
 	void				*data;
 };
 
+struct perf_branch_entry {
+	__u64				from;
+	__u64				to;
+	__u64				flags;
+};
+
+struct perf_branch_stack {
+	__u64				nr;
+	struct perf_branch_entry	entries[0];
+};
+
 struct task_struct;
 
 /**
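The entries[0] tail in perf_branch_stack is the usual variable-length-record idiom: a stack carrying nr records occupies sizeof(struct perf_branch_stack) + nr * sizeof(struct perf_branch_entry) bytes. A minimal sketch of sizing one dynamically (hypothetical helper, not in this patch; the patch itself sidesteps allocation by overlaying the fixed lbr_entries[] buffer):

/* Hypothetical helper, shown only to illustrate the entries[0] sizing. */
static struct perf_branch_stack *alloc_branch_stack(u64 nr, gfp_t gfp)
{
	struct perf_branch_stack *bs;

	bs = kzalloc(sizeof(*bs) + nr * sizeof(bs->entries[0]), gfp);
	if (bs)
		bs->nr = nr;
	return bs;
}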