aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-10-27 21:48:00 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-10-27 21:48:00 -0400
commita042e26137d7674ac04b1cd2d5c06b9ebc1ee2d5 (patch)
treec1a7a8bda41b99caa4b4a0fe320fc73278879f7d /arch/x86/kernel
parentf66dd539feb849a3a00f7fac67c026e0935e373a (diff)
parente25804a0327dad954f7d43803178fdef2fd35b4e (diff)
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits) perf python scripting: Add futex-contention script perf python scripting: Fixup cut'n'paste error in sctop script perf scripting: Shut up 'perf record' final status perf record: Remove newline character from perror() argument perf python scripting: Support fedora 11 (audit 1.7.17) perf python scripting: Improve the syscalls-by-pid script perf python scripting: print the syscall name on sctop perf python scripting: Improve the syscalls-counts script perf python scripting: Improve the failed-syscalls-by-pid script kprobes: Remove redundant text_mutex lock in optimize x86/oprofile: Fix uninitialized variable use in debug printk tracing: Fix 'faild' -> 'failed' typo perf probe: Fix format specified for Dwarf_Off parameter perf trace: Fix detection of script extension perf trace: Use $PERF_EXEC_PATH in canned report scripts perf tools: Document event modifiers perf tools: Remove direct slang.h include perf_events: Fix for transaction recovery in group_sched_in() perf_events: Revert: Fix transaction recovery in group_sched_in() perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/perf_event.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c216
2 files changed, 160 insertions, 77 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1e8c7a51164..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -237,6 +237,7 @@ struct x86_pmu {
237 * Intel DebugStore bits 237 * Intel DebugStore bits
238 */ 238 */
239 int bts, pebs; 239 int bts, pebs;
240 int bts_active, pebs_active;
240 int pebs_record_size; 241 int pebs_record_size;
241 void (*drain_pebs)(struct pt_regs *regs); 242 void (*drain_pebs)(struct pt_regs *regs);
242 struct event_constraint *pebs_constraints; 243 struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
380 381
381#endif 382#endif
382 383
383static int reserve_ds_buffers(void); 384static void reserve_ds_buffers(void);
384static void release_ds_buffers(void); 385static void release_ds_buffers(void);
385 386
386static void hw_perf_event_destroy(struct perf_event *event) 387static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
477 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
478 (hwc->sample_period == 1)) { 479 (hwc->sample_period == 1)) {
479 /* BTS is not supported by this architecture. */ 480 /* BTS is not supported by this architecture. */
480 if (!x86_pmu.bts) 481 if (!x86_pmu.bts_active)
481 return -EOPNOTSUPP; 482 return -EOPNOTSUPP;
482 483
483 /* BTS is currently only allowed for user-mode. */ 484 /* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
496 int precise = 0; 497 int precise = 0;
497 498
498 /* Support for constant skid */ 499 /* Support for constant skid */
499 if (x86_pmu.pebs) 500 if (x86_pmu.pebs_active) {
500 precise++; 501 precise++;
501 502
502 /* Support for IP fixup */ 503 /* Support for IP fixup */
503 if (x86_pmu.lbr_nr) 504 if (x86_pmu.lbr_nr)
504 precise++; 505 precise++;
506 }
505 507
506 if (event->attr.precise_ip > precise) 508 if (event->attr.precise_ip > precise)
507 return -EOPNOTSUPP; 509 return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
543 if (atomic_read(&active_events) == 0) { 545 if (atomic_read(&active_events) == 0) {
544 if (!reserve_pmc_hardware()) 546 if (!reserve_pmc_hardware())
545 err = -EBUSY; 547 err = -EBUSY;
546 else { 548 else
547 err = reserve_ds_buffers(); 549 reserve_ds_buffers();
548 if (err)
549 release_pmc_hardware();
550 }
551 } 550 }
552 if (!err) 551 if (!err)
553 atomic_inc(&active_events); 552 atomic_inc(&active_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75} 75}
76 76
77static int alloc_pebs_buffer(int cpu)
78{
79 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
80 int node = cpu_to_node(cpu);
81 int max, thresh = 1; /* always use a single PEBS record */
82 void *buffer;
83
84 if (!x86_pmu.pebs)
85 return 0;
86
87 buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
88 if (unlikely(!buffer))
89 return -ENOMEM;
90
91 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
92
93 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
94 ds->pebs_index = ds->pebs_buffer_base;
95 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
96 max * x86_pmu.pebs_record_size;
97
98 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
99 thresh * x86_pmu.pebs_record_size;
100
101 return 0;
102}
103
104static void release_pebs_buffer(int cpu)
105{
106 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
107
108 if (!ds || !x86_pmu.pebs)
109 return;
110
111 kfree((void *)(unsigned long)ds->pebs_buffer_base);
112 ds->pebs_buffer_base = 0;
113}
114
115static int alloc_bts_buffer(int cpu)
116{
117 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
118 int node = cpu_to_node(cpu);
119 int max, thresh;
120 void *buffer;
121
122 if (!x86_pmu.bts)
123 return 0;
124
125 buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
126 if (unlikely(!buffer))
127 return -ENOMEM;
128
129 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
130 thresh = max / 16;
131
132 ds->bts_buffer_base = (u64)(unsigned long)buffer;
133 ds->bts_index = ds->bts_buffer_base;
134 ds->bts_absolute_maximum = ds->bts_buffer_base +
135 max * BTS_RECORD_SIZE;
136 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
137 thresh * BTS_RECORD_SIZE;
138
139 return 0;
140}
141
142static void release_bts_buffer(int cpu)
143{
144 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
145
146 if (!ds || !x86_pmu.bts)
147 return;
148
149 kfree((void *)(unsigned long)ds->bts_buffer_base);
150 ds->bts_buffer_base = 0;
151}
152
153static int alloc_ds_buffer(int cpu)
154{
155 int node = cpu_to_node(cpu);
156 struct debug_store *ds;
157
158 ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
159 if (unlikely(!ds))
160 return -ENOMEM;
161
162 per_cpu(cpu_hw_events, cpu).ds = ds;
163
164 return 0;
165}
166
167static void release_ds_buffer(int cpu)
168{
169 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
170
171 if (!ds)
172 return;
173
174 per_cpu(cpu_hw_events, cpu).ds = NULL;
175 kfree(ds);
176}
177
77static void release_ds_buffers(void) 178static void release_ds_buffers(void)
78{ 179{
79 int cpu; 180 int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
82 return; 183 return;
83 184
84 get_online_cpus(); 185 get_online_cpus();
85
86 for_each_online_cpu(cpu) 186 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu); 187 fini_debug_store_on_cpu(cpu);
88 188
89 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 190 release_pebs_buffer(cpu);
91 191 release_bts_buffer(cpu);
92 if (!ds) 192 release_ds_buffer(cpu);
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 } 193 }
101
102 put_online_cpus(); 194 put_online_cpus();
103} 195}
104 196
105static int reserve_ds_buffers(void) 197static void reserve_ds_buffers(void)
106{ 198{
107 int cpu, err = 0; 199 int bts_err = 0, pebs_err = 0;
200 int cpu;
201
202 x86_pmu.bts_active = 0;
203 x86_pmu.pebs_active = 0;
108 204
109 if (!x86_pmu.bts && !x86_pmu.pebs) 205 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0; 206 return;
207
208 if (!x86_pmu.bts)
209 bts_err = 1;
210
211 if (!x86_pmu.pebs)
212 pebs_err = 1;
111 213
112 get_online_cpus(); 214 get_online_cpus();
113 215
114 for_each_possible_cpu(cpu) { 216 for_each_possible_cpu(cpu) {
115 struct debug_store *ds; 217 if (alloc_ds_buffer(cpu)) {
116 void *buffer; 218 bts_err = 1;
117 int max, thresh; 219 pebs_err = 1;
220 }
221
222 if (!bts_err && alloc_bts_buffer(cpu))
223 bts_err = 1;
118 224
119 err = -ENOMEM; 225 if (!pebs_err && alloc_pebs_buffer(cpu))
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL); 226 pebs_err = 1;
121 if (unlikely(!ds)) 227
228 if (bts_err && pebs_err)
122 break; 229 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds; 230 }
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140 231
141 if (x86_pmu.pebs) { 232 if (bts_err) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); 233 for_each_possible_cpu(cpu)
143 if (unlikely(!buffer)) 234 release_bts_buffer(cpu);
144 break; 235 }
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158 236
159 err = 0; 237 if (pebs_err) {
238 for_each_possible_cpu(cpu)
239 release_pebs_buffer(cpu);
160 } 240 }
161 241
162 if (err) 242 if (bts_err && pebs_err) {
163 release_ds_buffers(); 243 for_each_possible_cpu(cpu)
164 else { 244 release_ds_buffer(cpu);
245 } else {
246 if (x86_pmu.bts && !bts_err)
247 x86_pmu.bts_active = 1;
248
249 if (x86_pmu.pebs && !pebs_err)
250 x86_pmu.pebs_active = 1;
251
165 for_each_online_cpu(cpu) 252 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu); 253 init_debug_store_on_cpu(cpu);
167 } 254 }
168 255
169 put_online_cpus(); 256 put_online_cpus();
170
171 return err;
172} 257}
173 258
174/* 259/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
233 if (!event) 318 if (!event)
234 return 0; 319 return 0;
235 320
236 if (!ds) 321 if (!x86_pmu.bts_active)
237 return 0; 322 return 0;
238 323
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 324 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
503 struct pebs_record_core *at, *top; 588 struct pebs_record_core *at, *top;
504 int n; 589 int n;
505 590
506 if (!ds || !x86_pmu.pebs) 591 if (!x86_pmu.pebs_active)
507 return; 592 return;
508 593
509 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 594 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
545 u64 status = 0; 630 u64 status = 0;
546 int bit, n; 631 int bit, n;
547 632
548 if (!ds || !x86_pmu.pebs) 633 if (!x86_pmu.pebs_active)
549 return; 634 return;
550 635
551 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 636 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
630 715
631#else /* CONFIG_CPU_SUP_INTEL */ 716#else /* CONFIG_CPU_SUP_INTEL */
632 717
633static int reserve_ds_buffers(void) 718static void reserve_ds_buffers(void)
634{ 719{
635 return 0;
636} 720}
637 721
638static void release_ds_buffers(void) 722static void release_ds_buffers(void)