path: root/arch/x86/oprofile/nmi_int.c
author	Jason Yeh <jason.yeh@amd.com>	2009-07-08 07:49:38 -0400
committer	Robert Richter <robert.richter@amd.com>	2009-07-20 10:33:53 -0400
commit	4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a (patch)
tree	c9003cd927ed878412e89a59db0138b6b701b629 /arch/x86/oprofile/nmi_int.c
parent	6e63ea4b0b14ff5fb8a3ca704fcda7d28b95f079 (diff)
oprofile: Implement performance counter multiplexing
The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than the hardware provides counters for. This is realized by switching between events at a user-specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms.

If the number of events to profile is higher than the number of hardware counters available, the patch will schedule a work queue that switches the event counter and re-writes the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can be easily extended to other models and architectures.

There are follow-on patches that rework parts of this patch.

Signed-off-by: Jason Yeh <jason.yeh@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
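[Editorial illustration, not part of the commit: a minimal user-space sketch that sets the switching interval through the new oprofilefs file described above. The 5 ms value is an arbitrary example, and the sketch assumes oprofilefs is mounted at /dev/oprofile on a kernel built with CONFIG_OPROFILE_EVENT_MULTIPLEX.]

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/dev/oprofile/time_slice", "w");

	if (!f) {
		perror("/dev/oprofile/time_slice");
		return 1;
	}
	/* interval between counter-set switches, in milliseconds */
	fprintf(f, "5\n");
	fclose(f);
	return 0;
}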
Diffstat (limited to 'arch/x86/oprofile/nmi_int.c')
-rw-r--r--	arch/x86/oprofile/nmi_int.c	162
1 file changed, 157 insertions, 5 deletions
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index fca8dc94531e..e54f6a0b35ac 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -1,11 +1,14 @@
 /**
  * @file nmi_int.c
  *
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
  * @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
  */
 
 #include <linux/init.h>
@@ -24,6 +27,12 @@
24#include "op_counter.h" 27#include "op_counter.h"
25#include "op_x86_model.h" 28#include "op_x86_model.h"
26 29
30
31#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
32DEFINE_PER_CPU(int, switch_index);
33#endif
34
35
27static struct op_x86_model_spec const *model; 36static struct op_x86_model_spec const *model;
28static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); 37static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
29static DEFINE_PER_CPU(unsigned long, saved_lvtpc); 38static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
@@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
 /* 0 == registered but off, 1 == registered and on */
 static int nmi_enabled = 0;
 
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+extern atomic_t multiplex_counter;
+#endif
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
 /* common functions */
 
 u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
@@ -95,6 +111,11 @@ static void free_msrs(void)
 		per_cpu(cpu_msrs, i).counters = NULL;
 		kfree(per_cpu(cpu_msrs, i).controls);
 		per_cpu(cpu_msrs, i).controls = NULL;
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+		kfree(per_cpu(cpu_msrs, i).multiplex);
+		per_cpu(cpu_msrs, i).multiplex = NULL;
+#endif
 	}
 }
 
@@ -103,6 +124,9 @@ static int allocate_msrs(void)
 	int success = 1;
 	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
 	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters;
+#endif
 
 	int i;
 	for_each_possible_cpu(i) {
@@ -118,6 +142,14 @@ static int allocate_msrs(void)
 			success = 0;
 			break;
 		}
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+		per_cpu(cpu_msrs, i).multiplex =
+			kmalloc(multiplex_size, GFP_KERNEL);
+		if (!per_cpu(cpu_msrs, i).multiplex) {
+			success = 0;
+			break;
+		}
+#endif
 	}
 
 	if (!success)
@@ -126,6 +158,25 @@ static int allocate_msrs(void)
 	return success;
 }
 
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_setup_cpu_mux(struct op_msrs const * const msrs)
+{
+	int i;
+	struct op_msr *multiplex = msrs->multiplex;
+
+	for (i = 0; i < model->num_virt_counters; ++i) {
+		if (counter_config[i].enabled) {
+			multiplex[i].saved = -(u64)counter_config[i].count;
+		} else {
+			multiplex[i].addr = 0;
+			multiplex[i].saved = 0;
+		}
+	}
+}
+
+#endif
+
 static void nmi_cpu_setup(void *dummy)
 {
 	int cpu = smp_processor_id();
@@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy)
 	nmi_cpu_save_registers(msrs);
 	spin_lock(&oprofilefs_lock);
 	model->setup_ctrs(model, msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	nmi_setup_cpu_mux(msrs);
+#endif
 	spin_unlock(&oprofilefs_lock);
 	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
@@ -173,14 +227,52 @@ static int nmi_setup(void)
 			memcpy(per_cpu(cpu_msrs, cpu).controls,
 			       per_cpu(cpu_msrs, 0).controls,
 			       sizeof(struct op_msr) * model->num_controls);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+			memcpy(per_cpu(cpu_msrs, cpu).multiplex,
+			       per_cpu(cpu_msrs, 0).multiplex,
+			       sizeof(struct op_msr) * model->num_virt_counters);
+#endif
 		}
-
 	}
 	on_each_cpu(nmi_cpu_setup, NULL, 1);
 	nmi_enabled = 1;
 	return 0;
 }
 
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
+{
+	unsigned int si = __get_cpu_var(switch_index);
+	struct op_msr *multiplex = msrs->multiplex;
+	unsigned int i;
+
+	for (i = 0; i < model->num_counters; ++i) {
+		int offset = i + si;
+		if (multiplex[offset].addr) {
+			rdmsrl(multiplex[offset].addr,
+			       multiplex[offset].saved);
+		}
+	}
+}
+
+static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
+{
+	unsigned int si = __get_cpu_var(switch_index);
+	struct op_msr *multiplex = msrs->multiplex;
+	unsigned int i;
+
+	for (i = 0; i < model->num_counters; ++i) {
+		int offset = i + si;
+		if (multiplex[offset].addr) {
+			wrmsrl(multiplex[offset].addr,
+			       multiplex[offset].saved);
+		}
+	}
+}
+
+#endif
+
 static void nmi_cpu_restore_registers(struct op_msrs *msrs)
 {
 	struct op_msr *counters = msrs->counters;
@@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy)
 	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 	apic_write(APIC_LVTERR, v);
 	nmi_cpu_restore_registers(msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	__get_cpu_var(switch_index) = 0;
+#endif
 }
 
 static void nmi_shutdown(void)
@@ -252,16 +347,15 @@ static void nmi_stop(void)
 	on_each_cpu(nmi_cpu_stop, NULL, 1);
 }
 
-struct op_counter_config counter_config[OP_MAX_COUNTER];
-
 static int nmi_create_files(struct super_block *sb, struct dentry *root)
 {
 	unsigned int i;
 
-	for (i = 0; i < model->num_counters; ++i) {
+	for (i = 0; i < model->num_virt_counters; ++i) {
 		struct dentry *dir;
 		char buf[4];
 
+#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX
 		/* quick little hack to _not_ expose a counter if it is not
 		 * available for use. This should protect userspace app.
 		 * NOTE: assumes 1:1 mapping here (that counters are organized
@@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
 		 */
 		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
 			continue;
+#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */
 
 		snprintf(buf, sizeof(buf), "%d", i);
 		dir = oprofilefs_mkdir(sb, root, buf);
@@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
 	return 0;
 }
 
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_cpu_switch(void *dummy)
+{
+	int cpu = smp_processor_id();
+	int si = per_cpu(switch_index, cpu);
+	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+
+	nmi_cpu_stop(NULL);
+	nmi_cpu_save_mpx_registers(msrs);
+
+	/* move to next set */
+	si += model->num_counters;
+	if ((si > model->num_virt_counters) || (counter_config[si].count == 0))
+		per_cpu(switch_index, cpu) = 0;
+	else
+		per_cpu(switch_index, cpu) = si;
+
+	model->switch_ctrl(model, msrs);
+	nmi_cpu_restore_mpx_registers(msrs);
+
+	nmi_cpu_start(NULL);
+}
+
+
+/*
+ * Quick check to see if multiplexing is necessary.
+ * The check should be sufficient since counters are used
+ * in order.
+ */
+static int nmi_multiplex_on(void)
+{
+	return counter_config[model->num_counters].count ? 0 : -EINVAL;
+}
+
+static int nmi_switch_event(void)
+{
+	if (!model->switch_ctrl)
+		return -ENOSYS;		/* not implemented */
+	if (nmi_multiplex_on() < 0)
+		return -EINVAL;		/* not necessary */
+
+	on_each_cpu(nmi_cpu_switch, NULL, 1);
+
+	atomic_inc(&multiplex_counter);
+
+	return 0;
+}
+
+#endif
+
 #ifdef CONFIG_SMP
 static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
 				 void *data)
@@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops)
 	register_cpu_notifier(&oprofile_cpu_nb);
 #endif
 	/* default values, can be overwritten by model */
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	__raw_get_cpu_var(switch_index) = 0;
+#endif
 	ops->create_files = nmi_create_files;
 	ops->setup = nmi_setup;
 	ops->shutdown = nmi_shutdown;
 	ops->start = nmi_start;
 	ops->stop = nmi_stop;
 	ops->cpu_type = cpu_type;
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+	ops->switch_events = nmi_switch_event;
+#endif
 
 	if (model->init)
 		ret = model->init(ops);
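
[Editorial illustration, not part of the patch: a self-contained C model of the round-robin walk that nmi_cpu_switch() above performs per CPU. The switch index advances in steps of the number of hardware counters and wraps back to the first set when it runs past the configured virtual counters or hits an unused set. The set sizes and event counts below are made-up example values, and the bounds check is written as >= so the sketch never indexes past its own array.]

#include <stdio.h>

#define NUM_COUNTERS		4	/* hardware counters (example) */
#define NUM_VIRT_COUNTERS	12	/* virtual counters (example) */

/* count == 0 marks an unused virtual counter, as in counter_config */
static const unsigned long count[NUM_VIRT_COUNTERS] = {
	100000, 100000, 100000, 100000,	/* set 0 */
	200000, 200000, 200000, 200000,	/* set 1 */
	0, 0, 0, 0,			/* set 2: not configured */
};

/* advance the per-CPU switch index the way nmi_cpu_switch() does */
static int next_switch_index(int si)
{
	si += NUM_COUNTERS;
	if (si >= NUM_VIRT_COUNTERS || count[si] == 0)
		return 0;		/* wrap to the first set */
	return si;
}

int main(void)
{
	int si = 0, tick;

	/* simulate a few expirations of the time_slice timer */
	for (tick = 0; tick < 6; ++tick) {
		printf("tick %d: hw counters mapped to virt %d..%d\n",
		       tick, si, si + NUM_COUNTERS - 1);
		si = next_switch_index(si);
	}
	return 0;
}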