diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/oprofile/backtrace.c | 3 | ||||
-rw-r--r-- | arch/x86/oprofile/nmi_int.c | 173 | ||||
-rw-r--r-- | arch/x86/oprofile/op_counter.h | 18 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 59 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_p4.c | 32 | ||||
-rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 120 | ||||
-rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 13 |
7 files changed, 244 insertions, 174 deletions
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index e2095cba409f..04df67f8a7ba 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -52,8 +52,7 @@ struct frame_head { | |||
52 | unsigned long ret; | 52 | unsigned long ret; |
53 | } __attribute__((packed)); | 53 | } __attribute__((packed)); |
54 | 54 | ||
55 | static struct frame_head * | 55 | static struct frame_head *dump_user_backtrace(struct frame_head *head) |
56 | dump_user_backtrace(struct frame_head * head) | ||
57 | { | 56 | { |
58 | struct frame_head bufhead[2]; | 57 | struct frame_head bufhead[2]; |
59 | 58 | ||
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57f6c9088081..022cd41ea9b4 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -28,85 +28,9 @@ static struct op_x86_model_spec const *model; | |||
28 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 28 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
29 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 29 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
30 | 30 | ||
31 | static int nmi_start(void); | ||
32 | static void nmi_stop(void); | ||
33 | static void nmi_cpu_start(void *dummy); | ||
34 | static void nmi_cpu_stop(void *dummy); | ||
35 | |||
36 | /* 0 == registered but off, 1 == registered and on */ | 31 | /* 0 == registered but off, 1 == registered and on */ |
37 | static int nmi_enabled = 0; | 32 | static int nmi_enabled = 0; |
38 | 33 | ||
39 | #ifdef CONFIG_SMP | ||
40 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | ||
41 | void *data) | ||
42 | { | ||
43 | int cpu = (unsigned long)data; | ||
44 | switch (action) { | ||
45 | case CPU_DOWN_FAILED: | ||
46 | case CPU_ONLINE: | ||
47 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | ||
48 | break; | ||
49 | case CPU_DOWN_PREPARE: | ||
50 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | ||
51 | break; | ||
52 | } | ||
53 | return NOTIFY_DONE; | ||
54 | } | ||
55 | |||
56 | static struct notifier_block oprofile_cpu_nb = { | ||
57 | .notifier_call = oprofile_cpu_notifier | ||
58 | }; | ||
59 | #endif | ||
60 | |||
61 | #ifdef CONFIG_PM | ||
62 | |||
63 | static int nmi_suspend(struct sys_device *dev, pm_message_t state) | ||
64 | { | ||
65 | /* Only one CPU left, just stop that one */ | ||
66 | if (nmi_enabled == 1) | ||
67 | nmi_cpu_stop(NULL); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int nmi_resume(struct sys_device *dev) | ||
72 | { | ||
73 | if (nmi_enabled == 1) | ||
74 | nmi_cpu_start(NULL); | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static struct sysdev_class oprofile_sysclass = { | ||
79 | .name = "oprofile", | ||
80 | .resume = nmi_resume, | ||
81 | .suspend = nmi_suspend, | ||
82 | }; | ||
83 | |||
84 | static struct sys_device device_oprofile = { | ||
85 | .id = 0, | ||
86 | .cls = &oprofile_sysclass, | ||
87 | }; | ||
88 | |||
89 | static int __init init_sysfs(void) | ||
90 | { | ||
91 | int error; | ||
92 | |||
93 | error = sysdev_class_register(&oprofile_sysclass); | ||
94 | if (!error) | ||
95 | error = sysdev_register(&device_oprofile); | ||
96 | return error; | ||
97 | } | ||
98 | |||
99 | static void exit_sysfs(void) | ||
100 | { | ||
101 | sysdev_unregister(&device_oprofile); | ||
102 | sysdev_class_unregister(&oprofile_sysclass); | ||
103 | } | ||
104 | |||
105 | #else | ||
106 | #define init_sysfs() do { } while (0) | ||
107 | #define exit_sysfs() do { } while (0) | ||
108 | #endif /* CONFIG_PM */ | ||
109 | |||
110 | static int profile_exceptions_notify(struct notifier_block *self, | 34 | static int profile_exceptions_notify(struct notifier_block *self, |
111 | unsigned long val, void *data) | 35 | unsigned long val, void *data) |
112 | { | 36 | { |
@@ -361,6 +285,77 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
361 | return 0; | 285 | return 0; |
362 | } | 286 | } |
363 | 287 | ||
288 | #ifdef CONFIG_SMP | ||
289 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | ||
290 | void *data) | ||
291 | { | ||
292 | int cpu = (unsigned long)data; | ||
293 | switch (action) { | ||
294 | case CPU_DOWN_FAILED: | ||
295 | case CPU_ONLINE: | ||
296 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | ||
297 | break; | ||
298 | case CPU_DOWN_PREPARE: | ||
299 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | ||
300 | break; | ||
301 | } | ||
302 | return NOTIFY_DONE; | ||
303 | } | ||
304 | |||
305 | static struct notifier_block oprofile_cpu_nb = { | ||
306 | .notifier_call = oprofile_cpu_notifier | ||
307 | }; | ||
308 | #endif | ||
309 | |||
310 | #ifdef CONFIG_PM | ||
311 | |||
312 | static int nmi_suspend(struct sys_device *dev, pm_message_t state) | ||
313 | { | ||
314 | /* Only one CPU left, just stop that one */ | ||
315 | if (nmi_enabled == 1) | ||
316 | nmi_cpu_stop(NULL); | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static int nmi_resume(struct sys_device *dev) | ||
321 | { | ||
322 | if (nmi_enabled == 1) | ||
323 | nmi_cpu_start(NULL); | ||
324 | return 0; | ||
325 | } | ||
326 | |||
327 | static struct sysdev_class oprofile_sysclass = { | ||
328 | .name = "oprofile", | ||
329 | .resume = nmi_resume, | ||
330 | .suspend = nmi_suspend, | ||
331 | }; | ||
332 | |||
333 | static struct sys_device device_oprofile = { | ||
334 | .id = 0, | ||
335 | .cls = &oprofile_sysclass, | ||
336 | }; | ||
337 | |||
338 | static int __init init_sysfs(void) | ||
339 | { | ||
340 | int error; | ||
341 | |||
342 | error = sysdev_class_register(&oprofile_sysclass); | ||
343 | if (!error) | ||
344 | error = sysdev_register(&device_oprofile); | ||
345 | return error; | ||
346 | } | ||
347 | |||
348 | static void exit_sysfs(void) | ||
349 | { | ||
350 | sysdev_unregister(&device_oprofile); | ||
351 | sysdev_class_unregister(&oprofile_sysclass); | ||
352 | } | ||
353 | |||
354 | #else | ||
355 | #define init_sysfs() do { } while (0) | ||
356 | #define exit_sysfs() do { } while (0) | ||
357 | #endif /* CONFIG_PM */ | ||
358 | |||
364 | static int p4force; | 359 | static int p4force; |
365 | module_param(p4force, int, 0); | 360 | module_param(p4force, int, 0); |
366 | 361 | ||
@@ -420,9 +415,6 @@ static int __init ppro_init(char **cpu_type) | |||
420 | case 15: case 23: | 415 | case 15: case 23: |
421 | *cpu_type = "i386/core_2"; | 416 | *cpu_type = "i386/core_2"; |
422 | break; | 417 | break; |
423 | case 26: | ||
424 | *cpu_type = "i386/core_2"; | ||
425 | break; | ||
426 | default: | 418 | default: |
427 | /* Unknown */ | 419 | /* Unknown */ |
428 | return 0; | 420 | return 0; |
@@ -432,6 +424,16 @@ static int __init ppro_init(char **cpu_type) | |||
432 | return 1; | 424 | return 1; |
433 | } | 425 | } |
434 | 426 | ||
427 | static int __init arch_perfmon_init(char **cpu_type) | ||
428 | { | ||
429 | if (!cpu_has_arch_perfmon) | ||
430 | return 0; | ||
431 | *cpu_type = "i386/arch_perfmon"; | ||
432 | model = &op_arch_perfmon_spec; | ||
433 | arch_perfmon_setup_counters(); | ||
434 | return 1; | ||
435 | } | ||
436 | |||
435 | /* in order to get sysfs right */ | 437 | /* in order to get sysfs right */ |
436 | static int using_nmi; | 438 | static int using_nmi; |
437 | 439 | ||
@@ -439,7 +441,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
439 | { | 441 | { |
440 | __u8 vendor = boot_cpu_data.x86_vendor; | 442 | __u8 vendor = boot_cpu_data.x86_vendor; |
441 | __u8 family = boot_cpu_data.x86; | 443 | __u8 family = boot_cpu_data.x86; |
442 | char *cpu_type; | 444 | char *cpu_type = NULL; |
443 | int ret = 0; | 445 | int ret = 0; |
444 | 446 | ||
445 | if (!cpu_has_apic) | 447 | if (!cpu_has_apic) |
@@ -477,19 +479,20 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
477 | switch (family) { | 479 | switch (family) { |
478 | /* Pentium IV */ | 480 | /* Pentium IV */ |
479 | case 0xf: | 481 | case 0xf: |
480 | if (!p4_init(&cpu_type)) | 482 | p4_init(&cpu_type); |
481 | return -ENODEV; | ||
482 | break; | 483 | break; |
483 | 484 | ||
484 | /* A P6-class processor */ | 485 | /* A P6-class processor */ |
485 | case 6: | 486 | case 6: |
486 | if (!ppro_init(&cpu_type)) | 487 | ppro_init(&cpu_type); |
487 | return -ENODEV; | ||
488 | break; | 488 | break; |
489 | 489 | ||
490 | default: | 490 | default: |
491 | return -ENODEV; | 491 | break; |
492 | } | 492 | } |
493 | |||
494 | if (!cpu_type && !arch_perfmon_init(&cpu_type)) | ||
495 | return -ENODEV; | ||
493 | break; | 496 | break; |
494 | 497 | ||
495 | default: | 498 | default: |
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15c4675..91b6a116165e 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h | |||
@@ -6,22 +6,22 @@ | |||
6 | * | 6 | * |
7 | * @author John Levon | 7 | * @author John Levon |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #ifndef OP_COUNTER_H | 10 | #ifndef OP_COUNTER_H |
11 | #define OP_COUNTER_H | 11 | #define OP_COUNTER_H |
12 | 12 | ||
13 | #define OP_MAX_COUNTER 8 | 13 | #define OP_MAX_COUNTER 8 |
14 | 14 | ||
15 | /* Per-perfctr configuration as set via | 15 | /* Per-perfctr configuration as set via |
16 | * oprofilefs. | 16 | * oprofilefs. |
17 | */ | 17 | */ |
18 | struct op_counter_config { | 18 | struct op_counter_config { |
19 | unsigned long count; | 19 | unsigned long count; |
20 | unsigned long enabled; | 20 | unsigned long enabled; |
21 | unsigned long event; | 21 | unsigned long event; |
22 | unsigned long kernel; | 22 | unsigned long kernel; |
23 | unsigned long user; | 23 | unsigned long user; |
24 | unsigned long unit_mask; | 24 | unsigned long unit_mask; |
25 | }; | 25 | }; |
26 | 26 | ||
27 | extern struct op_counter_config counter_config[]; | 27 | extern struct op_counter_config counter_config[]; |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index d9faf607b3a6..509513760a6e 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -67,8 +67,9 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
67 | 67 | ||
68 | /* The function interface needs to be fixed, something like add | 68 | /* The function interface needs to be fixed, something like add |
69 | data. Should then be added to linux/oprofile.h. */ | 69 | data. Should then be added to linux/oprofile.h. */ |
70 | extern void oprofile_add_ibs_sample(struct pt_regs *const regs, | 70 | extern void |
71 | unsigned int * const ibs_sample, u8 code); | 71 | oprofile_add_ibs_sample(struct pt_regs *const regs, |
72 | unsigned int *const ibs_sample, int ibs_code); | ||
72 | 73 | ||
73 | struct ibs_fetch_sample { | 74 | struct ibs_fetch_sample { |
74 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | 75 | /* MSRC001_1031 IBS Fetch Linear Address Register */ |
@@ -309,12 +310,15 @@ static void op_amd_start(struct op_msrs const * const msrs) | |||
309 | #ifdef CONFIG_OPROFILE_IBS | 310 | #ifdef CONFIG_OPROFILE_IBS |
310 | if (ibs_allowed && ibs_config.fetch_enabled) { | 311 | if (ibs_allowed && ibs_config.fetch_enabled) { |
311 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; | 312 | low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; |
312 | high = IBS_FETCH_HIGH_ENABLE; | 313 | high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ |
314 | + IBS_FETCH_HIGH_ENABLE; | ||
313 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 315 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
314 | } | 316 | } |
315 | 317 | ||
316 | if (ibs_allowed && ibs_config.op_enabled) { | 318 | if (ibs_allowed && ibs_config.op_enabled) { |
317 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + IBS_OP_LOW_ENABLE; | 319 | low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) |
320 | + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ | ||
321 | + IBS_OP_LOW_ENABLE; | ||
318 | high = 0; | 322 | high = 0; |
319 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | 323 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); |
320 | } | 324 | } |
@@ -468,11 +472,10 @@ static void clear_ibs_nmi(void) | |||
468 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 472 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
469 | } | 473 | } |
470 | 474 | ||
471 | static int (*create_arch_files)(struct super_block * sb, struct dentry * root); | 475 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
472 | 476 | ||
473 | static int setup_ibs_files(struct super_block * sb, struct dentry * root) | 477 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) |
474 | { | 478 | { |
475 | char buf[12]; | ||
476 | struct dentry *dir; | 479 | struct dentry *dir; |
477 | int ret = 0; | 480 | int ret = 0; |
478 | 481 | ||
@@ -494,22 +497,22 @@ static int setup_ibs_files(struct super_block * sb, struct dentry * root) | |||
494 | ibs_config.max_cnt_op = 250000; | 497 | ibs_config.max_cnt_op = 250000; |
495 | ibs_config.op_enabled = 0; | 498 | ibs_config.op_enabled = 0; |
496 | ibs_config.dispatched_ops = 1; | 499 | ibs_config.dispatched_ops = 1; |
497 | snprintf(buf, sizeof(buf), "ibs_fetch"); | 500 | |
498 | dir = oprofilefs_mkdir(sb, root, buf); | 501 | dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); |
499 | oprofilefs_create_ulong(sb, dir, "rand_enable", | ||
500 | &ibs_config.rand_en); | ||
501 | oprofilefs_create_ulong(sb, dir, "enable", | 502 | oprofilefs_create_ulong(sb, dir, "enable", |
502 | &ibs_config.fetch_enabled); | 503 | &ibs_config.fetch_enabled); |
503 | oprofilefs_create_ulong(sb, dir, "max_count", | 504 | oprofilefs_create_ulong(sb, dir, "max_count", |
504 | &ibs_config.max_cnt_fetch); | 505 | &ibs_config.max_cnt_fetch); |
505 | snprintf(buf, sizeof(buf), "ibs_uops"); | 506 | oprofilefs_create_ulong(sb, dir, "rand_enable", |
506 | dir = oprofilefs_mkdir(sb, root, buf); | 507 | &ibs_config.rand_en); |
508 | |||
509 | dir = oprofilefs_mkdir(sb, root, "ibs_op"); | ||
507 | oprofilefs_create_ulong(sb, dir, "enable", | 510 | oprofilefs_create_ulong(sb, dir, "enable", |
508 | &ibs_config.op_enabled); | 511 | &ibs_config.op_enabled); |
509 | oprofilefs_create_ulong(sb, dir, "max_count", | 512 | oprofilefs_create_ulong(sb, dir, "max_count", |
510 | &ibs_config.max_cnt_op); | 513 | &ibs_config.max_cnt_op); |
511 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", | 514 | oprofilefs_create_ulong(sb, dir, "dispatched_ops", |
512 | &ibs_config.dispatched_ops); | 515 | &ibs_config.dispatched_ops); |
513 | 516 | ||
514 | return 0; | 517 | return 0; |
515 | } | 518 | } |
@@ -530,14 +533,14 @@ static void op_amd_exit(void) | |||
530 | #endif | 533 | #endif |
531 | 534 | ||
532 | struct op_x86_model_spec const op_amd_spec = { | 535 | struct op_x86_model_spec const op_amd_spec = { |
533 | .init = op_amd_init, | 536 | .init = op_amd_init, |
534 | .exit = op_amd_exit, | 537 | .exit = op_amd_exit, |
535 | .num_counters = NUM_COUNTERS, | 538 | .num_counters = NUM_COUNTERS, |
536 | .num_controls = NUM_CONTROLS, | 539 | .num_controls = NUM_CONTROLS, |
537 | .fill_in_addresses = &op_amd_fill_in_addresses, | 540 | .fill_in_addresses = &op_amd_fill_in_addresses, |
538 | .setup_ctrs = &op_amd_setup_ctrs, | 541 | .setup_ctrs = &op_amd_setup_ctrs, |
539 | .check_ctrs = &op_amd_check_ctrs, | 542 | .check_ctrs = &op_amd_check_ctrs, |
540 | .start = &op_amd_start, | 543 | .start = &op_amd_start, |
541 | .stop = &op_amd_stop, | 544 | .stop = &op_amd_stop, |
542 | .shutdown = &op_amd_shutdown | 545 | .shutdown = &op_amd_shutdown |
543 | }; | 546 | }; |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 43ac5af338d8..4c4a51c90bc2 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -698,24 +698,24 @@ static void p4_shutdown(struct op_msrs const * const msrs) | |||
698 | 698 | ||
699 | #ifdef CONFIG_SMP | 699 | #ifdef CONFIG_SMP |
700 | struct op_x86_model_spec const op_p4_ht2_spec = { | 700 | struct op_x86_model_spec const op_p4_ht2_spec = { |
701 | .num_counters = NUM_COUNTERS_HT2, | 701 | .num_counters = NUM_COUNTERS_HT2, |
702 | .num_controls = NUM_CONTROLS_HT2, | 702 | .num_controls = NUM_CONTROLS_HT2, |
703 | .fill_in_addresses = &p4_fill_in_addresses, | 703 | .fill_in_addresses = &p4_fill_in_addresses, |
704 | .setup_ctrs = &p4_setup_ctrs, | 704 | .setup_ctrs = &p4_setup_ctrs, |
705 | .check_ctrs = &p4_check_ctrs, | 705 | .check_ctrs = &p4_check_ctrs, |
706 | .start = &p4_start, | 706 | .start = &p4_start, |
707 | .stop = &p4_stop, | 707 | .stop = &p4_stop, |
708 | .shutdown = &p4_shutdown | 708 | .shutdown = &p4_shutdown |
709 | }; | 709 | }; |
710 | #endif | 710 | #endif |
711 | 711 | ||
712 | struct op_x86_model_spec const op_p4_spec = { | 712 | struct op_x86_model_spec const op_p4_spec = { |
713 | .num_counters = NUM_COUNTERS_NON_HT, | 713 | .num_counters = NUM_COUNTERS_NON_HT, |
714 | .num_controls = NUM_CONTROLS_NON_HT, | 714 | .num_controls = NUM_CONTROLS_NON_HT, |
715 | .fill_in_addresses = &p4_fill_in_addresses, | 715 | .fill_in_addresses = &p4_fill_in_addresses, |
716 | .setup_ctrs = &p4_setup_ctrs, | 716 | .setup_ctrs = &p4_setup_ctrs, |
717 | .check_ctrs = &p4_check_ctrs, | 717 | .check_ctrs = &p4_check_ctrs, |
718 | .start = &p4_start, | 718 | .start = &p4_start, |
719 | .stop = &p4_stop, | 719 | .stop = &p4_stop, |
720 | .shutdown = &p4_shutdown | 720 | .shutdown = &p4_shutdown |
721 | }; | 721 | }; |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f6c57b..0620d6d45f7d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -1,32 +1,34 @@ | |||
1 | /* | 1 | /* |
2 | * @file op_model_ppro.h | 2 | * @file op_model_ppro.h |
3 | * pentium pro / P6 model-specific MSR operations | 3 | * Family 6 perfmon and architectural perfmon MSR operations |
4 | * | 4 | * |
5 | * @remark Copyright 2002 OProfile authors | 5 | * @remark Copyright 2002 OProfile authors |
6 | * @remark Copyright 2008 Intel Corporation | ||
6 | * @remark Read the file COPYING | 7 | * @remark Read the file COPYING |
7 | * | 8 | * |
8 | * @author John Levon | 9 | * @author John Levon |
9 | * @author Philippe Elie | 10 | * @author Philippe Elie |
10 | * @author Graydon Hoare | 11 | * @author Graydon Hoare |
12 | * @author Andi Kleen | ||
11 | */ | 13 | */ |
12 | 14 | ||
13 | #include <linux/oprofile.h> | 15 | #include <linux/oprofile.h> |
16 | #include <linux/slab.h> | ||
14 | #include <asm/ptrace.h> | 17 | #include <asm/ptrace.h> |
15 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
16 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
17 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | ||
18 | 22 | ||
19 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
20 | #include "op_counter.h" | 24 | #include "op_counter.h" |
21 | 25 | ||
22 | #define NUM_COUNTERS 2 | 26 | static int num_counters = 2; |
23 | #define NUM_CONTROLS 2 | 27 | static int counter_width = 32; |
24 | 28 | ||
25 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) | 29 | #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) | 30 | #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) |
27 | #define CTR_32BIT_WRITE(l, msrs, c) \ | 31 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) |
28 | do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0) | ||
29 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | ||
30 | 32 | ||
31 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) | 33 | #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) |
32 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) | 34 | #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) |
@@ -40,20 +42,20 @@ | |||
40 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) | 42 | #define CTRL_SET_UM(val, m) (val |= (m << 8)) |
41 | #define CTRL_SET_EVENT(val, e) (val |= e) | 43 | #define CTRL_SET_EVENT(val, e) (val |= e) |
42 | 44 | ||
43 | static unsigned long reset_value[NUM_COUNTERS]; | 45 | static u64 *reset_value; |
44 | 46 | ||
45 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 47 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) |
46 | { | 48 | { |
47 | int i; | 49 | int i; |
48 | 50 | ||
49 | for (i = 0; i < NUM_COUNTERS; i++) { | 51 | for (i = 0; i < num_counters; i++) { |
50 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 52 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
51 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 53 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; |
52 | else | 54 | else |
53 | msrs->counters[i].addr = 0; | 55 | msrs->counters[i].addr = 0; |
54 | } | 56 | } |
55 | 57 | ||
56 | for (i = 0; i < NUM_CONTROLS; i++) { | 58 | for (i = 0; i < num_counters; i++) { |
57 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 59 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) |
58 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 60 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; |
59 | else | 61 | else |
@@ -67,8 +69,22 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
67 | unsigned int low, high; | 69 | unsigned int low, high; |
68 | int i; | 70 | int i; |
69 | 71 | ||
72 | if (!reset_value) { | ||
73 | reset_value = kmalloc(sizeof(unsigned) * num_counters, | ||
74 | GFP_ATOMIC); | ||
75 | if (!reset_value) | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | if (cpu_has_arch_perfmon) { | ||
80 | union cpuid10_eax eax; | ||
81 | eax.full = cpuid_eax(0xa); | ||
82 | if (counter_width < eax.split.bit_width) | ||
83 | counter_width = eax.split.bit_width; | ||
84 | } | ||
85 | |||
70 | /* clear all counters */ | 86 | /* clear all counters */ |
71 | for (i = 0 ; i < NUM_CONTROLS; ++i) { | 87 | for (i = 0 ; i < num_counters; ++i) { |
72 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) | 88 | if (unlikely(!CTRL_IS_RESERVED(msrs, i))) |
73 | continue; | 89 | continue; |
74 | CTRL_READ(low, high, msrs, i); | 90 | CTRL_READ(low, high, msrs, i); |
@@ -77,18 +93,18 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
77 | } | 93 | } |
78 | 94 | ||
79 | /* avoid a false detection of ctr overflows in NMI handler */ | 95 | /* avoid a false detection of ctr overflows in NMI handler */ |
80 | for (i = 0; i < NUM_COUNTERS; ++i) { | 96 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) | 97 | if (unlikely(!CTR_IS_RESERVED(msrs, i))) |
82 | continue; | 98 | continue; |
83 | CTR_32BIT_WRITE(1, msrs, i); | 99 | wrmsrl(msrs->counters[i].addr, -1LL); |
84 | } | 100 | } |
85 | 101 | ||
86 | /* enable active counters */ | 102 | /* enable active counters */ |
87 | for (i = 0; i < NUM_COUNTERS; ++i) { | 103 | for (i = 0; i < num_counters; ++i) { |
88 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { | 104 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { |
89 | reset_value[i] = counter_config[i].count; | 105 | reset_value[i] = counter_config[i].count; |
90 | 106 | ||
91 | CTR_32BIT_WRITE(counter_config[i].count, msrs, i); | 107 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
92 | 108 | ||
93 | CTRL_READ(low, high, msrs, i); | 109 | CTRL_READ(low, high, msrs, i); |
94 | CTRL_CLEAR(low); | 110 | CTRL_CLEAR(low); |
@@ -111,13 +127,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
111 | unsigned int low, high; | 127 | unsigned int low, high; |
112 | int i; | 128 | int i; |
113 | 129 | ||
114 | for (i = 0 ; i < NUM_COUNTERS; ++i) { | 130 | for (i = 0 ; i < num_counters; ++i) { |
115 | if (!reset_value[i]) | 131 | if (!reset_value[i]) |
116 | continue; | 132 | continue; |
117 | CTR_READ(low, high, msrs, i); | 133 | CTR_READ(low, high, msrs, i); |
118 | if (CTR_OVERFLOWED(low)) { | 134 | if (CTR_OVERFLOWED(low)) { |
119 | oprofile_add_sample(regs, i); | 135 | oprofile_add_sample(regs, i); |
120 | CTR_32BIT_WRITE(reset_value[i], msrs, i); | 136 | wrmsrl(msrs->counters[i].addr, -reset_value[i]); |
121 | } | 137 | } |
122 | } | 138 | } |
123 | 139 | ||
@@ -141,7 +157,7 @@ static void ppro_start(struct op_msrs const * const msrs) | |||
141 | unsigned int low, high; | 157 | unsigned int low, high; |
142 | int i; | 158 | int i; |
143 | 159 | ||
144 | for (i = 0; i < NUM_COUNTERS; ++i) { | 160 | for (i = 0; i < num_counters; ++i) { |
145 | if (reset_value[i]) { | 161 | if (reset_value[i]) { |
146 | CTRL_READ(low, high, msrs, i); | 162 | CTRL_READ(low, high, msrs, i); |
147 | CTRL_SET_ACTIVE(low); | 163 | CTRL_SET_ACTIVE(low); |
@@ -156,7 +172,7 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
156 | unsigned int low, high; | 172 | unsigned int low, high; |
157 | int i; | 173 | int i; |
158 | 174 | ||
159 | for (i = 0; i < NUM_COUNTERS; ++i) { | 175 | for (i = 0; i < num_counters; ++i) { |
160 | if (!reset_value[i]) | 176 | if (!reset_value[i]) |
161 | continue; | 177 | continue; |
162 | CTRL_READ(low, high, msrs, i); | 178 | CTRL_READ(low, high, msrs, i); |
@@ -169,24 +185,70 @@ static void ppro_shutdown(struct op_msrs const * const msrs) | |||
169 | { | 185 | { |
170 | int i; | 186 | int i; |
171 | 187 | ||
172 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 188 | for (i = 0 ; i < num_counters ; ++i) { |
173 | if (CTR_IS_RESERVED(msrs, i)) | 189 | if (CTR_IS_RESERVED(msrs, i)) |
174 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | 190 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); |
175 | } | 191 | } |
176 | for (i = 0 ; i < NUM_CONTROLS ; ++i) { | 192 | for (i = 0 ; i < num_counters ; ++i) { |
177 | if (CTRL_IS_RESERVED(msrs, i)) | 193 | if (CTRL_IS_RESERVED(msrs, i)) |
178 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | 194 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); |
179 | } | 195 | } |
196 | if (reset_value) { | ||
197 | kfree(reset_value); | ||
198 | reset_value = NULL; | ||
199 | } | ||
180 | } | 200 | } |
181 | 201 | ||
182 | 202 | ||
183 | struct op_x86_model_spec const op_ppro_spec = { | 203 | struct op_x86_model_spec op_ppro_spec = { |
184 | .num_counters = NUM_COUNTERS, | 204 | .num_counters = 2, /* can be overriden */ |
185 | .num_controls = NUM_CONTROLS, | 205 | .num_controls = 2, /* dito */ |
186 | .fill_in_addresses = &ppro_fill_in_addresses, | 206 | .fill_in_addresses = &ppro_fill_in_addresses, |
187 | .setup_ctrs = &ppro_setup_ctrs, | 207 | .setup_ctrs = &ppro_setup_ctrs, |
188 | .check_ctrs = &ppro_check_ctrs, | 208 | .check_ctrs = &ppro_check_ctrs, |
189 | .start = &ppro_start, | 209 | .start = &ppro_start, |
190 | .stop = &ppro_stop, | 210 | .stop = &ppro_stop, |
191 | .shutdown = &ppro_shutdown | 211 | .shutdown = &ppro_shutdown |
212 | }; | ||
213 | |||
214 | /* | ||
215 | * Architectural performance monitoring. | ||
216 | * | ||
217 | * Newer Intel CPUs (Core1+) have support for architectural | ||
218 | * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details. | ||
219 | * The advantage of this is that it can be done without knowing about | ||
220 | * the specific CPU. | ||
221 | */ | ||
222 | |||
223 | void arch_perfmon_setup_counters(void) | ||
224 | { | ||
225 | union cpuid10_eax eax; | ||
226 | |||
227 | eax.full = cpuid_eax(0xa); | ||
228 | |||
229 | /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ | ||
230 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | ||
231 | current_cpu_data.x86_model == 15) { | ||
232 | eax.split.version_id = 2; | ||
233 | eax.split.num_counters = 2; | ||
234 | eax.split.bit_width = 40; | ||
235 | } | ||
236 | |||
237 | num_counters = eax.split.num_counters; | ||
238 | |||
239 | op_arch_perfmon_spec.num_counters = num_counters; | ||
240 | op_arch_perfmon_spec.num_controls = num_counters; | ||
241 | op_ppro_spec.num_counters = num_counters; | ||
242 | op_ppro_spec.num_controls = num_counters; | ||
243 | } | ||
244 | |||
245 | struct op_x86_model_spec op_arch_perfmon_spec = { | ||
246 | /* num_counters/num_controls filled in at runtime */ | ||
247 | .fill_in_addresses = &ppro_fill_in_addresses, | ||
248 | /* user space does the cpuid check for available events */ | ||
249 | .setup_ctrs = &ppro_setup_ctrs, | ||
250 | .check_ctrs = &ppro_check_ctrs, | ||
251 | .start = &ppro_start, | ||
252 | .stop = &ppro_stop, | ||
253 | .shutdown = &ppro_shutdown | ||
192 | }; | 254 | }; |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 05a0261ba0c3..825e79064d64 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -22,8 +22,8 @@ struct op_msr { | |||
22 | }; | 22 | }; |
23 | 23 | ||
24 | struct op_msrs { | 24 | struct op_msrs { |
25 | struct op_msr * counters; | 25 | struct op_msr *counters; |
26 | struct op_msr * controls; | 26 | struct op_msr *controls; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct pt_regs; | 29 | struct pt_regs; |
@@ -34,8 +34,8 @@ struct pt_regs; | |||
34 | struct op_x86_model_spec { | 34 | struct op_x86_model_spec { |
35 | int (*init)(struct oprofile_operations *ops); | 35 | int (*init)(struct oprofile_operations *ops); |
36 | void (*exit)(void); | 36 | void (*exit)(void); |
37 | unsigned int const num_counters; | 37 | unsigned int num_counters; |
38 | unsigned int const num_controls; | 38 | unsigned int num_controls; |
39 | void (*fill_in_addresses)(struct op_msrs * const msrs); | 39 | void (*fill_in_addresses)(struct op_msrs * const msrs); |
40 | void (*setup_ctrs)(struct op_msrs const * const msrs); | 40 | void (*setup_ctrs)(struct op_msrs const * const msrs); |
41 | int (*check_ctrs)(struct pt_regs * const regs, | 41 | int (*check_ctrs)(struct pt_regs * const regs, |
@@ -45,9 +45,12 @@ struct op_x86_model_spec { | |||
45 | void (*shutdown)(struct op_msrs const * const msrs); | 45 | void (*shutdown)(struct op_msrs const * const msrs); |
46 | }; | 46 | }; |
47 | 47 | ||
48 | extern struct op_x86_model_spec const op_ppro_spec; | 48 | extern struct op_x86_model_spec op_ppro_spec; |
49 | extern struct op_x86_model_spec const op_p4_spec; | 49 | extern struct op_x86_model_spec const op_p4_spec; |
50 | extern struct op_x86_model_spec const op_p4_ht2_spec; | 50 | extern struct op_x86_model_spec const op_p4_ht2_spec; |
51 | extern struct op_x86_model_spec const op_amd_spec; | 51 | extern struct op_x86_model_spec const op_amd_spec; |
52 | extern struct op_x86_model_spec op_arch_perfmon_spec; | ||
53 | |||
54 | extern void arch_perfmon_setup_counters(void); | ||
52 | 55 | ||
53 | #endif /* OP_X86_MODEL_H */ | 56 | #endif /* OP_X86_MODEL_H */ |