diff options
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 29 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 98 | ||||
-rw-r--r-- | include/linux/syscalls.h | 12 | ||||
-rw-r--r-- | kernel/perf_counter.c | 38 |
4 files changed, 106 insertions, 71 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 30e7ebf78275..ef1936a871aa 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); | |||
58 | */ | 58 | */ |
59 | int hw_perf_counter_init(struct perf_counter *counter) | 59 | int hw_perf_counter_init(struct perf_counter *counter) |
60 | { | 60 | { |
61 | struct perf_counter_hw_event *hw_event = &counter->hw_event; | ||
61 | struct hw_perf_counter *hwc = &counter->hw; | 62 | struct hw_perf_counter *hwc = &counter->hw; |
62 | u32 hw_event_type = counter->event.hw_event_type; | ||
63 | 63 | ||
64 | if (unlikely(!perf_counters_initialized)) | 64 | if (unlikely(!perf_counters_initialized)) |
65 | return -EINVAL; | 65 | return -EINVAL; |
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter) | |||
77 | hwc->nmi = 0; | 77 | hwc->nmi = 0; |
78 | if (capable(CAP_SYS_ADMIN)) { | 78 | if (capable(CAP_SYS_ADMIN)) { |
79 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | 79 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; |
80 | if (hw_event_type & PERF_COUNT_NMI) | 80 | if (hw_event->nmi) |
81 | hwc->nmi = 1; | 81 | hwc->nmi = 1; |
82 | } | 82 | } |
83 | 83 | ||
84 | hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; | 84 | hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; |
85 | hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; | 85 | hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; |
86 | 86 | ||
87 | hwc->irq_period = counter->event.hw_event_period; | 87 | hwc->irq_period = hw_event->irq_period; |
88 | /* | 88 | /* |
89 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 89 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
90 | * so we install an artificial 1<<31 period regardless of | 90 | * so we install an artificial 1<<31 period regardless of |
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter) | |||
93 | if (!hwc->irq_period) | 93 | if (!hwc->irq_period) |
94 | hwc->irq_period = 0x7FFFFFFF; | 94 | hwc->irq_period = 0x7FFFFFFF; |
95 | 95 | ||
96 | hwc->next_count = -((s32) hwc->irq_period); | 96 | hwc->next_count = -(s32)hwc->irq_period; |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * Raw event types provide the config in the event structure | 99 | * Raw event types provide the config in the event structure |
100 | */ | 100 | */ |
101 | hw_event_type &= ~PERF_COUNT_NMI; | 101 | if (hw_event->raw) { |
102 | if (hw_event_type == PERF_COUNT_RAW) { | 102 | hwc->config |= hw_event->type; |
103 | hwc->config |= counter->event.hw_raw_ctrl; | ||
104 | } else { | 103 | } else { |
105 | if (hw_event_type >= max_intel_perfmon_events) | 104 | if (hw_event->type >= max_intel_perfmon_events) |
106 | return -EINVAL; | 105 | return -EINVAL; |
107 | /* | 106 | /* |
108 | * The generic map: | 107 | * The generic map: |
109 | */ | 108 | */ |
110 | hwc->config |= intel_perfmon_event_map[hw_event_type]; | 109 | hwc->config |= intel_perfmon_event_map[hw_event->type]; |
111 | } | 110 | } |
112 | counter->wakeup_pending = 0; | 111 | counter->wakeup_pending = 0; |
113 | 112 | ||
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) | |||
354 | int bit; | 353 | int bit; |
355 | 354 | ||
356 | list_for_each_entry(counter, &ctx->counters, list) { | 355 | list_for_each_entry(counter, &ctx->counters, list) { |
357 | if (counter->record_type != PERF_RECORD_SIMPLE || | 356 | if (counter->hw_event.record_type != PERF_RECORD_SIMPLE || |
358 | counter == leader) | 357 | counter == leader) |
359 | continue; | 358 | continue; |
360 | 359 | ||
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) | |||
372 | perf_save_and_restart(counter); | 371 | perf_save_and_restart(counter); |
373 | } | 372 | } |
374 | } | 373 | } |
375 | perf_store_irq_data(leader, counter->event.hw_event_type); | 374 | perf_store_irq_data(leader, counter->hw_event.type); |
376 | perf_store_irq_data(leader, atomic64_counter_read(counter)); | 375 | perf_store_irq_data(leader, atomic64_counter_read(counter)); |
377 | } | 376 | } |
378 | } | 377 | } |
@@ -410,7 +409,7 @@ again: | |||
410 | 409 | ||
411 | perf_save_and_restart(counter); | 410 | perf_save_and_restart(counter); |
412 | 411 | ||
413 | switch (counter->record_type) { | 412 | switch (counter->hw_event.record_type) { |
414 | case PERF_RECORD_SIMPLE: | 413 | case PERF_RECORD_SIMPLE: |
415 | continue; | 414 | continue; |
416 | case PERF_RECORD_IRQ: | 415 | case PERF_RECORD_IRQ: |
@@ -418,7 +417,7 @@ again: | |||
418 | break; | 417 | break; |
419 | case PERF_RECORD_GROUP: | 418 | case PERF_RECORD_GROUP: |
420 | perf_store_irq_data(counter, | 419 | perf_store_irq_data(counter, |
421 | counter->event.hw_event_type); | 420 | counter->hw_event.type); |
422 | perf_store_irq_data(counter, | 421 | perf_store_irq_data(counter, |
423 | atomic64_counter_read(counter)); | 422 | atomic64_counter_read(counter)); |
424 | perf_handle_group(counter, &status, &ack); | 423 | perf_handle_group(counter, &status, &ack); |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1f0017673e77..a2b4852e2d70 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -24,65 +24,93 @@ | |||
24 | struct task_struct; | 24 | struct task_struct; |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Generalized hardware event types, used by the hw_event_type parameter | 27 | * User-space ABI bits: |
28 | * of the sys_perf_counter_open() syscall: | 28 | */ |
29 | |||
30 | /* | ||
31 | * Generalized performance counter event types, used by the hw_event.type | ||
32 | * parameter of the sys_perf_counter_open() syscall: | ||
29 | */ | 33 | */ |
30 | enum hw_event_types { | 34 | enum hw_event_types { |
31 | PERF_COUNT_CYCLES, | ||
32 | PERF_COUNT_INSTRUCTIONS, | ||
33 | PERF_COUNT_CACHE_REFERENCES, | ||
34 | PERF_COUNT_CACHE_MISSES, | ||
35 | PERF_COUNT_BRANCH_INSTRUCTIONS, | ||
36 | PERF_COUNT_BRANCH_MISSES, | ||
37 | /* | 35 | /* |
38 | * If this bit is set in the type, then trigger NMI sampling: | 36 | * Common hardware events, generalized by the kernel: |
39 | */ | 37 | */ |
40 | PERF_COUNT_NMI = (1 << 30), | 38 | PERF_COUNT_CYCLES = 0, |
41 | PERF_COUNT_RAW = (1 << 31), | 39 | PERF_COUNT_INSTRUCTIONS = 1, |
40 | PERF_COUNT_CACHE_REFERENCES = 2, | ||
41 | PERF_COUNT_CACHE_MISSES = 3, | ||
42 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | ||
43 | PERF_COUNT_BRANCH_MISSES = 5, | ||
44 | |||
45 | /* | ||
46 | * Special "software" counters provided by the kernel, even if | ||
47 | * the hardware does not support performance counters. These | ||
48 | * counters measure various physical and sw events of the | ||
49 | * kernel (and allow the profiling of them as well): | ||
50 | */ | ||
51 | PERF_COUNT_CPU_CLOCK = -1, | ||
52 | PERF_COUNT_TASK_CLOCK = -2, | ||
53 | PERF_COUNT_PAGE_FAULTS = -3, | ||
54 | PERF_COUNT_CONTEXT_SWITCHES = -4, | ||
42 | }; | 55 | }; |
43 | 56 | ||
44 | /* | 57 | /* |
45 | * IRQ-notification data record type: | 58 | * IRQ-notification data record type: |
46 | */ | 59 | */ |
47 | enum perf_record_type { | 60 | enum perf_counter_record_type { |
48 | PERF_RECORD_SIMPLE, | 61 | PERF_RECORD_SIMPLE = 0, |
49 | PERF_RECORD_IRQ, | 62 | PERF_RECORD_IRQ = 1, |
50 | PERF_RECORD_GROUP, | 63 | PERF_RECORD_GROUP = 2, |
51 | }; | 64 | }; |
52 | 65 | ||
53 | struct perf_counter_event { | 66 | /* |
54 | u32 hw_event_type; | 67 | * Hardware event to monitor via a performance monitoring counter: |
55 | u32 hw_event_period; | 68 | */ |
56 | u64 hw_raw_ctrl; | 69 | struct perf_counter_hw_event { |
70 | u64 type; | ||
71 | |||
72 | u64 irq_period; | ||
73 | u32 record_type; | ||
74 | |||
75 | u32 disabled : 1, /* off by default */ | ||
76 | nmi : 1, /* NMI sampling */ | ||
77 | raw : 1, /* raw event type */ | ||
78 | __reserved_1 : 29; | ||
79 | |||
80 | u64 __reserved_2; | ||
57 | }; | 81 | }; |
58 | 82 | ||
83 | /* | ||
84 | * Kernel-internal data types: | ||
85 | */ | ||
86 | |||
59 | /** | 87 | /** |
60 | * struct hw_perf_counter - performance counter hardware details | 88 | * struct hw_perf_counter - performance counter hardware details: |
61 | */ | 89 | */ |
62 | struct hw_perf_counter { | 90 | struct hw_perf_counter { |
63 | u64 config; | 91 | u64 config; |
64 | unsigned long config_base; | 92 | unsigned long config_base; |
65 | unsigned long counter_base; | 93 | unsigned long counter_base; |
66 | int nmi; | 94 | int nmi; |
67 | unsigned int idx; | 95 | unsigned int idx; |
68 | u64 prev_count; | 96 | u64 prev_count; |
69 | s32 next_count; | 97 | u64 irq_period; |
70 | u64 irq_period; | 98 | s32 next_count; |
71 | }; | 99 | }; |
72 | 100 | ||
73 | /* | 101 | /* |
74 | * Hardcoded buffer length limit for now, for IRQ-fed events: | 102 | * Hardcoded buffer length limit for now, for IRQ-fed events: |
75 | */ | 103 | */ |
76 | #define PERF_DATA_BUFLEN 2048 | 104 | #define PERF_DATA_BUFLEN 2048 |
77 | 105 | ||
78 | /** | 106 | /** |
79 | * struct perf_data - performance counter IRQ data sampling ... | 107 | * struct perf_data - performance counter IRQ data sampling ... |
80 | */ | 108 | */ |
81 | struct perf_data { | 109 | struct perf_data { |
82 | int len; | 110 | int len; |
83 | int rd_idx; | 111 | int rd_idx; |
84 | int overrun; | 112 | int overrun; |
85 | u8 data[PERF_DATA_BUFLEN]; | 113 | u8 data[PERF_DATA_BUFLEN]; |
86 | }; | 114 | }; |
87 | 115 | ||
88 | /** | 116 | /** |
@@ -96,7 +124,7 @@ struct perf_counter { | |||
96 | #else | 124 | #else |
97 | atomic_t count32[2]; | 125 | atomic_t count32[2]; |
98 | #endif | 126 | #endif |
99 | struct perf_counter_event event; | 127 | struct perf_counter_hw_event hw_event; |
100 | struct hw_perf_counter hw; | 128 | struct hw_perf_counter hw; |
101 | 129 | ||
102 | struct perf_counter_context *ctx; | 130 | struct perf_counter_context *ctx; |
@@ -110,8 +138,6 @@ struct perf_counter { | |||
110 | int oncpu; | 138 | int oncpu; |
111 | int cpu; | 139 | int cpu; |
112 | 140 | ||
113 | enum perf_record_type record_type; | ||
114 | |||
115 | /* read() / irq related data */ | 141 | /* read() / irq related data */ |
116 | wait_queue_head_t waitq; | 142 | wait_queue_head_t waitq; |
117 | /* optional: for NMIs */ | 143 | /* optional: for NMIs */ |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3ecd73d03daa..a549678b7c3c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -54,7 +54,7 @@ struct compat_stat; | |||
54 | struct compat_timeval; | 54 | struct compat_timeval; |
55 | struct robust_list_head; | 55 | struct robust_list_head; |
56 | struct getcpu_cache; | 56 | struct getcpu_cache; |
57 | struct perf_counter_event; | 57 | struct perf_counter_hw_event; |
58 | 58 | ||
59 | #include <linux/types.h> | 59 | #include <linux/types.h> |
60 | #include <linux/aio_abi.h> | 60 | #include <linux/aio_abi.h> |
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); | |||
625 | 625 | ||
626 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | 626 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); |
627 | 627 | ||
628 | asmlinkage int | 628 | |
629 | sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, | 629 | asmlinkage int sys_perf_counter_open( |
630 | pid_t pid, int cpu, int masterfd); | 630 | |
631 | struct perf_counter_hw_event *hw_event_uptr __user, | ||
632 | pid_t pid, | ||
633 | int cpu, | ||
634 | int group_fd); | ||
631 | #endif | 635 | #endif |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2557c670a3bb..0d323ceda3a4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
669 | { | 669 | { |
670 | struct perf_counter *counter = file->private_data; | 670 | struct perf_counter *counter = file->private_data; |
671 | 671 | ||
672 | switch (counter->record_type) { | 672 | switch (counter->hw_event.record_type) { |
673 | case PERF_RECORD_SIMPLE: | 673 | case PERF_RECORD_SIMPLE: |
674 | return perf_read_hw(counter, buf, count); | 674 | return perf_read_hw(counter, buf, count); |
675 | 675 | ||
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = { | |||
707 | * Allocate and initialize a counter structure | 707 | * Allocate and initialize a counter structure |
708 | */ | 708 | */ |
709 | static struct perf_counter * | 709 | static struct perf_counter * |
710 | perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) | 710 | perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) |
711 | { | 711 | { |
712 | struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); | 712 | struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); |
713 | 713 | ||
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) | |||
718 | INIT_LIST_HEAD(&counter->list); | 718 | INIT_LIST_HEAD(&counter->list); |
719 | init_waitqueue_head(&counter->waitq); | 719 | init_waitqueue_head(&counter->waitq); |
720 | 720 | ||
721 | counter->irqdata = &counter->data[0]; | 721 | counter->irqdata = &counter->data[0]; |
722 | counter->usrdata = &counter->data[1]; | 722 | counter->usrdata = &counter->data[1]; |
723 | counter->cpu = cpu; | 723 | counter->cpu = cpu; |
724 | counter->record_type = record_type; | 724 | counter->hw_event = *hw_event; |
725 | counter->event = *event; | 725 | counter->wakeup_pending = 0; |
726 | counter->wakeup_pending = 0; | ||
727 | 726 | ||
728 | return counter; | 727 | return counter; |
729 | } | 728 | } |
730 | 729 | ||
731 | /** | 730 | /** |
731 | * sys_perf_task_open - open a performance counter associate it to a task | 731 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu |
733 | * @hw_event_type: event type for monitoring/sampling... | 732 | * |
733 | * @hw_event_uptr: event type attributes for monitoring/sampling | ||
734 | * @pid: target pid | 734 | * @pid: target pid |
735 | * @cpu: target cpu | ||
736 | * @group_fd: group leader counter fd | ||
735 | */ | 737 | */ |
736 | asmlinkage int | 738 | asmlinkage int sys_perf_counter_open( |
737 | sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, | 739 | |
738 | pid_t pid, int cpu, int masterfd) | 740 | struct perf_counter_hw_event *hw_event_uptr __user, |
741 | pid_t pid, | ||
742 | int cpu, | ||
743 | int group_fd) | ||
744 | |||
739 | { | 745 | { |
740 | struct perf_counter_context *ctx; | 746 | struct perf_counter_context *ctx; |
741 | struct perf_counter_event event; | 747 | struct perf_counter_hw_event hw_event; |
742 | struct perf_counter *counter; | 748 | struct perf_counter *counter; |
743 | int ret; | 749 | int ret; |
744 | 750 | ||
745 | if (copy_from_user(&event, uevent, sizeof(event)) != 0) | 751 | if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) |
746 | return -EFAULT; | 752 | return -EFAULT; |
747 | 753 | ||
748 | ctx = find_get_context(pid, cpu); | 754 | ctx = find_get_context(pid, cpu); |
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, | |||
750 | return PTR_ERR(ctx); | 756 | return PTR_ERR(ctx); |
751 | 757 | ||
752 | ret = -ENOMEM; | 758 | ret = -ENOMEM; |
753 | counter = perf_counter_alloc(&event, cpu, record_type); | 759 | counter = perf_counter_alloc(&hw_event, cpu); |
754 | if (!counter) | 760 | if (!counter) |
755 | goto err_put_context; | 761 | goto err_put_context; |
756 | 762 | ||