aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2008-12-10 06:33:23 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-12-11 09:45:48 -0500
commit: 9f66a3810fe0d4100972db84290f3ae4a4d77025 (patch)
tree: 2101d0d14aecf9d3e406544711e7336e3ea6b3af
parent: dfa7c899b401d7dc5d85aca416aee64ac82812f2 (diff)
perf counters: restructure the API
Impact: clean up new API

Thorough cleanup of the new perf counters API; we now get clean
separation of the various concepts:

 - introduce perf_counter_hw_event to separate out the event source details

 - move special type flags into separate attributes: PERF_COUNT_NMI,
   PERF_COUNT_RAW

 - extend the type to u64 and reserve it fully to the architecture in
   the raw type case.

And make use of all these changes in the core and x86 perfcounters code.

Also change the syscall signature to:

  asmlinkage int sys_perf_counter_open(

	struct perf_counter_hw_event	*hw_event_uptr		__user,
	pid_t				pid,
	int				cpu,
	int				group_fd);

( Note that group_fd is unused for now - it's reserved for the counter
  groups abstraction. )

Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/kernel/cpu/perf_counter.c 29
-rw-r--r-- include/linux/perf_counter.h 98
-rw-r--r-- include/linux/syscalls.h 12
-rw-r--r-- kernel/perf_counter.c 38
4 files changed, 106 insertions, 71 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 30e7ebf78275..ef1936a871aa 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
58 */ 58 */
59int hw_perf_counter_init(struct perf_counter *counter) 59int hw_perf_counter_init(struct perf_counter *counter)
60{ 60{
61 struct perf_counter_hw_event *hw_event = &counter->hw_event;
61 struct hw_perf_counter *hwc = &counter->hw; 62 struct hw_perf_counter *hwc = &counter->hw;
62 u32 hw_event_type = counter->event.hw_event_type;
63 63
64 if (unlikely(!perf_counters_initialized)) 64 if (unlikely(!perf_counters_initialized))
65 return -EINVAL; 65 return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
77 hwc->nmi = 0; 77 hwc->nmi = 0;
78 if (capable(CAP_SYS_ADMIN)) { 78 if (capable(CAP_SYS_ADMIN)) {
79 hwc->config |= ARCH_PERFMON_EVENTSEL_OS; 79 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
80 if (hw_event_type & PERF_COUNT_NMI) 80 if (hw_event->nmi)
81 hwc->nmi = 1; 81 hwc->nmi = 1;
82 } 82 }
83 83
84 hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; 84 hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
85 hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; 85 hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
86 86
87 hwc->irq_period = counter->event.hw_event_period; 87 hwc->irq_period = hw_event->irq_period;
88 /* 88 /*
89 * Intel PMCs cannot be accessed sanely above 32 bit width, 89 * Intel PMCs cannot be accessed sanely above 32 bit width,
90 * so we install an artificial 1<<31 period regardless of 90 * so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
93 if (!hwc->irq_period) 93 if (!hwc->irq_period)
94 hwc->irq_period = 0x7FFFFFFF; 94 hwc->irq_period = 0x7FFFFFFF;
95 95
96 hwc->next_count = -((s32) hwc->irq_period); 96 hwc->next_count = -(s32)hwc->irq_period;
97 97
98 /* 98 /*
99 * Raw event type provide the config in the event structure 99 * Raw event type provide the config in the event structure
100 */ 100 */
101 hw_event_type &= ~PERF_COUNT_NMI; 101 if (hw_event->raw) {
102 if (hw_event_type == PERF_COUNT_RAW) { 102 hwc->config |= hw_event->type;
103 hwc->config |= counter->event.hw_raw_ctrl;
104 } else { 103 } else {
105 if (hw_event_type >= max_intel_perfmon_events) 104 if (hw_event->type >= max_intel_perfmon_events)
106 return -EINVAL; 105 return -EINVAL;
107 /* 106 /*
108 * The generic map: 107 * The generic map:
109 */ 108 */
110 hwc->config |= intel_perfmon_event_map[hw_event_type]; 109 hwc->config |= intel_perfmon_event_map[hw_event->type];
111 } 110 }
112 counter->wakeup_pending = 0; 111 counter->wakeup_pending = 0;
113 112
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
354 int bit; 353 int bit;
355 354
356 list_for_each_entry(counter, &ctx->counters, list) { 355 list_for_each_entry(counter, &ctx->counters, list) {
357 if (counter->record_type != PERF_RECORD_SIMPLE || 356 if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
358 counter == leader) 357 counter == leader)
359 continue; 358 continue;
360 359
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
372 perf_save_and_restart(counter); 371 perf_save_and_restart(counter);
373 } 372 }
374 } 373 }
375 perf_store_irq_data(leader, counter->event.hw_event_type); 374 perf_store_irq_data(leader, counter->hw_event.type);
376 perf_store_irq_data(leader, atomic64_counter_read(counter)); 375 perf_store_irq_data(leader, atomic64_counter_read(counter));
377 } 376 }
378} 377}
@@ -410,7 +409,7 @@ again:
410 409
411 perf_save_and_restart(counter); 410 perf_save_and_restart(counter);
412 411
413 switch (counter->record_type) { 412 switch (counter->hw_event.record_type) {
414 case PERF_RECORD_SIMPLE: 413 case PERF_RECORD_SIMPLE:
415 continue; 414 continue;
416 case PERF_RECORD_IRQ: 415 case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
418 break; 417 break;
419 case PERF_RECORD_GROUP: 418 case PERF_RECORD_GROUP:
420 perf_store_irq_data(counter, 419 perf_store_irq_data(counter,
421 counter->event.hw_event_type); 420 counter->hw_event.type);
422 perf_store_irq_data(counter, 421 perf_store_irq_data(counter,
423 atomic64_counter_read(counter)); 422 atomic64_counter_read(counter));
424 perf_handle_group(counter, &status, &ack); 423 perf_handle_group(counter, &status, &ack);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1f0017673e77..a2b4852e2d70 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,65 +24,93 @@
24struct task_struct; 24struct task_struct;
25 25
26/* 26/*
27 * Generalized hardware event types, used by the hw_event_type parameter 27 * User-space ABI bits:
28 * of the sys_perf_counter_open() syscall: 28 */
29
30/*
31 * Generalized performance counter event types, used by the hw_event.type
32 * parameter of the sys_perf_counter_open() syscall:
29 */ 33 */
30enum hw_event_types { 34enum hw_event_types {
31 PERF_COUNT_CYCLES,
32 PERF_COUNT_INSTRUCTIONS,
33 PERF_COUNT_CACHE_REFERENCES,
34 PERF_COUNT_CACHE_MISSES,
35 PERF_COUNT_BRANCH_INSTRUCTIONS,
36 PERF_COUNT_BRANCH_MISSES,
37 /* 35 /*
38 * If this bit is set in the type, then trigger NMI sampling: 36 * Common hardware events, generalized by the kernel:
39 */ 37 */
40 PERF_COUNT_NMI = (1 << 30), 38 PERF_COUNT_CYCLES = 0,
41 PERF_COUNT_RAW = (1 << 31), 39 PERF_COUNT_INSTRUCTIONS = 1,
40 PERF_COUNT_CACHE_REFERENCES = 2,
41 PERF_COUNT_CACHE_MISSES = 3,
42 PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
43 PERF_COUNT_BRANCH_MISSES = 5,
44
45 /*
46 * Special "software" counters provided by the kernel, even if
47 * the hardware does not support performance counters. These
48 * counters measure various physical and sw events of the
49 * kernel (and allow the profiling of them as well):
50 */
51 PERF_COUNT_CPU_CLOCK = -1,
52 PERF_COUNT_TASK_CLOCK = -2,
53 PERF_COUNT_PAGE_FAULTS = -3,
54 PERF_COUNT_CONTEXT_SWITCHES = -4,
42}; 55};
43 56
44/* 57/*
45 * IRQ-notification data record type: 58 * IRQ-notification data record type:
46 */ 59 */
47enum perf_record_type { 60enum perf_counter_record_type {
48 PERF_RECORD_SIMPLE, 61 PERF_RECORD_SIMPLE = 0,
49 PERF_RECORD_IRQ, 62 PERF_RECORD_IRQ = 1,
50 PERF_RECORD_GROUP, 63 PERF_RECORD_GROUP = 2,
51}; 64};
52 65
53struct perf_counter_event { 66/*
54 u32 hw_event_type; 67 * Hardware event to monitor via a performance monitoring counter:
55 u32 hw_event_period; 68 */
56 u64 hw_raw_ctrl; 69struct perf_counter_hw_event {
70 u64 type;
71
72 u64 irq_period;
73 u32 record_type;
74
75 u32 disabled : 1, /* off by default */
76 nmi : 1, /* NMI sampling */
77 raw : 1, /* raw event type */
78 __reserved_1 : 29;
79
80 u64 __reserved_2;
57}; 81};
58 82
83/*
84 * Kernel-internal data types:
85 */
86
59/** 87/**
60 * struct hw_perf_counter - performance counter hardware details 88 * struct hw_perf_counter - performance counter hardware details:
61 */ 89 */
62struct hw_perf_counter { 90struct hw_perf_counter {
63 u64 config; 91 u64 config;
64 unsigned long config_base; 92 unsigned long config_base;
65 unsigned long counter_base; 93 unsigned long counter_base;
66 int nmi; 94 int nmi;
67 unsigned int idx; 95 unsigned int idx;
68 u64 prev_count; 96 u64 prev_count;
69 s32 next_count; 97 u64 irq_period;
70 u64 irq_period; 98 s32 next_count;
71}; 99};
72 100
73/* 101/*
74 * Hardcoded buffer length limit for now, for IRQ-fed events: 102 * Hardcoded buffer length limit for now, for IRQ-fed events:
75 */ 103 */
76#define PERF_DATA_BUFLEN 2048 104#define PERF_DATA_BUFLEN 2048
77 105
78/** 106/**
79 * struct perf_data - performance counter IRQ data sampling ... 107 * struct perf_data - performance counter IRQ data sampling ...
80 */ 108 */
81struct perf_data { 109struct perf_data {
82 int len; 110 int len;
83 int rd_idx; 111 int rd_idx;
84 int overrun; 112 int overrun;
85 u8 data[PERF_DATA_BUFLEN]; 113 u8 data[PERF_DATA_BUFLEN];
86}; 114};
87 115
88/** 116/**
@@ -96,7 +124,7 @@ struct perf_counter {
96#else 124#else
97 atomic_t count32[2]; 125 atomic_t count32[2];
98#endif 126#endif
99 struct perf_counter_event event; 127 struct perf_counter_hw_event hw_event;
100 struct hw_perf_counter hw; 128 struct hw_perf_counter hw;
101 129
102 struct perf_counter_context *ctx; 130 struct perf_counter_context *ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
110 int oncpu; 138 int oncpu;
111 int cpu; 139 int cpu;
112 140
113 enum perf_record_type record_type;
114
115 /* read() / irq related data */ 141 /* read() / irq related data */
116 wait_queue_head_t waitq; 142 wait_queue_head_t waitq;
117 /* optional: for NMIs */ 143 /* optional: for NMIs */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3ecd73d03daa..a549678b7c3c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct compat_stat;
54struct compat_timeval; 54struct compat_timeval;
55struct robust_list_head; 55struct robust_list_head;
56struct getcpu_cache; 56struct getcpu_cache;
57struct perf_counter_event; 57struct perf_counter_hw_event;
58 58
59#include <linux/types.h> 59#include <linux/types.h>
60#include <linux/aio_abi.h> 60#include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
625 625
626int kernel_execve(const char *filename, char *const argv[], char *const envp[]); 626int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
627 627
628asmlinkage int 628
629sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, 629asmlinkage int sys_perf_counter_open(
630 pid_t pid, int cpu, int masterfd); 630
631 struct perf_counter_hw_event *hw_event_uptr __user,
632 pid_t pid,
633 int cpu,
634 int group_fd);
631#endif 635#endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2557c670a3bb..0d323ceda3a4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
669{ 669{
670 struct perf_counter *counter = file->private_data; 670 struct perf_counter *counter = file->private_data;
671 671
672 switch (counter->record_type) { 672 switch (counter->hw_event.record_type) {
673 case PERF_RECORD_SIMPLE: 673 case PERF_RECORD_SIMPLE:
674 return perf_read_hw(counter, buf, count); 674 return perf_read_hw(counter, buf, count);
675 675
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
707 * Allocate and initialize a counter structure 707 * Allocate and initialize a counter structure
708 */ 708 */
709static struct perf_counter * 709static struct perf_counter *
710perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) 710perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
711{ 711{
712 struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); 712 struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
713 713
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
718 INIT_LIST_HEAD(&counter->list); 718 INIT_LIST_HEAD(&counter->list);
719 init_waitqueue_head(&counter->waitq); 719 init_waitqueue_head(&counter->waitq);
720 720
721 counter->irqdata = &counter->data[0]; 721 counter->irqdata = &counter->data[0];
722 counter->usrdata = &counter->data[1]; 722 counter->usrdata = &counter->data[1];
723 counter->cpu = cpu; 723 counter->cpu = cpu;
724 counter->record_type = record_type; 724 counter->hw_event = *hw_event;
725 counter->event = *event; 725 counter->wakeup_pending = 0;
726 counter->wakeup_pending = 0;
727 726
728 return counter; 727 return counter;
729} 728}
730 729
731/** 730/**
732 * sys_perf_task_open - open a performance counter associate it to a task 731 * sys_perf_task_open - open a performance counter, associate it to a task/cpu
733 * @hw_event_type: event type for monitoring/sampling... 732 *
733 * @hw_event_uptr: event type attributes for monitoring/sampling
734 * @pid: target pid 734 * @pid: target pid
735 * @cpu: target cpu
736 * @group_fd: group leader counter fd
735 */ 737 */
736asmlinkage int 738asmlinkage int sys_perf_counter_open(
737sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, 739
738 pid_t pid, int cpu, int masterfd) 740 struct perf_counter_hw_event *hw_event_uptr __user,
741 pid_t pid,
742 int cpu,
743 int group_fd)
744
739{ 745{
740 struct perf_counter_context *ctx; 746 struct perf_counter_context *ctx;
741 struct perf_counter_event event; 747 struct perf_counter_hw_event hw_event;
742 struct perf_counter *counter; 748 struct perf_counter *counter;
743 int ret; 749 int ret;
744 750
745 if (copy_from_user(&event, uevent, sizeof(event)) != 0) 751 if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
746 return -EFAULT; 752 return -EFAULT;
747 753
748 ctx = find_get_context(pid, cpu); 754 ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
750 return PTR_ERR(ctx); 756 return PTR_ERR(ctx);
751 757
752 ret = -ENOMEM; 758 ret = -ENOMEM;
753 counter = perf_counter_alloc(&event, cpu, record_type); 759 counter = perf_counter_alloc(&hw_event, cpu);
754 if (!counter) 760 if (!counter)
755 goto err_put_context; 761 goto err_put_context;
756 762