-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c | 29
-rw-r--r--  include/linux/perf_counter.h       | 98
-rw-r--r--  include/linux/syscalls.h           | 12
-rw-r--r--  kernel/perf_counter.c              | 38
4 files changed, 106 insertions(+), 71 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 30e7ebf78275..ef1936a871aa 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
  */
 int hw_perf_counter_init(struct perf_counter *counter)
 {
+	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	struct hw_perf_counter *hwc = &counter->hw;
-	u32 hw_event_type = counter->event.hw_event_type;
 
 	if (unlikely(!perf_counters_initialized))
 		return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	hwc->nmi = 0;
 	if (capable(CAP_SYS_ADMIN)) {
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event_type & PERF_COUNT_NMI)
+		if (hw_event->nmi)
 			hwc->nmi = 1;
 	}
 
 	hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
 	hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
 
-	hwc->irq_period = counter->event.hw_event_period;
+	hwc->irq_period = hw_event->irq_period;
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
 	if (!hwc->irq_period)
 		hwc->irq_period = 0x7FFFFFFF;
 
-	hwc->next_count = -((s32) hwc->irq_period);
+	hwc->next_count = -(s32)hwc->irq_period;
 
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	hw_event_type &= ~PERF_COUNT_NMI;
-	if (hw_event_type == PERF_COUNT_RAW) {
-		hwc->config |= counter->event.hw_raw_ctrl;
+	if (hw_event->raw) {
+		hwc->config |= hw_event->type;
 	} else {
-		if (hw_event_type >= max_intel_perfmon_events)
+		if (hw_event->type >= max_intel_perfmon_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= intel_perfmon_event_map[hw_event_type];
+		hwc->config |= intel_perfmon_event_map[hw_event->type];
 	}
 	counter->wakeup_pending = 0;
 
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 	int bit;
 
 	list_for_each_entry(counter, &ctx->counters, list) {
-		if (counter->record_type != PERF_RECORD_SIMPLE ||
+		if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
 		    counter == leader)
 			continue;
 
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
 			perf_save_and_restart(counter);
 		}
 	}
-	perf_store_irq_data(leader, counter->event.hw_event_type);
+	perf_store_irq_data(leader, counter->hw_event.type);
 	perf_store_irq_data(leader, atomic64_counter_read(counter));
 
 }
@@ -410,7 +409,7 @@ again:
 
 		perf_save_and_restart(counter);
 
-		switch (counter->record_type) {
+		switch (counter->hw_event.record_type) {
 		case PERF_RECORD_SIMPLE:
 			continue;
 		case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
 			break;
 		case PERF_RECORD_GROUP:
 			perf_store_irq_data(counter,
-					counter->event.hw_event_type);
+					counter->hw_event.type);
 			perf_store_irq_data(counter,
 					atomic64_counter_read(counter));
 			perf_handle_group(counter, &status, &ack);
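
Distilling the control-flow change in hw_perf_counter_init() above: raw events now OR their config bits into the counter verbatim via the dedicated ->raw flag, while generic event types still index the Intel map. A standalone sketch of that selection logic, under stated assumptions — the map contents are elided and the function name is illustrative, not the kernel's:

/* Sketch of the new event-config selection in hw_perf_counter_init();
 * map contents elided, config_from_hw_event() is an illustrative name.
 */
#include <stdint.h>

static const uint64_t intel_perfmon_event_map[6] = {
	0 /* EVENTSEL encodings for PERF_COUNT_CYCLES..BRANCH_MISSES */
};

static int config_from_hw_event(uint64_t type, int raw, uint64_t *config)
{
	if (raw) {
		/* raw event: hw_event->type carries EVENTSEL bits verbatim */
		*config |= type;
		return 0;
	}
	if (type >= sizeof(intel_perfmon_event_map) / sizeof(uint64_t))
		return -1;	/* the kernel returns -EINVAL here */
	/* generic event: translate through the vendor map */
	*config |= intel_perfmon_event_map[type];
	return 0;
}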
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1f0017673e77..a2b4852e2d70 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,65 +24,93 @@
 struct task_struct;
 
 /*
- * Generalized hardware event types, used by the hw_event_type parameter
- * of the sys_perf_counter_open() syscall:
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
  */
 enum hw_event_types {
-	PERF_COUNT_CYCLES,
-	PERF_COUNT_INSTRUCTIONS,
-	PERF_COUNT_CACHE_REFERENCES,
-	PERF_COUNT_CACHE_MISSES,
-	PERF_COUNT_BRANCH_INSTRUCTIONS,
-	PERF_COUNT_BRANCH_MISSES,
 	/*
-	 * If this bit is set in the type, then trigger NMI sampling:
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_NMI			= (1 << 30),
-	PERF_COUNT_RAW			= (1 << 31),
+	PERF_COUNT_CYCLES		=  0,
+	PERF_COUNT_INSTRUCTIONS		=  1,
+	PERF_COUNT_CACHE_REFERENCES	=  2,
+	PERF_COUNT_CACHE_MISSES		=  3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	=  4,
+	PERF_COUNT_BRANCH_MISSES	=  5,
+
+	/*
+	 * Special "software" counters provided by the kernel, even if
+	 * the hardware does not support performance counters. These
+	 * counters measure various physical and sw events of the
+	 * kernel (and allow the profiling of them as well):
+	 */
+	PERF_COUNT_CPU_CLOCK		= -1,
+	PERF_COUNT_TASK_CLOCK		= -2,
+	PERF_COUNT_PAGE_FAULTS		= -3,
+	PERF_COUNT_CONTEXT_SWITCHES	= -4,
 };
 
 /*
  * IRQ-notification data record type:
  */
-enum perf_record_type {
-	PERF_RECORD_SIMPLE,
-	PERF_RECORD_IRQ,
-	PERF_RECORD_GROUP,
+enum perf_counter_record_type {
+	PERF_RECORD_SIMPLE		= 0,
+	PERF_RECORD_IRQ			= 1,
+	PERF_RECORD_GROUP		= 2,
 };
 
-struct perf_counter_event {
-	u32			hw_event_type;
-	u32			hw_event_period;
-	u64			hw_raw_ctrl;
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+	u64			type;
+
+	u64			irq_period;
+	u32			record_type;
+
+	u32			disabled     :  1, /* off by default */
+				nmi	     :  1, /* NMI sampling   */
+				raw	     :  1, /* raw event type */
+				__reserved_1 : 29;
+
+	u64			__reserved_2;
 };
 
+/*
+ * Kernel-internal data types:
+ */
+
 /**
- * struct hw_perf_counter - performance counter hardware details
+ * struct hw_perf_counter - performance counter hardware details:
  */
 struct hw_perf_counter {
 	u64			config;
 	unsigned long		config_base;
 	unsigned long		counter_base;
 	int			nmi;
 	unsigned int		idx;
 	u64			prev_count;
-	s32			next_count;
 	u64			irq_period;
+	s32			next_count;
 };
 
 /*
  * Hardcoded buffer length limit for now, for IRQ-fed events:
  */
 #define PERF_DATA_BUFLEN	2048
 
 /**
  * struct perf_data - performance counter IRQ data sampling ...
  */
 struct perf_data {
 	int			len;
 	int			rd_idx;
 	int			overrun;
 	u8			data[PERF_DATA_BUFLEN];
 };
 
 /**
@@ -96,7 +124,7 @@ struct perf_counter {
 #else
 	atomic_t			count32[2];
 #endif
-	struct perf_counter_event	event;
+	struct perf_counter_hw_event	hw_event;
 	struct hw_perf_counter		hw;
 
 	struct perf_counter_context	*ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
 	int				oncpu;
 	int				cpu;
 
-	enum perf_record_type		record_type;
-
 	/* read() / irq related data */
 	wait_queue_head_t		waitq;
 	/* optional: for NMIs */
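
For illustration, here is a minimal user-space sketch of the ABI defined above. The struct layout and flag bits are restated from the hunk, with <stdint.h> fixed-width types standing in for the kernel's u32/u64; the init_insn_counter() helper is a hypothetical name, not part of the patch.

/* User-space mirror of the new hw_event ABI (restated from the patch). */
#include <stdint.h>
#include <string.h>

struct perf_counter_hw_event {
	uint64_t	type;			/* event type, or raw config    */
	uint64_t	irq_period;		/* sampling period, 0 = default */
	uint32_t	record_type;		/* PERF_RECORD_SIMPLE/IRQ/GROUP */
	uint32_t	disabled     :  1,	/* off by default */
			nmi          :  1,	/* NMI sampling   */
			raw          :  1,	/* raw event type */
			__reserved_1 : 29;
	uint64_t	__reserved_2;
};

/* Hypothetical helper: configure a plain instruction counter whose
 * value is retrieved with read(), i.e. PERF_RECORD_SIMPLE semantics.
 */
static inline void init_insn_counter(struct perf_counter_hw_event *hw_event)
{
	memset(hw_event, 0, sizeof(*hw_event));
	hw_event->type        = 1;	/* PERF_COUNT_INSTRUCTIONS */
	hw_event->record_type = 0;	/* PERF_RECORD_SIMPLE */
}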
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3ecd73d03daa..a549678b7c3c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct compat_stat;
 struct compat_timeval;
 struct robust_list_head;
 struct getcpu_cache;
-struct perf_counter_event;
+struct perf_counter_hw_event;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd);
+
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd);
 #endif
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2557c670a3bb..0d323ceda3a4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct perf_counter *counter = file->private_data;
 
-	switch (counter->record_type) {
+	switch (counter->hw_event.record_type) {
 	case PERF_RECORD_SIMPLE:
 		return perf_read_hw(counter, buf, count);
 
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
  * Allocate and initialize a counter structure
  */
 static struct perf_counter *
-perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
 {
 	struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
 	INIT_LIST_HEAD(&counter->list);
 	init_waitqueue_head(&counter->waitq);
 
 	counter->irqdata		= &counter->data[0];
 	counter->usrdata		= &counter->data[1];
 	counter->cpu			= cpu;
-	counter->record_type		= record_type;
-	counter->event			= *event;
+	counter->hw_event		= *hw_event;
 	counter->wakeup_pending		= 0;
 
 	return counter;
 }
 
 /**
- * sys_perf_task_open - open a performance counter associate it to a task
- * @hw_event_type: event type for monitoring/sampling...
+ * sys_perf_task_open - open a performance counter, associate it to a task/cpu
+ *
+ * @hw_event_uptr: event type attributes for monitoring/sampling
  * @pid: target pid
+ * @cpu: target cpu
+ * @group_fd: group leader counter fd
  */
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
-		      pid_t pid, int cpu, int masterfd)
+asmlinkage int sys_perf_counter_open(
+
+	struct perf_counter_hw_event	*hw_event_uptr		__user,
+	pid_t				pid,
+	int				cpu,
+	int				group_fd)
+
 {
 	struct perf_counter_context *ctx;
-	struct perf_counter_event event;
+	struct perf_counter_hw_event hw_event;
 	struct perf_counter *counter;
 	int ret;
 
-	if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+	if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
 		return -EFAULT;
 
 	ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
 		return PTR_ERR(ctx);
 
 	ret = -ENOMEM;
-	counter = perf_counter_alloc(&event, cpu, record_type);
+	counter = perf_counter_alloc(&hw_event, cpu);
 	if (!counter)
 		goto err_put_context;
 
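
Putting the pieces together, a sketch of the end-to-end call sequence this patch enables: open a counter via the new four-argument prototype, run the workload, then read() the running count — which, for a PERF_RECORD_SIMPLE counter, perf_read() above services through perf_read_hw() as a single u64. The syscall number below is a placeholder (the syscall table entries are outside this patch), and pid 0 / cpu -1 / group_fd -1 are assumed here to mean "current task, any CPU, no group".

/* End-to-end usage sketch. Assumptions: __NR_perf_counter_open is a
 * placeholder number, and the struct mirrors the new ABI definition.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#define __NR_perf_counter_open	333	/* placeholder, not from this patch */

struct perf_counter_hw_event {		/* mirror of the new ABI struct */
	uint64_t	type;
	uint64_t	irq_period;
	uint32_t	record_type;
	uint32_t	disabled     :  1,
			nmi          :  1,
			raw          :  1,
			__reserved_1 : 29;
	uint64_t	__reserved_2;
};

int main(void)
{
	struct perf_counter_hw_event hw_event;
	uint64_t count;
	int fd;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type = 1;		/* PERF_COUNT_INSTRUCTIONS */

	/* Argument order per the new prototype:
	 * (hw_event_uptr, pid, cpu, group_fd)
	 */
	fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1);
	if (fd < 0)
		return 1;

	/* ... workload to be measured runs here ... */

	/* PERF_RECORD_SIMPLE: read() returns the current counter value */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}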