author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-03-19 15:26:18 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-06 03:30:17 -0400
commit	b8e83514b64577b48bfb794fe85fcde40a9343ca (patch)
tree	3fd79b41c917f56fe5a5b1f779b892110e0ec909
parent	e077df4f439681e43f0db8255b2d215b342ebdc6 (diff)
perf_counter: revamp syscall input ABI
Impact: modify ABI

The hardware/software classification in hw_event->type became a little
strained due to the addition of tracepoint tracing.

Instead split up the field and provide a type field to explicitly specify
the counter type, while using the event_id field to specify which event to
use.

Raw counters still work as before, only the raw config now goes into
raw_event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Orig-LKML-Reference: <20090319194233.836807573@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/powerpc/kernel/perf_counter.c	4
-rw-r--r--	arch/x86/kernel/cpu/perf_counter.c	10
-rw-r--r--	include/linux/perf_counter.h	95
-rw-r--r--	kernel/perf_counter.c	83
4 files changed, 117 insertions(+), 75 deletions(-)
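
For illustration only, a minimal userspace sketch (not part of this patch) of
opening a counter against the reworked ABI. The sys_perf_counter_open()
wrapper, the __NR_perf_counter_open number and the argument order
(hw_event, pid, cpu, group_fd, flags) are assumptions about the syscall of
this era; only the struct fields and enum names come from the header change
below.

/*
 * Minimal sketch, not part of this patch: open one generalized hardware
 * counter under the new type/event_id split. The syscall wrapper and
 * __NR_perf_counter_open are assumed, not taken from this commit.
 */
#include <linux/perf_counter.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static int sys_perf_counter_open(struct perf_counter_hw_event *hw_event,
				 pid_t pid, int cpu, int group_fd,
				 unsigned long flags)
{
	return syscall(__NR_perf_counter_open, hw_event, pid, cpu,
		       group_fd, flags);
}

static int open_instruction_counter(void)
{
	struct perf_counter_hw_event hw_event;

	memset(&hw_event, 0, sizeof(hw_event));

	hw_event.type     = PERF_TYPE_HARDWARE;		/* counter class */
	hw_event.event_id = PERF_COUNT_INSTRUCTIONS;	/* which event   */

	/* a raw counter would instead set .raw_type = 1 and .raw_event_id */

	/* pid 0, cpu -1: count this task on any CPU */
	return sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
}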
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5008762e8bf4..26f69dc7130e 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 	if ((s64)counter->hw_event.irq_period < 0)
 		return NULL;
-	ev = counter->hw_event.type;
+	ev = counter->hw_event.event_id;
 	if (!counter->hw_event.raw) {
 		if (ev >= ppmu->n_generic ||
 		    ppmu->generic_events[ev] == 0)
@@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter)
 	list_for_each_entry(sub, &leader->sibling_list, list_entry) {
 		if (sub != counter)
 			sub->hw_ops->read(sub);
-		perf_store_irq_data(counter, sub->hw_event.type);
+		perf_store_irq_data(counter, sub->hw_event.event_config);
 		perf_store_irq_data(counter, atomic64_read(&sub->count));
 	}
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6cba9d47b711..d844ae41d5a3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	/*
 	 * Raw event type provide the config in the event structure
 	 */
-	if (hw_event->raw) {
-		hwc->config |= pmc_ops->raw_event(hw_event->type);
+	if (hw_event->raw_type) {
+		hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
 	} else {
-		if (hw_event->type >= pmc_ops->max_events)
+		if (hw_event->event_id >= pmc_ops->max_events)
 			return -EINVAL;
 		/*
 		 * The generic map:
 		 */
-		hwc->config |= pmc_ops->event_map(hw_event->type);
+		hwc->config |= pmc_ops->event_map(hw_event->event_id);
 	}
 	counter->wakeup_pending = 0;

@@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

 		x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
-		perf_store_irq_data(sibling, counter->hw_event.type);
+		perf_store_irq_data(sibling, counter->hw_event.event_config);
 		perf_store_irq_data(sibling, atomic64_read(&counter->count));
 	}
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 065984c1ff57..8f9394905502 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -21,56 +21,81 @@
  */

 /*
- * Generalized performance counter event types, used by the hw_event.type
- * parameter of the sys_perf_counter_open() syscall:
+ * hw_event.type
  */
-enum hw_event_types {
+enum perf_event_types {
+	PERF_TYPE_HARDWARE		= 0,
+	PERF_TYPE_SOFTWARE		= 1,
+	PERF_TYPE_TRACEPOINT		= 2,
+
 	/*
-	 * Common hardware events, generalized by the kernel:
+	 * available TYPE space, raw is the max value.
 	 */
-	PERF_COUNT_CPU_CYCLES		= 0,
-	PERF_COUNT_INSTRUCTIONS		= 1,
-	PERF_COUNT_CACHE_REFERENCES	= 2,
-	PERF_COUNT_CACHE_MISSES		= 3,
-	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
-	PERF_COUNT_BRANCH_MISSES	= 5,
-	PERF_COUNT_BUS_CYCLES		= 6,

-	PERF_HW_EVENTS_MAX		= 7,
+	PERF_TYPE_RAW			= 128,
+};

+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_counter_open() syscall:
+ */
+enum hw_event_ids {
 	/*
-	 * Special "software" counters provided by the kernel, even if
-	 * the hardware does not support performance counters. These
-	 * counters measure various physical and sw events of the
-	 * kernel (and allow the profiling of them as well):
+	 * Common hardware events, generalized by the kernel:
 	 */
-	PERF_COUNT_CPU_CLOCK		= -1,
-	PERF_COUNT_TASK_CLOCK		= -2,
-	PERF_COUNT_PAGE_FAULTS		= -3,
-	PERF_COUNT_CONTEXT_SWITCHES	= -4,
-	PERF_COUNT_CPU_MIGRATIONS	= -5,
-	PERF_COUNT_PAGE_FAULTS_MIN	= -6,
-	PERF_COUNT_PAGE_FAULTS_MAJ	= -7,
+	PERF_COUNT_CPU_CYCLES		= 0,
+	PERF_COUNT_INSTRUCTIONS		= 1,
+	PERF_COUNT_CACHE_REFERENCES	= 2,
+	PERF_COUNT_CACHE_MISSES		= 3,
+	PERF_COUNT_BRANCH_INSTRUCTIONS	= 4,
+	PERF_COUNT_BRANCH_MISSES	= 5,
+	PERF_COUNT_BUS_CYCLES		= 6,

-	PERF_SW_EVENTS_MIN		= -8,
+	PERF_HW_EVENTS_MAX		= 7,
+};

-	PERF_TP_EVENTS_MIN		= -65536
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum sw_event_ids {
+	PERF_COUNT_CPU_CLOCK		= 0,
+	PERF_COUNT_TASK_CLOCK		= 1,
+	PERF_COUNT_PAGE_FAULTS		= 2,
+	PERF_COUNT_CONTEXT_SWITCHES	= 3,
+	PERF_COUNT_CPU_MIGRATIONS	= 4,
+	PERF_COUNT_PAGE_FAULTS_MIN	= 5,
+	PERF_COUNT_PAGE_FAULTS_MAJ	= 6,
+
+	PERF_SW_EVENTS_MAX		= 7,
 };

 /*
  * IRQ-notification data record type:
  */
 enum perf_counter_record_type {
 	PERF_RECORD_SIMPLE		= 0,
 	PERF_RECORD_IRQ			= 1,
 	PERF_RECORD_GROUP		= 2,
 };

 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_hw_event {
-	__s64			type;
+	union {
+		struct {
+			__u64	event_id	: 56,
+				type		:  8;
+		};
+		struct {
+			__u64	raw_event_id	: 63,
+				raw_type	:  1;
+		};
+		__u64		event_config;
+	};

 	__u64			irq_period;
 	__u64			record_type;
@@ -78,7 +103,6 @@ struct perf_counter_hw_event {

 	__u64			disabled	: 1, /* off by default */
 				nmi		: 1, /* NMI sampling */
-				raw		: 1, /* raw event type */
 				inherit		: 1, /* children inherit it */
 				pinned		: 1, /* must always be on PMU */
 				exclusive	: 1, /* only group on PMU */
@@ -87,7 +111,7 @@ struct perf_counter_hw_event {
 				exclude_hv	: 1, /* ditto hypervisor */
 				exclude_idle	: 1, /* don't count when idle */

-				__reserved_1	: 54;
+				__reserved_1	: 55;

 	__u32			extra_config_len;
 	__u32			__reserved_4;
@@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
  */
 static inline int is_software_counter(struct perf_counter *counter)
 {
-	return !counter->hw_event.raw && counter->hw_event.type < 0;
+	return !counter->hw_event.raw_type &&
+		counter->hw_event.type != PERF_TYPE_HARDWARE;
 }

-extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);

 #else
 static inline void
@@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void) { return 0; }
 static inline int perf_counter_task_disable(void)	{ return -EINVAL; }
 static inline int perf_counter_task_enable(void)	{ return -EINVAL; }

-static inline void perf_swcounter_event(enum hw_event_types event, u64 nr,
+static inline void perf_swcounter_event(u32 event, u64 nr,
 					int nmi, struct pt_regs *regs)	{ }
 #endif

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0bbe3e45ba0d..68a56a68bc74 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter)
 	atomic64_set(&hwc->count, -left);
 }

-static void perf_swcounter_save_and_restart(struct perf_counter *counter)
-{
-	perf_swcounter_update(counter);
-	perf_swcounter_set_period(counter);
-}
-
 static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data)
 {
 	struct perf_data *irqdata = counter->irqdata;
@@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling)

 	list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
 		counter->hw_ops->read(counter);
-		perf_swcounter_store_irq(sibling, counter->hw_event.type);
+		perf_swcounter_store_irq(sibling, counter->hw_event.event_config);
 		perf_swcounter_store_irq(sibling, atomic64_read(&counter->count));
 	}
 }
@@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 static void perf_swcounter_overflow(struct perf_counter *counter,
 				    int nmi, struct pt_regs *regs)
 {
-	perf_swcounter_save_and_restart(counter);
+	perf_swcounter_update(counter);
+	perf_swcounter_set_period(counter);
 	perf_swcounter_interrupt(counter, nmi, regs);
 }

 static int perf_swcounter_match(struct perf_counter *counter,
-				enum hw_event_types event,
-				struct pt_regs *regs)
+				enum perf_event_types type,
+				u32 event, struct pt_regs *regs)
 {
 	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
 		return 0;

-	if (counter->hw_event.raw)
+	if (counter->hw_event.raw_type)
+		return 0;
+
+	if (counter->hw_event.type != type)
 		return 0;

-	if (counter->hw_event.type != event)
+	if (counter->hw_event.event_id != event)
 		return 0;

 	if (counter->hw_event.exclude_user && user_mode(regs))
@@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
 }

 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
-				     enum hw_event_types event, u64 nr,
-				     int nmi, struct pt_regs *regs)
+				     enum perf_event_types type, u32 event,
+				     u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_counter *counter;

@@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,

 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
-		if (perf_swcounter_match(counter, event, regs))
+		if (perf_swcounter_match(counter, type, event, regs))
 			perf_swcounter_add(counter, nr, nmi, regs);
 	}
 	rcu_read_unlock();
 }

-void perf_swcounter_event(enum hw_event_types event, u64 nr,
-			  int nmi, struct pt_regs *regs)
+static void __perf_swcounter_event(enum perf_event_types type, u32 event,
+				   u64 nr, int nmi, struct pt_regs *regs)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);

-	perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs);
-	if (cpuctx->task_ctx)
-		perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	if (cpuctx->task_ctx) {
+		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
+					 nr, nmi, regs);
+	}

 	put_cpu_var(perf_cpu_context);
 }

+void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+{
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+}
+
 static void perf_swcounter_read(struct perf_counter *counter)
 {
 	perf_swcounter_update(counter);
@@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
 #ifdef CONFIG_EVENT_PROFILE
 void perf_tpcounter_event(int event_id)
 {
-	perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1,
-			task_pt_regs(current));
+	struct pt_regs *regs = get_irq_regs();
+
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
 }

 extern int ftrace_profile_enable(int);
@@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int);

 static void tp_perf_counter_destroy(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
-
-	ftrace_profile_disable(event_id);
+	ftrace_profile_disable(counter->hw_event.event_id);
 }

 static const struct hw_perf_counter_ops *
 tp_perf_counter_init(struct perf_counter *counter)
 {
-	int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN;
+	int event_id = counter->hw_event.event_id;
 	int ret;

 	ret = ftrace_profile_enable(event_id);
@@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;

 	counter->destroy = tp_perf_counter_destroy;
+	counter->hw.irq_period = counter->hw_event.irq_period;

 	return &perf_ops_generic;
 }
@@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->hw_event.type) {
+	switch (counter->hw_event.event_id) {
 	case PERF_COUNT_CPU_CLOCK:
 		hw_ops = &perf_ops_cpu_clock;

@@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter)
 		if (!counter->hw_event.exclude_kernel)
 			hw_ops = &perf_ops_cpu_migrations;
 		break;
-	default:
-		hw_ops = tp_perf_counter_init(counter);
-		break;
 	}

 	if (hw_ops)
@@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 	counter->state = PERF_COUNTER_STATE_OFF;

 	hw_ops = NULL;
-	if (!hw_event->raw && hw_event->type < 0)
-		hw_ops = sw_perf_counter_init(counter);
-	else
+
+	if (hw_event->raw_type)
+		hw_ops = hw_perf_counter_init(counter);
+	else switch (hw_event->type) {
+	case PERF_TYPE_HARDWARE:
 		hw_ops = hw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_SOFTWARE:
+		hw_ops = sw_perf_counter_init(counter);
+		break;
+
+	case PERF_TYPE_TRACEPOINT:
+		hw_ops = tp_perf_counter_init(counter);
+		break;
+	}

 	if (!hw_ops) {
 		kfree(counter);