-rw-r--r--  arch/powerpc/kernel/perf_counter.c  68
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  31
-rw-r--r--  include/linux/perf_counter.h        19
-rw-r--r--  kernel/perf_counter.c               26
4 files changed, 117 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b0211348c73..bd6ba85beb54 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@
 #include <asm/reg.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 
 struct cpu_hw_counters {
         int n_counters;
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev)
         return 0;
 }
 
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+        int eu, ek, eh;
+        int i, n;
+        struct perf_counter *counter;
+
+        n = n_prev + n_new;
+        if (n <= 1)
+                return 0;
+
+        eu = ctrs[0]->hw_event.exclude_user;
+        ek = ctrs[0]->hw_event.exclude_kernel;
+        eh = ctrs[0]->hw_event.exclude_hv;
+        if (n_prev == 0)
+                n_prev = 1;
+        for (i = n_prev; i < n; ++i) {
+                counter = ctrs[i];
+                if (counter->hw_event.exclude_user != eu ||
+                    counter->hw_event.exclude_kernel != ek ||
+                    counter->hw_event.exclude_hv != eh)
+                        return -EAGAIN;
+        }
+        return 0;
+}
+
 static void power_perf_read(struct perf_counter *counter)
 {
         long val, delta, prev;
@@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable)
         }
 
         /*
+         * Add in MMCR0 freeze bits corresponding to the
+         * hw_event.exclude_* bits for the first counter.
+         * We have already checked that all counters have the
+         * same values for these bits as the first counter.
+         */
+        counter = cpuhw->counter[0];
+        if (counter->hw_event.exclude_user)
+                cpuhw->mmcr[0] |= MMCR0_FCP;
+        if (counter->hw_event.exclude_kernel)
+                cpuhw->mmcr[0] |= MMCR0_FCS;
+        if (counter->hw_event.exclude_hv)
+                cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+        /*
          * Write the new configuration to MMCR* with the freeze
          * bit set and set the hardware counters to their initial values.
          * Then unfreeze the counters.
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
                            &cpuhw->counter[n0], &cpuhw->events[n0]);
         if (n < 0)
                 return -EAGAIN;
+        if (check_excludes(cpuhw->counter, n0, n))
+                return -EAGAIN;
         if (power_check_constraints(cpuhw->events, n + n0))
                 return -EAGAIN;
         cpuhw->n_counters = n0 + n;
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter)
                 goto out;
         cpuhw->counter[n0] = counter;
         cpuhw->events[n0] = counter->hw.config;
+        if (check_excludes(cpuhw->counter, n0, 1))
+                goto out;
         if (power_check_constraints(cpuhw->events, n0 + 1))
                 goto out;
 
@@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter)
         counter->hw.idx = 0;
 
         /*
+         * If we are not running on a hypervisor, force the
+         * exclude_hv bit to 0 so that we don't care what
+         * the user set it to.  This also means that we don't
+         * set the MMCR0_FCHV bit, which unconditionally freezes
+         * the counters on the PPC970 variants used in Apple G5
+         * machines (since MSR.HV is always 1 on those machines).
+         */
+        if (!firmware_has_feature(FW_FEATURE_LPAR))
+                counter->hw_event.exclude_hv = 0;
+
+        /*
          * If this is in a group, check if it can go on with all the
          * other hardware counters in the group. We assume the counter
          * hasn't been linked into its leader's sibling list at this point.
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter)
                 if (n < 0)
                         return NULL;
         }
-        events[n++] = ev;
-        if (power_check_constraints(events, n))
+        events[n] = ev;
+        if (check_excludes(ctrs, n, 1))
+                return NULL;
+        if (power_check_constraints(events, n + 1))
                 return NULL;
 
-        counter->hw.config = events[n - 1];
+        counter->hw.config = events[n];
         atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
         return &power_perf_ops;
 }
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9901e46998d1..383d4c6423a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                 return -EINVAL;
 
         /*
-         * Count user events, and generate PMC IRQs:
+         * Generate PMC IRQs:
          * (keep 'enabled' bit clear for now)
          */
-        hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+        hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
         /*
-         * If privileged enough, count OS events too, and allow
-         * NMI events as well:
+         * Count user and OS events unless requested not to.
          */
-        hwc->nmi = 0;
-        if (capable(CAP_SYS_ADMIN)) {
+        if (!hw_event->exclude_user)
+                hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+        if (!hw_event->exclude_kernel)
                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-                if (hw_event->nmi)
-                        hwc->nmi = 1;
-        }
+
+        /*
+         * If privileged enough, allow NMI events:
+         */
+        hwc->nmi = 0;
+        if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+                hwc->nmi = 1;
 
         hwc->irq_period = hw_event->irq_period;
         /*
@@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter,
         int err;
 
         /*
-         * Enable IRQ generation (0x8) and ring-3 counting (0x2),
-         * and enable ring-0 counting if allowed:
+         * Enable IRQ generation (0x8),
+         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+         * if requested:
          */
-        bits = 0x8ULL | 0x2ULL;
+        bits = 0x8ULL;
+        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+                bits |= 0x2;
         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                 bits |= 0x1;
         bits <<= (idx * 4);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f55381fbcac9..c83f51d6e359 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,14 +83,17 @@ struct perf_counter_hw_event {
         u64                     irq_period;
         u32                     record_type;
 
         u32                     disabled       :  1, /* off by default        */
                                 nmi            :  1, /* NMI sampling          */
                                 raw            :  1, /* raw event type        */
                                 inherit        :  1, /* children inherit it   */
                                 pinned         :  1, /* must always be on PMU */
-                                exclusive      :  1, /* only counter on PMU   */
-
-                                __reserved_1 : 26;
+                                exclusive      :  1, /* only group on PMU     */
+                                exclude_user   :  1, /* don't count user      */
+                                exclude_kernel :  1, /* ditto kernel          */
+                                exclude_hv     :  1, /* ditto hypervisor      */
+
+                                __reserved_1 : 23;
 
         u64                     __reserved_2;
 };
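
The hunk above adds the exclude_user, exclude_kernel and exclude_hv bits to struct perf_counter_hw_event. A minimal userspace sketch of how these bits might be used follows; it is illustrative only and not part of this patch, and it assumes the five-argument perf_counter_open() system call of the perf_counter interface, the PERF_COUNT_INSTRUCTIONS event type and the __NR_perf_counter_open syscall number, none of which appear in this diff.

/*
 * Illustrative sketch only -- not part of this patch.  Assumes the
 * perf_counter_open(hw_event, pid, cpu, group_fd, flags) system call
 * from the perf_counter series; __NR_perf_counter_open and
 * PERF_COUNT_INSTRUCTIONS are assumed from the same tree.
 */
#include <linux/perf_counter.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

static int open_user_only_counter(pid_t pid)
{
        struct perf_counter_hw_event hw_event;

        memset(&hw_event, 0, sizeof(hw_event));
        hw_event.type = PERF_COUNT_INSTRUCTIONS;
        hw_event.exclude_kernel = 1;    /* don't count kernel-mode events */
        hw_event.exclude_hv = 1;        /* don't count hypervisor-mode events */

        /* pid's own context, any CPU, no group, no flags */
        return syscall(__NR_perf_counter_open, &hw_event, pid, -1, -1, 0);
}
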
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 544193cbc478..89d5e3fe9700 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter)
 {
         const struct hw_perf_counter_ops *hw_ops = NULL;
 
+        /*
+         * Software counters (currently) can't in general distinguish
+         * between user, kernel and hypervisor events.
+         * However, context switches and cpu migrations are considered
+         * to be kernel events, and page faults are never hypervisor
+         * events.
+         */
         switch (counter->hw_event.type) {
         case PERF_COUNT_CPU_CLOCK:
-                hw_ops = &perf_ops_cpu_clock;
+                if (!(counter->hw_event.exclude_user ||
+                      counter->hw_event.exclude_kernel ||
+                      counter->hw_event.exclude_hv))
+                        hw_ops = &perf_ops_cpu_clock;
                 break;
         case PERF_COUNT_TASK_CLOCK:
+                if (counter->hw_event.exclude_user ||
+                    counter->hw_event.exclude_kernel ||
+                    counter->hw_event.exclude_hv)
+                        break;
                 /*
                  * If the user instantiates this as a per-cpu counter,
                  * use the cpu_clock counter instead.
@@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter)
                 hw_ops = &perf_ops_cpu_clock;
                 break;
         case PERF_COUNT_PAGE_FAULTS:
-                hw_ops = &perf_ops_page_faults;
+                if (!(counter->hw_event.exclude_user ||
+                      counter->hw_event.exclude_kernel))
+                        hw_ops = &perf_ops_page_faults;
                 break;
         case PERF_COUNT_CONTEXT_SWITCHES:
-                hw_ops = &perf_ops_context_switches;
+                if (!counter->hw_event.exclude_kernel)
+                        hw_ops = &perf_ops_context_switches;
                 break;
         case PERF_COUNT_CPU_MIGRATIONS:
-                hw_ops = &perf_ops_cpu_migrations;
+                if (!counter->hw_event.exclude_kernel)
+                        hw_ops = &perf_ops_cpu_migrations;
                 break;
         default:
                 break;