author     Paul Mackerras <paulus@samba.org>  2009-02-10 22:35:35 -0500
committer  Paul Mackerras <paulus@samba.org>  2009-02-10 23:06:59 -0500
commit     0475f9ea8e2cc030298908949e0d5da9f2fc2cfe (patch)
tree       eb2585d92e00ae4c7fc7e0654ffacde7e8a57e1c /arch
parent     d278c48435625cb6b7edcf6a547620768b175709 (diff)
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature

This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.

For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish.  Context switches and CPU migrations
are currently considered to occur in kernel mode.

On x86, this changes the previous policy that only root can count
kernel events.  Now non-root users can count kernel events or exclude
them.  Non-root users still can't use NMI events, though.  On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.

On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other
events on the PMU.  Counters being added to a group have to have the
same settings as the other hardware counters in the group.  Counters
and groups can only be enabled in hw_perf_group_sched_in or
power_perf_enable if they have the same settings as any other counters
already on the PMU.  If we are not running on a hypervisor, the
exclude_hv setting is ignored (by forcing it to 0) since we can't ever
get any hypervisor events.

Signed-off-by: Paul Mackerras <paulus@samba.org>
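[Editorial illustration, not part of the commit] A minimal userspace sketch of what the new bits allow. Only the exclude_* fields come from this patch; the header path, the PERF_COUNT_CPU_CYCLES constant, the type field and the perf_counter_open syscall number are assumptions about the perf_counter ABI of this period.

/*
 * Hedged sketch only: open a cycle counter on the current task that counts
 * user-mode events and excludes kernel and hypervisor events.  Everything
 * except the exclude_* bits is assumed from the contemporary perf_counter
 * interface rather than introduced by this patch.
 */
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>         /* assumed exported header */

static int open_user_cycle_counter(void)
{
        struct perf_counter_hw_event hw_event;

        memset(&hw_event, 0, sizeof(hw_event));
        hw_event.type = PERF_COUNT_CPU_CYCLES;  /* assumed event-type field */
        hw_event.exclude_kernel = 1;            /* new: don't count kernel-mode events */
        hw_event.exclude_hv = 1;                /* new: don't count hypervisor events */

        /* pid = 0 (current task), cpu = -1 (any), no group fd, no flags */
        return syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1, 0);
}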
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/kernel/perf_counter.c  68
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  31
2 files changed, 84 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b0211348c73..bd6ba85beb54 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@
 #include <asm/reg.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 
 struct cpu_hw_counters {
         int n_counters;
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev)
         return 0;
 }
 
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+        int eu, ek, eh;
+        int i, n;
+        struct perf_counter *counter;
+
+        n = n_prev + n_new;
+        if (n <= 1)
+                return 0;
+
+        eu = ctrs[0]->hw_event.exclude_user;
+        ek = ctrs[0]->hw_event.exclude_kernel;
+        eh = ctrs[0]->hw_event.exclude_hv;
+        if (n_prev == 0)
+                n_prev = 1;
+        for (i = n_prev; i < n; ++i) {
+                counter = ctrs[i];
+                if (counter->hw_event.exclude_user != eu ||
+                    counter->hw_event.exclude_kernel != ek ||
+                    counter->hw_event.exclude_hv != eh)
+                        return -EAGAIN;
+        }
+        return 0;
+}
+
 static void power_perf_read(struct perf_counter *counter)
 {
         long val, delta, prev;
@@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable)
         }
 
         /*
+         * Add in MMCR0 freeze bits corresponding to the
+         * hw_event.exclude_* bits for the first counter.
+         * We have already checked that all counters have the
+         * same values for these bits as the first counter.
+         */
+        counter = cpuhw->counter[0];
+        if (counter->hw_event.exclude_user)
+                cpuhw->mmcr[0] |= MMCR0_FCP;
+        if (counter->hw_event.exclude_kernel)
+                cpuhw->mmcr[0] |= MMCR0_FCS;
+        if (counter->hw_event.exclude_hv)
+                cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+        /*
          * Write the new configuration to MMCR* with the freeze
          * bit set and set the hardware counters to their initial values.
          * Then unfreeze the counters.
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
                           &cpuhw->counter[n0], &cpuhw->events[n0]);
         if (n < 0)
                 return -EAGAIN;
+        if (check_excludes(cpuhw->counter, n0, n))
+                return -EAGAIN;
         if (power_check_constraints(cpuhw->events, n + n0))
                 return -EAGAIN;
         cpuhw->n_counters = n0 + n;
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter)
                 goto out;
         cpuhw->counter[n0] = counter;
         cpuhw->events[n0] = counter->hw.config;
+        if (check_excludes(cpuhw->counter, n0, 1))
+                goto out;
         if (power_check_constraints(cpuhw->events, n0 + 1))
                 goto out;
 
@@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter)
         counter->hw.idx = 0;
 
         /*
+         * If we are not running on a hypervisor, force the
+         * exclude_hv bit to 0 so that we don't care what
+         * the user set it to.  This also means that we don't
+         * set the MMCR0_FCHV bit, which unconditionally freezes
+         * the counters on the PPC970 variants used in Apple G5
+         * machines (since MSR.HV is always 1 on those machines).
+         */
+        if (!firmware_has_feature(FW_FEATURE_LPAR))
+                counter->hw_event.exclude_hv = 0;
+
+        /*
          * If this is in a group, check if it can go on with all the
          * other hardware counters in the group.  We assume the counter
          * hasn't been linked into its leader's sibling list at this point.
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter)
                 if (n < 0)
                         return NULL;
         }
-        events[n++] = ev;
-        if (power_check_constraints(events, n))
+        events[n] = ev;
+        if (check_excludes(ctrs, n, 1))
+                return NULL;
+        if (power_check_constraints(events, n + 1))
                 return NULL;
 
-        counter->hw.config = events[n - 1];
+        counter->hw.config = events[n];
         atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
         return &power_perf_ops;
 }
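[Editorial illustration, not part of the commit] The powerpc side of the mapping programmed in hw_perf_restore() above can be read as a small standalone helper; the meanings of the MMCR0 bits (FCP freezes counting in problem/user state, FCS in supervisor state, FCHV in hypervisor state) come from the PowerPC PMU architecture, not from this patch, and the sketch assumes kernel-internal context (asm/reg.h, struct perf_counter_hw_event).

/*
 * Illustrative only: how the exclude_* bits of the first counter translate
 * into MMCR0 freeze bits, mirroring the code added to hw_perf_restore().
 */
static unsigned long exclude_bits_to_mmcr0(struct perf_counter_hw_event *ev)
{
        unsigned long freeze = 0;

        if (ev->exclude_user)
                freeze |= MMCR0_FCP;    /* freeze counters in problem (user) state */
        if (ev->exclude_kernel)
                freeze |= MMCR0_FCS;    /* freeze counters in supervisor state */
        if (ev->exclude_hv)
                freeze |= MMCR0_FCHV;   /* freeze counters in hypervisor state */
        return freeze;
}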
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9901e46998d1..383d4c6423a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                 return -EINVAL;
 
         /*
-         * Count user events, and generate PMC IRQs:
+         * Generate PMC IRQs:
          * (keep 'enabled' bit clear for now)
          */
-        hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+        hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
         /*
-         * If privileged enough, count OS events too, and allow
-         * NMI events as well:
+         * Count user and OS events unless requested not to.
          */
-        hwc->nmi = 0;
-        if (capable(CAP_SYS_ADMIN)) {
+        if (!hw_event->exclude_user)
+                hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+        if (!hw_event->exclude_kernel)
                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-                if (hw_event->nmi)
-                        hwc->nmi = 1;
-        }
+
+        /*
+         * If privileged enough, allow NMI events:
+         */
+        hwc->nmi = 0;
+        if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+                hwc->nmi = 1;
 
         hwc->irq_period = hw_event->irq_period;
         /*
@@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter,
         int err;
 
         /*
-         * Enable IRQ generation (0x8) and ring-3 counting (0x2),
-         * and enable ring-0 counting if allowed:
+         * Enable IRQ generation (0x8),
+         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+         * if requested:
          */
-        bits = 0x8ULL | 0x2ULL;
+        bits = 0x8ULL;
+        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+                bits |= 0x2;
         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                 bits |= 0x1;
         bits <<= (idx * 4);
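[Editorial illustration, not part of the commit] A small sketch of what __pmc_fixed_enable() computes after this change. The 4-bit per-counter field layout of the fixed-counter control MSR (bit 0 = ring-0 counting, bit 1 = ring-3 counting, bit 3 = PMI enable) is standard Intel architectural-perfmon behaviour, not something defined by this patch.

/*
 * Illustrative only: compute the fixed-counter control bits for counter idx,
 * mirroring the constants used above (0x8 PMI, 0x2 ring 3, 0x1 ring 0).
 */
static unsigned long long fixed_ctrl_bits(int idx, int count_user, int count_os)
{
        unsigned long long bits = 0x8ULL;       /* PMI (interrupt) enable */

        if (count_user)
                bits |= 0x2ULL;                 /* count in ring 3 (user) */
        if (count_os)
                bits |= 0x1ULL;                 /* count in ring 0 (kernel) */
        return bits << (idx * 4);               /* each counter owns a 4-bit field */
}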