diff options
author | Paul Mackerras <paulus@samba.org> | 2009-02-10 22:35:35 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2009-02-10 23:06:59 -0500 |
commit | 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe (patch) | |
tree | eb2585d92e00ae4c7fc7e0654ffacde7e8a57e1c /arch/powerpc/kernel/perf_counter.c | |
parent | d278c48435625cb6b7edcf6a547620768b175709 (diff) |
perf_counters: allow users to count user, kernel and/or hypervisor events
Impact: new perf_counter feature
This extends the perf_counter_hw_event struct with bits that specify
that events in user, kernel and/or hypervisor mode should not be
counted (i.e. should be excluded), and adds code to program the PMU
mode selection bits accordingly on x86 and powerpc.
For software counters, we don't currently have the infrastructure to
distinguish which mode an event occurs in, so we currently fail the
counter initialization if the setting of the hw_event.exclude_* bits
would require us to distinguish. Context switches and CPU migrations
are currently considered to occur in kernel mode.
On x86, this changes the previous policy that only root can count
kernel events. Now non-root users can count kernel events or exclude
them. Non-root users still can't use NMI events, though. On x86 we
don't appear to have any way to control whether hypervisor events are
counted or not, so hw_event.exclude_hv is ignored.
On powerpc, the selection of whether to count events in user, kernel
and/or hypervisor mode is PMU-wide, not per-counter, so this adds a
check that the hw_event.exclude_* settings are the same as other events
on the PMU. Counters being added to a group have to have the same
settings as the other hardware counters in the group. Counters and
groups can only be enabled in hw_perf_group_sched_in or power_perf_enable
if they have the same settings as any other counters already on the
PMU. If we are not running on a hypervisor, the exclude_hv setting
is ignored (by forcing it to 0) since we can't ever get any
hypervisor events.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/kernel/perf_counter.c')
-rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 68 |
1 files changed, 65 insertions, 3 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b0211348c73..bd6ba85beb54 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/reg.h> | 16 | #include <asm/reg.h> |
17 | #include <asm/pmc.h> | 17 | #include <asm/pmc.h> |
18 | #include <asm/machdep.h> | 18 | #include <asm/machdep.h> |
19 | #include <asm/firmware.h> | ||
19 | 20 | ||
20 | struct cpu_hw_counters { | 21 | struct cpu_hw_counters { |
21 | int n_counters; | 22 | int n_counters; |
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev) | |||
214 | return 0; | 215 | return 0; |
215 | } | 216 | } |
216 | 217 | ||
218 | /* | ||
219 | * Check if newly-added counters have consistent settings for | ||
220 | * exclude_{user,kernel,hv} with each other and any previously | ||
221 | * added counters. | ||
222 | */ | ||
223 | static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) | ||
224 | { | ||
225 | int eu, ek, eh; | ||
226 | int i, n; | ||
227 | struct perf_counter *counter; | ||
228 | |||
229 | n = n_prev + n_new; | ||
230 | if (n <= 1) | ||
231 | return 0; | ||
232 | |||
233 | eu = ctrs[0]->hw_event.exclude_user; | ||
234 | ek = ctrs[0]->hw_event.exclude_kernel; | ||
235 | eh = ctrs[0]->hw_event.exclude_hv; | ||
236 | if (n_prev == 0) | ||
237 | n_prev = 1; | ||
238 | for (i = n_prev; i < n; ++i) { | ||
239 | counter = ctrs[i]; | ||
240 | if (counter->hw_event.exclude_user != eu || | ||
241 | counter->hw_event.exclude_kernel != ek || | ||
242 | counter->hw_event.exclude_hv != eh) | ||
243 | return -EAGAIN; | ||
244 | } | ||
245 | return 0; | ||
246 | } | ||
247 | |||
217 | static void power_perf_read(struct perf_counter *counter) | 248 | static void power_perf_read(struct perf_counter *counter) |
218 | { | 249 | { |
219 | long val, delta, prev; | 250 | long val, delta, prev; |
@@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable) | |||
324 | } | 355 | } |
325 | 356 | ||
326 | /* | 357 | /* |
358 | * Add in MMCR0 freeze bits corresponding to the | ||
359 | * hw_event.exclude_* bits for the first counter. | ||
360 | * We have already checked that all counters have the | ||
361 | * same values for these bits as the first counter. | ||
362 | */ | ||
363 | counter = cpuhw->counter[0]; | ||
364 | if (counter->hw_event.exclude_user) | ||
365 | cpuhw->mmcr[0] |= MMCR0_FCP; | ||
366 | if (counter->hw_event.exclude_kernel) | ||
367 | cpuhw->mmcr[0] |= MMCR0_FCS; | ||
368 | if (counter->hw_event.exclude_hv) | ||
369 | cpuhw->mmcr[0] |= MMCR0_FCHV; | ||
370 | |||
371 | /* | ||
327 | * Write the new configuration to MMCR* with the freeze | 372 | * Write the new configuration to MMCR* with the freeze |
328 | * bit set and set the hardware counters to their initial values. | 373 | * bit set and set the hardware counters to their initial values. |
329 | * Then unfreeze the counters. | 374 | * Then unfreeze the counters. |
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, | |||
424 | &cpuhw->counter[n0], &cpuhw->events[n0]); | 469 | &cpuhw->counter[n0], &cpuhw->events[n0]); |
425 | if (n < 0) | 470 | if (n < 0) |
426 | return -EAGAIN; | 471 | return -EAGAIN; |
472 | if (check_excludes(cpuhw->counter, n0, n)) | ||
473 | return -EAGAIN; | ||
427 | if (power_check_constraints(cpuhw->events, n + n0)) | 474 | if (power_check_constraints(cpuhw->events, n + n0)) |
428 | return -EAGAIN; | 475 | return -EAGAIN; |
429 | cpuhw->n_counters = n0 + n; | 476 | cpuhw->n_counters = n0 + n; |
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter) | |||
476 | goto out; | 523 | goto out; |
477 | cpuhw->counter[n0] = counter; | 524 | cpuhw->counter[n0] = counter; |
478 | cpuhw->events[n0] = counter->hw.config; | 525 | cpuhw->events[n0] = counter->hw.config; |
526 | if (check_excludes(cpuhw->counter, n0, 1)) | ||
527 | goto out; | ||
479 | if (power_check_constraints(cpuhw->events, n0 + 1)) | 528 | if (power_check_constraints(cpuhw->events, n0 + 1)) |
480 | goto out; | 529 | goto out; |
481 | 530 | ||
@@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter) | |||
555 | counter->hw.idx = 0; | 604 | counter->hw.idx = 0; |
556 | 605 | ||
557 | /* | 606 | /* |
607 | * If we are not running on a hypervisor, force the | ||
608 | * exclude_hv bit to 0 so that we don't care what | ||
609 | * the user set it to. This also means that we don't | ||
610 | * set the MMCR0_FCHV bit, which unconditionally freezes | ||
611 | * the counters on the PPC970 variants used in Apple G5 | ||
612 | * machines (since MSR.HV is always 1 on those machines). | ||
613 | */ | ||
614 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ||
615 | counter->hw_event.exclude_hv = 0; | ||
616 | |||
617 | /* | ||
558 | * If this is in a group, check if it can go on with all the | 618 | * If this is in a group, check if it can go on with all the |
559 | * other hardware counters in the group. We assume the counter | 619 | * other hardware counters in the group. We assume the counter |
560 | * hasn't been linked into its leader's sibling list at this point. | 620 | * hasn't been linked into its leader's sibling list at this point. |
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter) | |||
566 | if (n < 0) | 626 | if (n < 0) |
567 | return NULL; | 627 | return NULL; |
568 | } | 628 | } |
569 | events[n++] = ev; | 629 | events[n] = ev; |
570 | if (power_check_constraints(events, n)) | 630 | if (check_excludes(ctrs, n, 1)) |
631 | return NULL; | ||
632 | if (power_check_constraints(events, n + 1)) | ||
571 | return NULL; | 633 | return NULL; |
572 | 634 | ||
573 | counter->hw.config = events[n - 1]; | 635 | counter->hw.config = events[n]; |
574 | atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); | 636 | atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); |
575 | return &power_perf_ops; | 637 | return &power_perf_ops; |
576 | } | 638 | } |