 arch/powerpc/kernel/perf_counter.c | 68
 arch/x86/kernel/cpu/perf_counter.c | 31
 include/linux/perf_counter.h       | 19
 kernel/perf_counter.c              | 26
 4 files changed, 117 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 5b0211348c73..bd6ba85beb54 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -16,6 +16,7 @@
 #include <asm/reg.h>
 #include <asm/pmc.h>
 #include <asm/machdep.h>
+#include <asm/firmware.h>
 
 struct cpu_hw_counters {
 	int n_counters;
@@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev)
 	return 0;
 }
 
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+	int eu, ek, eh;
+	int i, n;
+	struct perf_counter *counter;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	eu = ctrs[0]->hw_event.exclude_user;
+	ek = ctrs[0]->hw_event.exclude_kernel;
+	eh = ctrs[0]->hw_event.exclude_hv;
+	if (n_prev == 0)
+		n_prev = 1;
+	for (i = n_prev; i < n; ++i) {
+		counter = ctrs[i];
+		if (counter->hw_event.exclude_user != eu ||
+		    counter->hw_event.exclude_kernel != ek ||
+		    counter->hw_event.exclude_hv != eh)
+			return -EAGAIN;
+	}
+	return 0;
+}
+
 static void power_perf_read(struct perf_counter *counter)
 {
 	long val, delta, prev;
@@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable)
 	}
 
 	/*
+	 * Add in MMCR0 freeze bits corresponding to the
+	 * hw_event.exclude_* bits for the first counter.
+	 * We have already checked that all counters have the
+	 * same values for these bits as the first counter.
+	 */
+	counter = cpuhw->counter[0];
+	if (counter->hw_event.exclude_user)
+		cpuhw->mmcr[0] |= MMCR0_FCP;
+	if (counter->hw_event.exclude_kernel)
+		cpuhw->mmcr[0] |= MMCR0_FCS;
+	if (counter->hw_event.exclude_hv)
+		cpuhw->mmcr[0] |= MMCR0_FCHV;
+
+	/*
 	 * Write the new configuration to MMCR* with the freeze
 	 * bit set and set the hardware counters to their initial values.
 	 * Then unfreeze the counters.
@@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
 			   &cpuhw->counter[n0], &cpuhw->events[n0]);
 	if (n < 0)
 		return -EAGAIN;
+	if (check_excludes(cpuhw->counter, n0, n))
+		return -EAGAIN;
 	if (power_check_constraints(cpuhw->events, n + n0))
 		return -EAGAIN;
 	cpuhw->n_counters = n0 + n;
@@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter)
 		goto out;
 	cpuhw->counter[n0] = counter;
 	cpuhw->events[n0] = counter->hw.config;
+	if (check_excludes(cpuhw->counter, n0, 1))
+		goto out;
 	if (power_check_constraints(cpuhw->events, n0 + 1))
 		goto out;
 
@@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter)
 	counter->hw.idx = 0;
 
 	/*
+	 * If we are not running on a hypervisor, force the
+	 * exclude_hv bit to 0 so that we don't care what
+	 * the user set it to.  This also means that we don't
+	 * set the MMCR0_FCHV bit, which unconditionally freezes
+	 * the counters on the PPC970 variants used in Apple G5
+	 * machines (since MSR.HV is always 1 on those machines).
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		counter->hw_event.exclude_hv = 0;
+
+	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware counters in the group.  We assume the counter
 	 * hasn't been linked into its leader's sibling list at this point.
@@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter)
 		if (n < 0)
 			return NULL;
 	}
-	events[n++] = ev;
-	if (power_check_constraints(events, n))
+	events[n] = ev;
+	if (check_excludes(ctrs, n, 1))
+		return NULL;
+	if (power_check_constraints(events, n + 1))
 		return NULL;
 
-	counter->hw.config = events[n - 1];
+	counter->hw.config = events[n];
 	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
 	return &power_perf_ops;
 }
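
For readers unfamiliar with the PowerPC PMU, the MMCR0 freeze bits used in the hw_perf_restore() hunk above gate counting by processor state: FCP freezes the counters in problem (user) state, FCS in supervisor (kernel) state, and FCHV in hypervisor state. A minimal standalone sketch of that mapping follows; it is a hypothetical helper, not part of the patch, and assumes the MMCR0_* definitions from asm/reg.h that the file already includes.

#include <asm/reg.h>
#include <linux/perf_counter.h>

/*
 * Hypothetical helper mirroring the hw_perf_restore() hunk above:
 * translate a counter's exclude_* bits into MMCR0 freeze bits.
 * check_excludes() has already ensured that every counter on the
 * PMU agrees on these bits, so the first counter is representative.
 */
static unsigned long exclude_bits_to_mmcr0(struct perf_counter *counter)
{
	unsigned long mmcr0 = 0;

	if (counter->hw_event.exclude_user)
		mmcr0 |= MMCR0_FCP;	/* freeze counters in problem (user) state */
	if (counter->hw_event.exclude_kernel)
		mmcr0 |= MMCR0_FCS;	/* freeze counters in supervisor state */
	if (counter->hw_event.exclude_hv)
		mmcr0 |= MMCR0_FCHV;	/* freeze counters in hypervisor state */

	return mmcr0;
}
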
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 9901e46998d1..383d4c6423a1 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		return -EINVAL;
 
 	/*
-	 * Count user events, and generate PMC IRQs:
+	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
 	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	/*
-	 * If privileged enough, count OS events too, and allow
-	 * NMI events as well:
+	 * Count user and OS events unless requested not to.
 	 */
-	hwc->nmi = 0;
-	if (capable(CAP_SYS_ADMIN)) {
+	if (!hw_event->exclude_user)
+		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!hw_event->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
-		if (hw_event->nmi)
-			hwc->nmi = 1;
-	}
+
+	/*
+	 * If privileged enough, allow NMI events:
+	 */
+	hwc->nmi = 0;
+	if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+		hwc->nmi = 1;
 
 	hwc->irq_period = hw_event->irq_period;
 	/*
@@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter,
 	int err;
 
 	/*
-	 * Enable IRQ generation (0x8) and ring-3 counting (0x2),
-	 * and enable ring-0 counting if allowed:
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
 	 */
-	bits = 0x8ULL | 0x2ULL;
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
 		bits |= 0x1;
 	bits <<= (idx * 4);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index f55381fbcac9..c83f51d6e359 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -83,14 +83,17 @@ struct perf_counter_hw_event {
 	u64			irq_period;
 	u32			record_type;
 
 	u32			disabled       : 1, /* off by default */
 				nmi            : 1, /* NMI sampling */
 				raw            : 1, /* raw event type */
 				inherit        : 1, /* children inherit it */
 				pinned         : 1, /* must always be on PMU */
-				exclusive      : 1, /* only counter on PMU */
-
-				__reserved_1   : 26;
+				exclusive      : 1, /* only group on PMU */
+				exclude_user   : 1, /* don't count user */
+				exclude_kernel : 1, /* ditto kernel */
+				exclude_hv     : 1, /* ditto hypervisor */
+
+				__reserved_1   : 23;
 
 	u64			__reserved_2;
 };
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 544193cbc478..89d5e3fe9700 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter)
 {
 	const struct hw_perf_counter_ops *hw_ops = NULL;
 
+	/*
+	 * Software counters (currently) can't in general distinguish
+	 * between user, kernel and hypervisor events.
+	 * However, context switches and cpu migrations are considered
+	 * to be kernel events, and page faults are never hypervisor
+	 * events.
+	 */
 	switch (counter->hw_event.type) {
 	case PERF_COUNT_CPU_CLOCK:
-		hw_ops = &perf_ops_cpu_clock;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel ||
+		      counter->hw_event.exclude_hv))
+			hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
+		if (counter->hw_event.exclude_user ||
+		    counter->hw_event.exclude_kernel ||
+		    counter->hw_event.exclude_hv)
+			break;
 		/*
 		 * If the user instantiates this as a per-cpu counter,
 		 * use the cpu_clock counter instead.
@@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter)
 		hw_ops = &perf_ops_cpu_clock;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
-		hw_ops = &perf_ops_page_faults;
+		if (!(counter->hw_event.exclude_user ||
+		      counter->hw_event.exclude_kernel))
+			hw_ops = &perf_ops_page_faults;
 		break;
 	case PERF_COUNT_CONTEXT_SWITCHES:
-		hw_ops = &perf_ops_context_switches;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_context_switches;
 		break;
 	case PERF_COUNT_CPU_MIGRATIONS:
-		hw_ops = &perf_ops_cpu_migrations;
+		if (!counter->hw_event.exclude_kernel)
+			hw_ops = &perf_ops_cpu_migrations;
 		break;
 	default:
 		break;
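
To illustrate how the new bits are meant to be used from user space, here is a hypothetical example, not part of the patch. It assumes the five-argument sys_perf_counter_open() prototype used by this patch series, an architecture-specific __NR_perf_counter_open syscall number, a PERF_COUNT_CPU_CYCLES hardware event type, and the convention that pid 0 / cpu -1 means "the current task on any CPU"; treat all of those as assumptions rather than a documented ABI.

/* Count user-mode cycles only: exclude kernel and hypervisor events. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

int main(void)
{
	struct perf_counter_hw_event hw_event;
	unsigned long long count;
	int fd;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type = PERF_COUNT_CPU_CYCLES;	/* assumed hardware event type */
	hw_event.exclude_kernel = 1;		/* don't count kernel-mode events */
	hw_event.exclude_hv = 1;		/* don't count hypervisor-mode events */

	/* args: hw_event, pid (0 = current task), cpu (-1 = any), group_fd, flags */
	fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_counter_open");
		return 1;
	}

	/* ... run the code to be measured ... */

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("user-mode cycles: %llu\n", count);
	close(fd);
	return 0;
}

Note that on a machine not running under a hypervisor, the powerpc code above simply clears exclude_hv (see the FW_FEATURE_LPAR check in hw_perf_counter_init()), so setting it there is harmless.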