 arch/powerpc/kernel/power7-pmu.c   | 12
 arch/x86/kernel/cpu/perf_counter.c |  7
 include/linux/perf_counter.h       | 22
 include/linux/syscalls.h           |  2
 init/Kconfig                       |  2
 kernel/perf_counter.c              | 95
 tools/perf/builtin-record.c        |  7
 tools/perf/design.txt              | 15
 tools/perf/perf.h                  |  5
 tools/perf/util/parse-events.c     |  2
 10 files changed, 144 insertions(+), 25 deletions(-)
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index b3f7d1216bae..b72e7a19d054 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -294,12 +294,12 @@ static void power7_disable_pmc(unsigned int pmc, u64 mmcr[])
 }
 
 static int power7_generic_events[] = {
-	[PERF_COUNT_CPU_CYCLES] = 0x1e,
-	[PERF_COUNT_INSTRUCTIONS] = 2,
-	[PERF_COUNT_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU */
-	[PERF_COUNT_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1 */
-	[PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN */
-	[PERF_COUNT_BRANCH_MISSES] = 0x400f6,		/* BR_MPRED */
+	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
+	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880,	/* LD_REF_L1_LSU */
+	[PERF_COUNT_HW_CACHE_MISSES] = 0x400f0,		/* LD_MISS_L1 */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068,	/* BRU_FIN */
+	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6,	/* BR_MPRED */
 };
 
 #define C(x)	PERF_COUNT_HW_CACHE_##x
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 895c82e78455..275bc142cd5d 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -968,6 +968,13 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
 	if (!x86_pmu.num_counters_fixed)
 		return -1;
 
+	/*
+	 * Quirk, IA32_FIXED_CTRs do not work on current Atom processors:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_model == 28)
+		return -1;
+
 	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
 	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 6e133954e2e4..1b3118a1023a 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -120,6 +120,8 @@ enum perf_counter_sample_format {
 	PERF_SAMPLE_ID		= 1U << 6,
 	PERF_SAMPLE_CPU		= 1U << 7,
 	PERF_SAMPLE_PERIOD	= 1U << 8,
+
+	PERF_SAMPLE_MAX = 1U << 9,	/* non-ABI */
 };
 
 /*
@@ -131,17 +133,26 @@ enum perf_counter_read_format {
 	PERF_FORMAT_TOTAL_TIME_ENABLED	= 1U << 0,
 	PERF_FORMAT_TOTAL_TIME_RUNNING	= 1U << 1,
 	PERF_FORMAT_ID			= 1U << 2,
+
+	PERF_FORMAT_MAX = 1U << 3,	/* non-ABI */
 };
 
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
 /*
  * Hardware event to monitor via a performance monitoring counter:
  */
 struct perf_counter_attr {
+
 	/*
 	 * Major type: hardware/software/tracepoint/etc.
 	 */
 	__u32	type;
-	__u32	__reserved_1;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32	size;
 
 	/*
 	 * Type specific configuration information.
@@ -168,12 +179,12 @@ struct perf_counter_attr {
 			comm		:  1, /* include comm data     */
 			freq		:  1, /* use freq, not period  */
 
-			__reserved_2	: 53;
+			__reserved_1	: 53;
 
 	__u32		wakeup_events;	/* wakeup every n events */
-	__u32		__reserved_3;
+	__u32		__reserved_2;
 
-	__u64		__reserved_4;
+	__u64		__reserved_3;
 };
 
 /*
@@ -621,7 +632,8 @@ extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
 static inline int is_software_counter(struct perf_counter *counter)
 {
 	return (counter->attr.type != PERF_TYPE_RAW) &&
-		(counter->attr.type != PERF_TYPE_HARDWARE);
+		(counter->attr.type != PERF_TYPE_HARDWARE) &&
+		(counter->attr.type != PERF_TYPE_HW_CACHE);
 }
 
 extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c6c84ad8bd71..418d90f5effe 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
 
 asmlinkage long sys_perf_counter_open(
-		const struct perf_counter_attr __user *attr_uptr,
+		struct perf_counter_attr __user *attr_uptr,
 		pid_t pid, int cpu, int group_fd, unsigned long flags);
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index c649657e2259..d3a50967c337 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -936,6 +936,8 @@ config AIO
 
 config HAVE_PERF_COUNTERS
 	bool
+	help
+	  See tools/perf/design.txt for details.
 
 menu "Performance Counters"
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index ef5d8a5b2453..29b685f551aa 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3570,12 +3570,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
 		goto done;
 
-	if (attr->type == PERF_TYPE_RAW) {
-		pmu = hw_perf_counter_init(counter);
-		goto done;
-	}
-
 	switch (attr->type) {
+	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
 		pmu = hw_perf_counter_init(counter);
@@ -3588,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_TRACEPOINT:
 		pmu = tp_perf_counter_init(counter);
 		break;
+
+	default:
+		break;
 	}
 done:
 	err = 0;
@@ -3614,6 +3613,85 @@ done:
 	return counter;
 }
 
+static int perf_copy_attr(struct perf_counter_attr __user *uattr,
+			  struct perf_counter_attr *attr)
+{
+	int ret;
+	u32 size;
+
+	if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (!size)		/* abi compat */
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size < PERF_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned long val;
+		unsigned long __user *addr;
+		unsigned long __user *end;
+
+		addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
+				 sizeof(unsigned long));
+		end  = PTR_ALIGN((void __user *)uattr + size,
+				 sizeof(unsigned long));
+
+		for (; addr < end; addr += sizeof(unsigned long)) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+	/*
+	 * If the type exists, the corresponding creation will verify
+	 * the attr->config.
+	 */
+	if (attr->type >= PERF_TYPE_MAX)
+		return -EINVAL;
+
+	if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3)
+		return -EINVAL;
+
+	if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
+		return -EINVAL;
+
+	if (attr->read_format & ~(PERF_FORMAT_MAX-1))
+		return -EINVAL;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
 /**
  * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
  *
@@ -3623,7 +3701,7 @@ done:
  * @group_fd: group leader counter fd
  */
 SYSCALL_DEFINE5(perf_counter_open,
-		const struct perf_counter_attr __user *, attr_uptr,
+		struct perf_counter_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
 	struct perf_counter *counter, *group_leader;
@@ -3639,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open,
 	if (flags)
 		return -EINVAL;
 
-	if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0)
-		return -EFAULT;
+	ret = perf_copy_attr(attr_uptr, &attr);
+	if (ret)
+		return ret;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
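
The perf_copy_attr() helper above is what makes the attr ABI extensible: user space fills in attr.size, an old binary may pass a smaller struct (or size 0, treated as PERF_ATTR_SIZE_VER0), and a newer binary may pass a larger one as long as every byte the kernel does not know about is zero. A minimal user-space sketch of the resulting calling convention follows; it assumes the uapi header is reachable as <linux/perf_counter.h> and is purely illustrative, not part of this patch:

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_counter.h>

	/* Count CPU cycles for the calling task on any CPU; sketch only. */
	static int open_cycle_counter(void)
	{
		struct perf_counter_attr attr;

		memset(&attr, 0, sizeof(attr));	/* fields we don't set must stay zero */
		attr.type   = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.size   = sizeof(attr);	/* lets perf_copy_attr() do fwd/bwd compat */

		/* pid 0 = current task, cpu -1 = any, no group leader, no flags */
		return syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
	}
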
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 29259e74dcfa..0f5771f615da 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -568,8 +568,11 @@ int cmd_record(int argc, const char **argv, const char *prefix)
 	if (!argc && target_pid == -1 && !system_wide)
 		usage_with_options(record_usage, options);
 
-	if (!nr_counters)
+	if (!nr_counters) {
 		nr_counters	= 1;
+		attrs[0].type	= PERF_TYPE_HARDWARE;
+		attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+	}
 
 	for (counter = 0; counter < nr_counters; counter++) {
 		if (attrs[counter].sample_period)
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 860e116d979c..f71e0d245cba 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -440,3 +440,18 @@ by this process or by another, and doesn't affect any counters that
 this process has created on other processes. It only enables or
 disables the group leaders, not any other members in the groups.
 
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_COUNTERS to your Kconfig, you
+will need at least this:
+	- asm/perf_counter.h - a basic stub will suffice at first
+	- support for atomic64 types (and associated helper functions)
+	- set_perf_counter_pending() implemented
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_counter_init() to register hardware counters.
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index af0a5046d743..87a1aca4a424 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void)
 	_min1 < _min2 ? _min1 : _min2; })
 
 static inline int
-sys_perf_counter_open(struct perf_counter_attr *attr_uptr,
+sys_perf_counter_open(struct perf_counter_attr *attr,
 		      pid_t pid, int cpu, int group_fd,
 		      unsigned long flags)
 {
-	return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu,
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_counter_open, attr, pid, cpu,
 		       group_fd, flags);
 }
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9d5f1ca50e6f..5a72586e1df0 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -75,7 +75,7 @@ static char *sw_event_names[] = {
 #define MAX_ALIASES 8
 
 static char *hw_cache [][MAX_ALIASES] = {
-	{ "L1-data"		, "l1-d", "l1d", "l1" },
+	{ "L1-data"		, "l1-d", "l1d" },
 	{ "L1-instruction"	, "l1-i", "l1i" },
 	{ "L2"			, "l2" },
 	{ "Data-TLB"		, "dtlb", "d-tlb" },