diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-06-12 06:46:55 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-12 08:28:52 -0400 |
commit | 974802eaa1afdc87e00821df7020a2b3c6fee623 (patch) | |
tree | b5c38bece7f4948fe9d5cf9be02a0042e0fa39f6 | |
parent | bbd36e5e6aa6f1757c84cdb406b6eb81686d14af (diff) |
perf_counter: Add forward/backward attribute ABI compatibility
Provide a means of extending the perf_counter_attr structure in a 'natural' way.
We allow growing the structure by appending fields at the end by specifying
the full structure size inside it.
When a new kernel sees a smaller (old) structure, it will 0 pad the tail.
When an old kernel sees a larger (new) structure, it will verify the tail
consists of 0s, otherwise fail.
If we fail due to a size-mismatch, we return -E2BIG and write the kernel's
native attribute size back into the provided structure.
Furthermore, add some attribute verification, so that we'll fail counter
creation when unknown bits are present (PERF_SAMPLE, PERF_FORMAT, or in
the __reserved fields).
(This ABI detail is introduced while keeping the existing syscall ABI.)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/perf_counter.h | 19 | ||||
-rw-r--r-- | include/linux/syscalls.h | 2 | ||||
-rw-r--r-- | kernel/perf_counter.c | 89 | ||||
-rw-r--r-- | tools/perf/perf.h | 5 |
4 files changed, 105 insertions, 10 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7c4f32f6ae1a..1b3118a1023a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -120,6 +120,8 @@ enum perf_counter_sample_format { | |||
120 | PERF_SAMPLE_ID = 1U << 6, | 120 | PERF_SAMPLE_ID = 1U << 6, |
121 | PERF_SAMPLE_CPU = 1U << 7, | 121 | PERF_SAMPLE_CPU = 1U << 7, |
122 | PERF_SAMPLE_PERIOD = 1U << 8, | 122 | PERF_SAMPLE_PERIOD = 1U << 8, |
123 | |||
124 | PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ | ||
123 | }; | 125 | }; |
124 | 126 | ||
125 | /* | 127 | /* |
@@ -131,17 +133,26 @@ enum perf_counter_read_format { | |||
131 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, | 133 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, |
132 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, | 134 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, |
133 | PERF_FORMAT_ID = 1U << 2, | 135 | PERF_FORMAT_ID = 1U << 2, |
136 | |||
137 | PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ | ||
134 | }; | 138 | }; |
135 | 139 | ||
140 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | ||
141 | |||
136 | /* | 142 | /* |
137 | * Hardware event to monitor via a performance monitoring counter: | 143 | * Hardware event to monitor via a performance monitoring counter: |
138 | */ | 144 | */ |
139 | struct perf_counter_attr { | 145 | struct perf_counter_attr { |
146 | |||
140 | /* | 147 | /* |
141 | * Major type: hardware/software/tracepoint/etc. | 148 | * Major type: hardware/software/tracepoint/etc. |
142 | */ | 149 | */ |
143 | __u32 type; | 150 | __u32 type; |
144 | __u32 __reserved_1; | 151 | |
152 | /* | ||
153 | * Size of the attr structure, for fwd/bwd compat. | ||
154 | */ | ||
155 | __u32 size; | ||
145 | 156 | ||
146 | /* | 157 | /* |
147 | * Type specific configuration information. | 158 | * Type specific configuration information. |
@@ -168,12 +179,12 @@ struct perf_counter_attr { | |||
168 | comm : 1, /* include comm data */ | 179 | comm : 1, /* include comm data */ |
169 | freq : 1, /* use freq, not period */ | 180 | freq : 1, /* use freq, not period */ |
170 | 181 | ||
171 | __reserved_2 : 53; | 182 | __reserved_1 : 53; |
172 | 183 | ||
173 | __u32 wakeup_events; /* wakeup every n events */ | 184 | __u32 wakeup_events; /* wakeup every n events */ |
174 | __u32 __reserved_3; | 185 | __u32 __reserved_2; |
175 | 186 | ||
176 | __u64 __reserved_4; | 187 | __u64 __reserved_3; |
177 | }; | 188 | }; |
178 | 189 | ||
179 | /* | 190 | /* |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6c84ad8bd71..418d90f5effe 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -758,6 +758,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | |||
758 | 758 | ||
759 | 759 | ||
760 | asmlinkage long sys_perf_counter_open( | 760 | asmlinkage long sys_perf_counter_open( |
761 | const struct perf_counter_attr __user *attr_uptr, | 761 | struct perf_counter_attr __user *attr_uptr, |
762 | pid_t pid, int cpu, int group_fd, unsigned long flags); | 762 | pid_t pid, int cpu, int group_fd, unsigned long flags); |
763 | #endif | 763 | #endif |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 663bbe015057..29b685f551aa 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -3584,6 +3584,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, | |||
3584 | case PERF_TYPE_TRACEPOINT: | 3584 | case PERF_TYPE_TRACEPOINT: |
3585 | pmu = tp_perf_counter_init(counter); | 3585 | pmu = tp_perf_counter_init(counter); |
3586 | break; | 3586 | break; |
3587 | |||
3588 | default: | ||
3589 | break; | ||
3587 | } | 3590 | } |
3588 | done: | 3591 | done: |
3589 | err = 0; | 3592 | err = 0; |
@@ -3610,6 +3613,85 @@ done: | |||
3610 | return counter; | 3613 | return counter; |
3611 | } | 3614 | } |
3612 | 3615 | ||
3616 | static int perf_copy_attr(struct perf_counter_attr __user *uattr, | ||
3617 | struct perf_counter_attr *attr) | ||
3618 | { | ||
3619 | int ret; | ||
3620 | u32 size; | ||
3621 | |||
3622 | if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) | ||
3623 | return -EFAULT; | ||
3624 | |||
3625 | /* | ||
3626 | * zero the full structure, so that a short copy will be nice. | ||
3627 | */ | ||
3628 | memset(attr, 0, sizeof(*attr)); | ||
3629 | |||
3630 | ret = get_user(size, &uattr->size); | ||
3631 | if (ret) | ||
3632 | return ret; | ||
3633 | |||
3634 | if (size > PAGE_SIZE) /* silly large */ | ||
3635 | goto err_size; | ||
3636 | |||
3637 | if (!size) /* abi compat */ | ||
3638 | size = PERF_ATTR_SIZE_VER0; | ||
3639 | |||
3640 | if (size < PERF_ATTR_SIZE_VER0) | ||
3641 | goto err_size; | ||
3642 | |||
3643 | /* | ||
3644 | * If we're handed a bigger struct than we know of, | ||
3645 | * ensure all the unknown bits are 0. | ||
3646 | */ | ||
3647 | if (size > sizeof(*attr)) { | ||
3648 | unsigned long val; | ||
3649 | unsigned long __user *addr; | ||
3650 | unsigned long __user *end; | ||
3651 | |||
3652 | addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), | ||
3653 | sizeof(unsigned long)); | ||
3654 | end = PTR_ALIGN((void __user *)uattr + size, | ||
3655 | sizeof(unsigned long)); | ||
3656 | |||
3657 | for (; addr < end; addr += sizeof(unsigned long)) { | ||
3658 | ret = get_user(val, addr); | ||
3659 | if (ret) | ||
3660 | return ret; | ||
3661 | if (val) | ||
3662 | goto err_size; | ||
3663 | } | ||
3664 | } | ||
3665 | |||
3666 | ret = copy_from_user(attr, uattr, size); | ||
3667 | if (ret) | ||
3668 | return -EFAULT; | ||
3669 | |||
3670 | /* | ||
3671 | * If the type exists, the corresponding creation will verify | ||
3672 | * the attr->config. | ||
3673 | */ | ||
3674 | if (attr->type >= PERF_TYPE_MAX) | ||
3675 | return -EINVAL; | ||
3676 | |||
3677 | if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) | ||
3678 | return -EINVAL; | ||
3679 | |||
3680 | if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) | ||
3681 | return -EINVAL; | ||
3682 | |||
3683 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) | ||
3684 | return -EINVAL; | ||
3685 | |||
3686 | out: | ||
3687 | return ret; | ||
3688 | |||
3689 | err_size: | ||
3690 | put_user(sizeof(*attr), &uattr->size); | ||
3691 | ret = -E2BIG; | ||
3692 | goto out; | ||
3693 | } | ||
3694 | |||
3613 | /** | 3695 | /** |
3614 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu | 3696 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu |
3615 | * | 3697 | * |
@@ -3619,7 +3701,7 @@ done: | |||
3619 | * @group_fd: group leader counter fd | 3701 | * @group_fd: group leader counter fd |
3620 | */ | 3702 | */ |
3621 | SYSCALL_DEFINE5(perf_counter_open, | 3703 | SYSCALL_DEFINE5(perf_counter_open, |
3622 | const struct perf_counter_attr __user *, attr_uptr, | 3704 | struct perf_counter_attr __user *, attr_uptr, |
3623 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | 3705 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) |
3624 | { | 3706 | { |
3625 | struct perf_counter *counter, *group_leader; | 3707 | struct perf_counter *counter, *group_leader; |
@@ -3635,8 +3717,9 @@ SYSCALL_DEFINE5(perf_counter_open, | |||
3635 | if (flags) | 3717 | if (flags) |
3636 | return -EINVAL; | 3718 | return -EINVAL; |
3637 | 3719 | ||
3638 | if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) | 3720 | ret = perf_copy_attr(attr_uptr, &attr); |
3639 | return -EFAULT; | 3721 | if (ret) |
3722 | return ret; | ||
3640 | 3723 | ||
3641 | if (!attr.exclude_kernel) { | 3724 | if (!attr.exclude_kernel) { |
3642 | if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | 3725 | if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index af0a5046d743..87a1aca4a424 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -53,11 +53,12 @@ static inline unsigned long long rdclock(void) | |||
53 | _min1 < _min2 ? _min1 : _min2; }) | 53 | _min1 < _min2 ? _min1 : _min2; }) |
54 | 54 | ||
55 | static inline int | 55 | static inline int |
56 | sys_perf_counter_open(struct perf_counter_attr *attr_uptr, | 56 | sys_perf_counter_open(struct perf_counter_attr *attr, |
57 | pid_t pid, int cpu, int group_fd, | 57 | pid_t pid, int cpu, int group_fd, |
58 | unsigned long flags) | 58 | unsigned long flags) |
59 | { | 59 | { |
60 | return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu, | 60 | attr->size = sizeof(*attr); |
61 | return syscall(__NR_perf_counter_open, attr, pid, cpu, | ||
61 | group_fd, flags); | 62 | group_fd, flags); |
62 | } | 63 | } |
63 | 64 | ||