diff options
-rw-r--r-- | arch/Kconfig | 7 | ||||
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | include/linux/perf_event.h | 18 | ||||
-rw-r--r-- | kernel/events/core.c | 150 | ||||
-rw-r--r-- | kernel/events/internal.h | 16 |
5 files changed, 190 insertions, 2 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 68d827b7ae82..2a83a3f6a615 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -228,6 +228,13 @@ config HAVE_PERF_REGS | |||
228 | Support selective register dumps for perf events. This includes | 228 | Support selective register dumps for perf events. This includes |
229 | bit-mapping of each register and a unique architecture id. | 229 | bit-mapping of each register and a unique architecture id. |
230 | 230 | ||
231 | config HAVE_PERF_USER_STACK_DUMP | ||
232 | bool | ||
233 | help | ||
234 | Support user stack dumps for perf event samples. This needs | ||
235 | access to the user stack pointer which is not unified across | ||
236 | architectures. | ||
237 | |||
231 | config HAVE_ARCH_JUMP_LABEL | 238 | config HAVE_ARCH_JUMP_LABEL |
232 | bool | 239 | bool |
233 | 240 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3fab6ec9edc4..a2d19ee750ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -61,6 +61,7 @@ config X86 | |||
61 | select PERF_EVENTS | 61 | select PERF_EVENTS |
62 | select HAVE_PERF_EVENTS_NMI | 62 | select HAVE_PERF_EVENTS_NMI |
63 | select HAVE_PERF_REGS | 63 | select HAVE_PERF_REGS |
64 | select HAVE_PERF_USER_STACK_DUMP | ||
64 | select ANON_INODES | 65 | select ANON_INODES |
65 | select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 | 66 | select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 |
66 | select HAVE_CMPXCHG_LOCAL if !M386 | 67 | select HAVE_CMPXCHG_LOCAL if !M386 |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8a73f75beb16..d1d25f6a5e24 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -131,8 +131,9 @@ enum perf_event_sample_format { | |||
131 | PERF_SAMPLE_RAW = 1U << 10, | 131 | PERF_SAMPLE_RAW = 1U << 10, |
132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, | 132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, |
133 | PERF_SAMPLE_REGS_USER = 1U << 12, | 133 | PERF_SAMPLE_REGS_USER = 1U << 12, |
134 | PERF_SAMPLE_STACK_USER = 1U << 13, | ||
134 | 135 | ||
135 | PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */ | 136 | PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ |
136 | }; | 137 | }; |
137 | 138 | ||
138 | /* | 139 | /* |
@@ -205,6 +206,7 @@ enum perf_event_read_format { | |||
205 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | 206 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ |
206 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | 207 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ |
207 | #define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ | 208 | #define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */ |
209 | #define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */ | ||
208 | 210 | ||
209 | /* | 211 | /* |
210 | * Hardware event_id to monitor via a performance monitoring event: | 212 | * Hardware event_id to monitor via a performance monitoring event: |
@@ -289,6 +291,14 @@ struct perf_event_attr { | |||
289 | * See asm/perf_regs.h for details. | 291 | * See asm/perf_regs.h for details. |
290 | */ | 292 | */ |
291 | __u64 sample_regs_user; | 293 | __u64 sample_regs_user; |
294 | |||
295 | /* | ||
296 | * Defines size of the user stack to dump on samples. | ||
297 | */ | ||
298 | __u32 sample_stack_user; | ||
299 | |||
300 | /* Align to u64. */ | ||
301 | __u32 __reserved_2; | ||
292 | }; | 302 | }; |
293 | 303 | ||
294 | /* | 304 | /* |
@@ -568,6 +578,10 @@ enum perf_event_type { | |||
568 | * | 578 | * |
569 | * { u64 abi; # enum perf_sample_regs_abi | 579 | * { u64 abi; # enum perf_sample_regs_abi |
570 | * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER | 580 | * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER |
581 | * | ||
582 | * { u64 size; | ||
583 | * char data[size]; | ||
584 | * u64 dyn_size; } && PERF_SAMPLE_STACK_USER | ||
571 | * }; | 585 | * }; |
572 | */ | 586 | */ |
573 | PERF_RECORD_SAMPLE = 9, | 587 | PERF_RECORD_SAMPLE = 9, |
@@ -1160,6 +1174,7 @@ struct perf_sample_data { | |||
1160 | struct perf_raw_record *raw; | 1174 | struct perf_raw_record *raw; |
1161 | struct perf_branch_stack *br_stack; | 1175 | struct perf_branch_stack *br_stack; |
1162 | struct perf_regs_user regs_user; | 1176 | struct perf_regs_user regs_user; |
1177 | u64 stack_user_size; | ||
1163 | }; | 1178 | }; |
1164 | 1179 | ||
1165 | static inline void perf_sample_data_init(struct perf_sample_data *data, | 1180 | static inline void perf_sample_data_init(struct perf_sample_data *data, |
@@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, | |||
1172 | data->period = period; | 1187 | data->period = period; |
1173 | data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; | 1188 | data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; |
1174 | data->regs_user.regs = NULL; | 1189 | data->regs_user.regs = NULL; |
1190 | data->stack_user_size = 0; | ||
1175 | } | 1191 | } |
1176 | 1192 | ||
1177 | extern void perf_output_sample(struct perf_output_handle *handle, | 1193 | extern void perf_output_sample(struct perf_output_handle *handle, |
diff --git a/kernel/events/core.c b/kernel/events/core.c index d3ce97525b9f..2ba890450d15 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/perf_event.h> | 36 | #include <linux/perf_event.h> |
37 | #include <linux/ftrace_event.h> | 37 | #include <linux/ftrace_event.h> |
38 | #include <linux/hw_breakpoint.h> | 38 | #include <linux/hw_breakpoint.h> |
39 | #include <linux/mm_types.h> | ||
39 | 40 | ||
40 | #include "internal.h" | 41 | #include "internal.h" |
41 | 42 | ||
@@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user, | |||
3787 | } | 3788 | } |
3788 | } | 3789 | } |
3789 | 3790 | ||
3791 | /* | ||
3792 | * Get remaining task size from user stack pointer. | ||
3793 | * | ||
3794 | * It'd be better to take stack vma map and limit this more | ||
3795 | * precisely, but there's no way to get it safely under interrupt, | ||
3796 | * so using TASK_SIZE as limit. | ||
3797 | */ | ||
3798 | static u64 perf_ustack_task_size(struct pt_regs *regs) | ||
3799 | { | ||
3800 | unsigned long addr = perf_user_stack_pointer(regs); | ||
3801 | |||
3802 | if (!addr || addr >= TASK_SIZE) | ||
3803 | return 0; | ||
3804 | |||
3805 | return TASK_SIZE - addr; | ||
3806 | } | ||
3807 | |||
3808 | static u16 | ||
3809 | perf_sample_ustack_size(u16 stack_size, u16 header_size, | ||
3810 | struct pt_regs *regs) | ||
3811 | { | ||
3812 | u64 task_size; | ||
3813 | |||
3814 | /* No regs, no stack pointer, no dump. */ | ||
3815 | if (!regs) | ||
3816 | return 0; | ||
3817 | |||
3818 | /* | ||
3819 | * Check if we fit in with the requested stack size into the: | ||
3820 | * - TASK_SIZE | ||
3821 | * If we don't, we limit the size to the TASK_SIZE. | ||
3822 | * | ||
3823 | * - remaining sample size | ||
3824 | * If we don't, we customize the stack size to | ||
3825 | * fit in to the remaining sample size. | ||
3826 | */ | ||
3827 | |||
3828 | task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); | ||
3829 | stack_size = min(stack_size, (u16) task_size); | ||
3830 | |||
3831 | /* Current header size plus static size and dynamic size. */ | ||
3832 | header_size += 2 * sizeof(u64); | ||
3833 | |||
3834 | /* Do we fit in with the current stack dump size? */ | ||
3835 | if ((u16) (header_size + stack_size) < header_size) { | ||
3836 | /* | ||
3837 | * If we overflow the maximum size for the sample, | ||
3838 | * we customize the stack dump size to fit in. | ||
3839 | */ | ||
3840 | stack_size = USHRT_MAX - header_size - sizeof(u64); | ||
3841 | stack_size = round_up(stack_size, sizeof(u64)); | ||
3842 | } | ||
3843 | |||
3844 | return stack_size; | ||
3845 | } | ||
3846 | |||
3847 | static void | ||
3848 | perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, | ||
3849 | struct pt_regs *regs) | ||
3850 | { | ||
3851 | /* Case of a kernel thread, nothing to dump */ | ||
3852 | if (!regs) { | ||
3853 | u64 size = 0; | ||
3854 | perf_output_put(handle, size); | ||
3855 | } else { | ||
3856 | unsigned long sp; | ||
3857 | unsigned int rem; | ||
3858 | u64 dyn_size; | ||
3859 | |||
3860 | /* | ||
3861 | * We dump: | ||
3862 | * static size | ||
3863 | * - the size requested by user or the best one we can fit | ||
3864 | * in to the sample max size | ||
3865 | * data | ||
3866 | * - user stack dump data | ||
3867 | * dynamic size | ||
3868 | * - the actual dumped size | ||
3869 | */ | ||
3870 | |||
3871 | /* Static size. */ | ||
3872 | perf_output_put(handle, dump_size); | ||
3873 | |||
3874 | /* Data. */ | ||
3875 | sp = perf_user_stack_pointer(regs); | ||
3876 | rem = __output_copy_user(handle, (void *) sp, dump_size); | ||
3877 | dyn_size = dump_size - rem; | ||
3878 | |||
3879 | perf_output_skip(handle, rem); | ||
3880 | |||
3881 | /* Dynamic size. */ | ||
3882 | perf_output_put(handle, dyn_size); | ||
3883 | } | ||
3884 | } | ||
3885 | |||
3790 | static void __perf_event_header__init_id(struct perf_event_header *header, | 3886 | static void __perf_event_header__init_id(struct perf_event_header *header, |
3791 | struct perf_sample_data *data, | 3887 | struct perf_sample_data *data, |
3792 | struct perf_event *event) | 3888 | struct perf_event *event) |
@@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4064 | mask); | 4160 | mask); |
4065 | } | 4161 | } |
4066 | } | 4162 | } |
4163 | |||
4164 | if (sample_type & PERF_SAMPLE_STACK_USER) | ||
4165 | perf_output_sample_ustack(handle, | ||
4166 | data->stack_user_size, | ||
4167 | data->regs_user.regs); | ||
4067 | } | 4168 | } |
4068 | 4169 | ||
4069 | void perf_prepare_sample(struct perf_event_header *header, | 4170 | void perf_prepare_sample(struct perf_event_header *header, |
@@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4129 | 4230 | ||
4130 | header->size += size; | 4231 | header->size += size; |
4131 | } | 4232 | } |
4233 | |||
4234 | if (sample_type & PERF_SAMPLE_STACK_USER) { | ||
4235 | /* | ||
4236 | * Either we need PERF_SAMPLE_STACK_USER bit to be always | ||
4237 | * processed as the last one or have additional check added | ||
4238 | * in case new sample type is added, because we could eat | ||
4239 | * up the rest of the sample size. | ||
4240 | */ | ||
4241 | struct perf_regs_user *uregs = &data->regs_user; | ||
4242 | u16 stack_size = event->attr.sample_stack_user; | ||
4243 | u16 size = sizeof(u64); | ||
4244 | |||
4245 | if (!uregs->abi) | ||
4246 | perf_sample_regs_user(uregs, regs); | ||
4247 | |||
4248 | stack_size = perf_sample_ustack_size(stack_size, header->size, | ||
4249 | uregs->regs); | ||
4250 | |||
4251 | /* | ||
4252 | * If there is something to dump, add space for the dump | ||
4253 | * itself and for the field that tells the dynamic size, | ||
4254 | * which is how many have been actually dumped. | ||
4255 | */ | ||
4256 | if (stack_size) | ||
4257 | size += sizeof(u64) + stack_size; | ||
4258 | |||
4259 | data->stack_user_size = stack_size; | ||
4260 | header->size += size; | ||
4261 | } | ||
4132 | } | 4262 | } |
4133 | 4263 | ||
4134 | static void perf_event_output(struct perf_event *event, | 4264 | static void perf_event_output(struct perf_event *event, |
@@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
6205 | } | 6335 | } |
6206 | } | 6336 | } |
6207 | 6337 | ||
6208 | if (attr->sample_type & PERF_SAMPLE_REGS_USER) | 6338 | if (attr->sample_type & PERF_SAMPLE_REGS_USER) { |
6209 | ret = perf_reg_validate(attr->sample_regs_user); | 6339 | ret = perf_reg_validate(attr->sample_regs_user); |
6340 | if (ret) | ||
6341 | return ret; | ||
6342 | } | ||
6343 | |||
6344 | if (attr->sample_type & PERF_SAMPLE_STACK_USER) { | ||
6345 | if (!arch_perf_have_user_stack_dump()) | ||
6346 | return -ENOSYS; | ||
6347 | |||
6348 | /* | ||
6349 | * We have __u32 type for the size, but so far | ||
6350 | * we can only use __u16 as maximum due to the | ||
6351 | * __u16 sample size limit. | ||
6352 | */ | ||
6353 | if (attr->sample_stack_user >= USHRT_MAX) | ||
6354 | ret = -EINVAL; | ||
6355 | else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) | ||
6356 | ret = -EINVAL; | ||
6357 | } | ||
6210 | 6358 | ||
6211 | out: | 6359 | out: |
6212 | return ret; | 6360 | return ret; |
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index ce7bdfc1d045..d56a64c99a8b 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h | |||
@@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx) | |||
158 | recursion[rctx]--; | 158 | recursion[rctx]--; |
159 | } | 159 | } |
160 | 160 | ||
161 | #ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP | ||
162 | static inline bool arch_perf_have_user_stack_dump(void) | ||
163 | { | ||
164 | return true; | ||
165 | } | ||
166 | |||
167 | #define perf_user_stack_pointer(regs) user_stack_pointer(regs) | ||
168 | #else | ||
169 | static inline bool arch_perf_have_user_stack_dump(void) | ||
170 | { | ||
171 | return false; | ||
172 | } | ||
173 | |||
174 | #define perf_user_stack_pointer(regs) 0 | ||
175 | #endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ | ||
176 | |||
161 | #endif /* _KERNEL_EVENTS_INTERNAL_H */ | 177 | #endif /* _KERNEL_EVENTS_INTERNAL_H */ |