diff options
-rw-r--r-- | include/linux/ftrace_event.h | 2 | ||||
-rw-r--r-- | include/linux/perf_event.h | 28 | ||||
-rw-r--r-- | include/trace/ftrace.h | 7 | ||||
-rw-r--r-- | kernel/events/core.c | 23 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_uprobe.c | 2 |
9 files changed, 52 insertions, 24 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b8..d36f68b08acc 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h | |||
@@ -595,7 +595,7 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, | |||
595 | char *filter_str); | 595 | char *filter_str); |
596 | extern void ftrace_profile_free_filter(struct perf_event *event); | 596 | extern void ftrace_profile_free_filter(struct perf_event *event); |
597 | extern void *perf_trace_buf_prepare(int size, unsigned short type, | 597 | extern void *perf_trace_buf_prepare(int size, unsigned short type, |
598 | struct pt_regs *regs, int *rctxp); | 598 | struct pt_regs **regs, int *rctxp); |
599 | 599 | ||
600 | static inline void | 600 | static inline void |
601 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, | 601 | perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f7a61ca4b39..3a7bd80b4db8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -665,6 +665,7 @@ static inline int is_software_event(struct perf_event *event) | |||
665 | 665 | ||
666 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; | 666 | extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
667 | 667 | ||
668 | extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64); | ||
668 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); | 669 | extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); |
669 | 670 | ||
670 | #ifndef perf_arch_fetch_caller_regs | 671 | #ifndef perf_arch_fetch_caller_regs |
@@ -689,14 +690,25 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs) | |||
689 | static __always_inline void | 690 | static __always_inline void |
690 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | 691 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
691 | { | 692 | { |
692 | struct pt_regs hot_regs; | 693 | if (static_key_false(&perf_swevent_enabled[event_id])) |
694 | __perf_sw_event(event_id, nr, regs, addr); | ||
695 | } | ||
696 | |||
697 | DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]); | ||
693 | 698 | ||
699 | /* | ||
700 | * 'Special' version for the scheduler, it hard assumes no recursion, | ||
701 | * which is guaranteed by us not actually scheduling inside other swevents | ||
702 | * because those disable preemption. | ||
703 | */ | ||
704 | static __always_inline void | ||
705 | perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) | ||
706 | { | ||
694 | if (static_key_false(&perf_swevent_enabled[event_id])) { | 707 | if (static_key_false(&perf_swevent_enabled[event_id])) { |
695 | if (!regs) { | 708 | struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]); |
696 | perf_fetch_caller_regs(&hot_regs); | 709 | |
697 | regs = &hot_regs; | 710 | perf_fetch_caller_regs(regs); |
698 | } | 711 | ___perf_sw_event(event_id, nr, regs, addr); |
699 | __perf_sw_event(event_id, nr, regs, addr); | ||
700 | } | 712 | } |
701 | } | 713 | } |
702 | 714 | ||
@@ -712,7 +724,7 @@ static inline void perf_event_task_sched_in(struct task_struct *prev, | |||
712 | static inline void perf_event_task_sched_out(struct task_struct *prev, | 724 | static inline void perf_event_task_sched_out(struct task_struct *prev, |
713 | struct task_struct *next) | 725 | struct task_struct *next) |
714 | { | 726 | { |
715 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); | 727 | perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0); |
716 | 728 | ||
717 | if (static_key_false(&perf_sched_events.key)) | 729 | if (static_key_false(&perf_sched_events.key)) |
718 | __perf_event_task_sched_out(prev, next); | 730 | __perf_event_task_sched_out(prev, next); |
@@ -823,6 +835,8 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh) | |||
823 | static inline void | 835 | static inline void |
824 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } | 836 | perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } |
825 | static inline void | 837 | static inline void |
838 | perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { } | ||
839 | static inline void | ||
826 | perf_bp_event(struct perf_event *event, void *data) { } | 840 | perf_bp_event(struct perf_event *event, void *data) { } |
827 | 841 | ||
828 | static inline int perf_register_guest_info_callbacks | 842 | static inline int perf_register_guest_info_callbacks |
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 139b5067345b..27609dfcce25 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h | |||
@@ -763,7 +763,7 @@ perf_trace_##call(void *__data, proto) \ | |||
763 | struct ftrace_event_call *event_call = __data; \ | 763 | struct ftrace_event_call *event_call = __data; \ |
764 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ | 764 | struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ |
765 | struct ftrace_raw_##call *entry; \ | 765 | struct ftrace_raw_##call *entry; \ |
766 | struct pt_regs __regs; \ | 766 | struct pt_regs *__regs; \ |
767 | u64 __addr = 0, __count = 1; \ | 767 | u64 __addr = 0, __count = 1; \ |
768 | struct task_struct *__task = NULL; \ | 768 | struct task_struct *__task = NULL; \ |
769 | struct hlist_head *head; \ | 769 | struct hlist_head *head; \ |
@@ -782,18 +782,19 @@ perf_trace_##call(void *__data, proto) \ | |||
782 | sizeof(u64)); \ | 782 | sizeof(u64)); \ |
783 | __entry_size -= sizeof(u32); \ | 783 | __entry_size -= sizeof(u32); \ |
784 | \ | 784 | \ |
785 | perf_fetch_caller_regs(&__regs); \ | ||
786 | entry = perf_trace_buf_prepare(__entry_size, \ | 785 | entry = perf_trace_buf_prepare(__entry_size, \ |
787 | event_call->event.type, &__regs, &rctx); \ | 786 | event_call->event.type, &__regs, &rctx); \ |
788 | if (!entry) \ | 787 | if (!entry) \ |
789 | return; \ | 788 | return; \ |
790 | \ | 789 | \ |
790 | perf_fetch_caller_regs(__regs); \ | ||
791 | \ | ||
791 | tstruct \ | 792 | tstruct \ |
792 | \ | 793 | \ |
793 | { assign; } \ | 794 | { assign; } \ |
794 | \ | 795 | \ |
795 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ | 796 | perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ |
796 | __count, &__regs, head, __task); \ | 797 | __count, __regs, head, __task); \ |
797 | } | 798 | } |
798 | 799 | ||
799 | /* | 800 | /* |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 882f835a0d85..c10124b772c4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -5889,6 +5889,8 @@ end: | |||
5889 | rcu_read_unlock(); | 5889 | rcu_read_unlock(); |
5890 | } | 5890 | } |
5891 | 5891 | ||
5892 | DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); | ||
5893 | |||
5892 | int perf_swevent_get_recursion_context(void) | 5894 | int perf_swevent_get_recursion_context(void) |
5893 | { | 5895 | { |
5894 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); | 5896 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); |
@@ -5904,21 +5906,30 @@ inline void perf_swevent_put_recursion_context(int rctx) | |||
5904 | put_recursion_context(swhash->recursion, rctx); | 5906 | put_recursion_context(swhash->recursion, rctx); |
5905 | } | 5907 | } |
5906 | 5908 | ||
5907 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | 5909 | void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
5908 | { | 5910 | { |
5909 | struct perf_sample_data data; | 5911 | struct perf_sample_data data; |
5910 | int rctx; | ||
5911 | 5912 | ||
5912 | preempt_disable_notrace(); | 5913 | if (WARN_ON_ONCE(!regs)) |
5913 | rctx = perf_swevent_get_recursion_context(); | ||
5914 | if (rctx < 0) | ||
5915 | return; | 5914 | return; |
5916 | 5915 | ||
5917 | perf_sample_data_init(&data, addr, 0); | 5916 | perf_sample_data_init(&data, addr, 0); |
5918 | |||
5919 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); | 5917 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); |
5918 | } | ||
5919 | |||
5920 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | ||
5921 | { | ||
5922 | int rctx; | ||
5923 | |||
5924 | preempt_disable_notrace(); | ||
5925 | rctx = perf_swevent_get_recursion_context(); | ||
5926 | if (unlikely(rctx < 0)) | ||
5927 | goto fail; | ||
5928 | |||
5929 | ___perf_sw_event(event_id, nr, regs, addr); | ||
5920 | 5930 | ||
5921 | perf_swevent_put_recursion_context(rctx); | 5931 | perf_swevent_put_recursion_context(rctx); |
5932 | fail: | ||
5922 | preempt_enable_notrace(); | 5933 | preempt_enable_notrace(); |
5923 | } | 5934 | } |
5924 | 5935 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c0accc00566e..d22fb16a7153 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1082,7 +1082,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1082 | if (p->sched_class->migrate_task_rq) | 1082 | if (p->sched_class->migrate_task_rq) |
1083 | p->sched_class->migrate_task_rq(p, new_cpu); | 1083 | p->sched_class->migrate_task_rq(p, new_cpu); |
1084 | p->se.nr_migrations++; | 1084 | p->se.nr_migrations++; |
1085 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); | 1085 | perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); |
1086 | } | 1086 | } |
1087 | 1087 | ||
1088 | __set_task_cpu(p, new_cpu); | 1088 | __set_task_cpu(p, new_cpu); |
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 4b9c114ee9de..6fa484de2ba1 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p_event, int flags) | |||
261 | } | 261 | } |
262 | 262 | ||
263 | void *perf_trace_buf_prepare(int size, unsigned short type, | 263 | void *perf_trace_buf_prepare(int size, unsigned short type, |
264 | struct pt_regs *regs, int *rctxp) | 264 | struct pt_regs **regs, int *rctxp) |
265 | { | 265 | { |
266 | struct trace_entry *entry; | 266 | struct trace_entry *entry; |
267 | unsigned long flags; | 267 | unsigned long flags; |
@@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, unsigned short type, | |||
280 | if (*rctxp < 0) | 280 | if (*rctxp < 0) |
281 | return NULL; | 281 | return NULL; |
282 | 282 | ||
283 | if (regs) | ||
284 | *regs = this_cpu_ptr(&__perf_regs[*rctxp]); | ||
283 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); | 285 | raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); |
284 | 286 | ||
285 | /* zero the dead bytes from align to not leak stack to user */ | 287 | /* zero the dead bytes from align to not leak stack to user */ |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5edb518be345..296079ae6583 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1148,7 +1148,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) | |||
1148 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1148 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1149 | size -= sizeof(u32); | 1149 | size -= sizeof(u32); |
1150 | 1150 | ||
1151 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1151 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
1152 | if (!entry) | 1152 | if (!entry) |
1153 | return; | 1153 | return; |
1154 | 1154 | ||
@@ -1179,7 +1179,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, | |||
1179 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1179 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1180 | size -= sizeof(u32); | 1180 | size -= sizeof(u32); |
1181 | 1181 | ||
1182 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1182 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
1183 | if (!entry) | 1183 | if (!entry) |
1184 | return; | 1184 | return; |
1185 | 1185 | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c6ee36fcbf90..f97f6e3a676c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -574,7 +574,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
574 | size -= sizeof(u32); | 574 | size -= sizeof(u32); |
575 | 575 | ||
576 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 576 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
577 | sys_data->enter_event->event.type, regs, &rctx); | 577 | sys_data->enter_event->event.type, NULL, &rctx); |
578 | if (!rec) | 578 | if (!rec) |
579 | return; | 579 | return; |
580 | 580 | ||
@@ -647,7 +647,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
647 | size -= sizeof(u32); | 647 | size -= sizeof(u32); |
648 | 648 | ||
649 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 649 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
650 | sys_data->exit_event->event.type, regs, &rctx); | 650 | sys_data->exit_event->event.type, NULL, &rctx); |
651 | if (!rec) | 651 | if (!rec) |
652 | return; | 652 | return; |
653 | 653 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8520acc34b18..b11441321e7a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -1111,7 +1111,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, | |||
1111 | if (hlist_empty(head)) | 1111 | if (hlist_empty(head)) |
1112 | goto out; | 1112 | goto out; |
1113 | 1113 | ||
1114 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); | 1114 | entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx); |
1115 | if (!entry) | 1115 | if (!entry) |
1116 | goto out; | 1116 | goto out; |
1117 | 1117 | ||