Diffstat (limited to 'kernel')

-rw-r--r--  kernel/kprobes.c                     |  34
-rw-r--r--  kernel/perf_event.c                  | 627
-rw-r--r--  kernel/sched.c                       |  12
-rw-r--r--  kernel/trace/Makefile                |   4
-rw-r--r--  kernel/trace/ftrace.c                |  54
-rw-r--r--  kernel/trace/trace_event_profile.c   |  52
-rw-r--r--  kernel/trace/trace_events_filter.c   |   4
-rw-r--r--  kernel/trace/trace_kprobe.c          | 196
-rw-r--r--  kernel/trace/trace_syscalls.c        |  76

9 files changed, 580 insertions, 479 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b7df302a0204..ccec774c716d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/debugfs.h> | 44 | #include <linux/debugfs.h> |
45 | #include <linux/kdebug.h> | 45 | #include <linux/kdebug.h> |
46 | #include <linux/memory.h> | 46 | #include <linux/memory.h> |
47 | #include <linux/ftrace.h> | ||
47 | 48 | ||
48 | #include <asm-generic/sections.h> | 49 | #include <asm-generic/sections.h> |
49 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
@@ -93,6 +94,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = { | |||
93 | {"native_get_debugreg",}, | 94 | {"native_get_debugreg",}, |
94 | {"irq_entries_start",}, | 95 | {"irq_entries_start",}, |
95 | {"common_interrupt",}, | 96 | {"common_interrupt",}, |
97 | {"mcount",}, /* mcount can be called from everywhere */ | ||
96 | {NULL} /* Terminator */ | 98 | {NULL} /* Terminator */ |
97 | }; | 99 | }; |
98 | 100 | ||
@@ -124,30 +126,6 @@ static LIST_HEAD(kprobe_insn_pages); | |||
124 | static int kprobe_garbage_slots; | 126 | static int kprobe_garbage_slots; |
125 | static int collect_garbage_slots(void); | 127 | static int collect_garbage_slots(void); |
126 | 128 | ||
127 | static int __kprobes check_safety(void) | ||
128 | { | ||
129 | int ret = 0; | ||
130 | #if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) | ||
131 | ret = freeze_processes(); | ||
132 | if (ret == 0) { | ||
133 | struct task_struct *p, *q; | ||
134 | do_each_thread(p, q) { | ||
135 | if (p != current && p->state == TASK_RUNNING && | ||
136 | p->pid != 0) { | ||
137 | printk("Check failed: %s is running\n",p->comm); | ||
138 | ret = -1; | ||
139 | goto loop_end; | ||
140 | } | ||
141 | } while_each_thread(p, q); | ||
142 | } | ||
143 | loop_end: | ||
144 | thaw_processes(); | ||
145 | #else | ||
146 | synchronize_sched(); | ||
147 | #endif | ||
148 | return ret; | ||
149 | } | ||
150 | |||
151 | /** | 129 | /** |
152 | * __get_insn_slot() - Find a slot on an executable page for an instruction. | 130 | * __get_insn_slot() - Find a slot on an executable page for an instruction. |
153 | * We allocate an executable page if there's no room on existing ones. | 131 | * We allocate an executable page if there's no room on existing ones. |
@@ -235,9 +213,8 @@ static int __kprobes collect_garbage_slots(void) | |||
235 | { | 213 | { |
236 | struct kprobe_insn_page *kip, *next; | 214 | struct kprobe_insn_page *kip, *next; |
237 | 215 | ||
238 | /* Ensure no-one is preepmted on the garbages */ | 216 | /* Ensure no-one is interrupted on the garbages */ |
239 | if (check_safety()) | 217 | synchronize_sched(); |
240 | return -EAGAIN; | ||
241 | 218 | ||
242 | list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { | 219 | list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) { |
243 | int i; | 220 | int i; |
@@ -728,7 +705,8 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
728 | 705 | ||
729 | preempt_disable(); | 706 | preempt_disable(); |
730 | if (!kernel_text_address((unsigned long) p->addr) || | 707 | if (!kernel_text_address((unsigned long) p->addr) || |
731 | in_kprobes_functions((unsigned long) p->addr)) { | 708 | in_kprobes_functions((unsigned long) p->addr) || |
709 | ftrace_text_reserved(p->addr, p->addr)) { | ||
732 | preempt_enable(); | 710 | preempt_enable(); |
733 | return -EINVAL; | 711 | return -EINVAL; |
734 | } | 712 | } |
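The kprobes.c change above does two things: collect_garbage_slots() now waits with a plain synchronize_sched() instead of the old check_safety() freezer dance, and register_kprobe() additionally rejects any address that overlaps an mcount call site reserved by ftrace (ftrace_text_reserved() is added in the ftrace.c hunk further down, and mcount itself joins the kprobe blacklist). A rough sketch of the combined address test, where kprobe_address_usable() is a made-up helper name for illustration; in the patch the three checks sit inline in register_kprobe() under preempt_disable():

/*
 * Illustration only: mirrors the checks register_kprobe() now performs.
 * A probe address must be kernel text, must not be inside a blacklisted
 * function, and must not overlap an mcount site ftrace may rewrite.
 */
static bool kprobe_address_usable(struct kprobe *p)
{
	unsigned long addr = (unsigned long)p->addr;

	return kernel_text_address(addr) &&
	       !in_kprobes_functions(addr) &&
	       !ftrace_text_reserved(p->addr, p->addr);
}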
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409bf38f..a661e7991865 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -98,11 +98,12 @@ void __weak hw_perf_enable(void) { barrier(); } | |||
98 | 98 | ||
99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } | 99 | void __weak hw_perf_event_setup(int cpu) { barrier(); } |
100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } | 100 | void __weak hw_perf_event_setup_online(int cpu) { barrier(); } |
101 | void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } | ||
101 | 102 | ||
102 | int __weak | 103 | int __weak |
103 | hw_perf_group_sched_in(struct perf_event *group_leader, | 104 | hw_perf_group_sched_in(struct perf_event *group_leader, |
104 | struct perf_cpu_context *cpuctx, | 105 | struct perf_cpu_context *cpuctx, |
105 | struct perf_event_context *ctx, int cpu) | 106 | struct perf_event_context *ctx) |
106 | { | 107 | { |
107 | return 0; | 108 | return 0; |
108 | } | 109 | } |
@@ -248,7 +249,7 @@ static void perf_unpin_context(struct perf_event_context *ctx) | |||
248 | 249 | ||
249 | static inline u64 perf_clock(void) | 250 | static inline u64 perf_clock(void) |
250 | { | 251 | { |
251 | return cpu_clock(smp_processor_id()); | 252 | return cpu_clock(raw_smp_processor_id()); |
252 | } | 253 | } |
253 | 254 | ||
254 | /* | 255 | /* |
@@ -289,6 +290,15 @@ static void update_event_times(struct perf_event *event) | |||
289 | event->total_time_running = run_end - event->tstamp_running; | 290 | event->total_time_running = run_end - event->tstamp_running; |
290 | } | 291 | } |
291 | 292 | ||
293 | static struct list_head * | ||
294 | ctx_group_list(struct perf_event *event, struct perf_event_context *ctx) | ||
295 | { | ||
296 | if (event->attr.pinned) | ||
297 | return &ctx->pinned_groups; | ||
298 | else | ||
299 | return &ctx->flexible_groups; | ||
300 | } | ||
301 | |||
292 | /* | 302 | /* |
293 | * Add a event from the lists for its context. | 303 | * Add a event from the lists for its context. |
294 | * Must be called with ctx->mutex and ctx->lock held. | 304 | * Must be called with ctx->mutex and ctx->lock held. |
@@ -303,9 +313,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
303 | * add it straight to the context's event list, or to the group | 313 | * add it straight to the context's event list, or to the group |
304 | * leader's sibling list: | 314 | * leader's sibling list: |
305 | */ | 315 | */ |
306 | if (group_leader == event) | 316 | if (group_leader == event) { |
307 | list_add_tail(&event->group_entry, &ctx->group_list); | 317 | struct list_head *list; |
308 | else { | 318 | |
319 | if (is_software_event(event)) | ||
320 | event->group_flags |= PERF_GROUP_SOFTWARE; | ||
321 | |||
322 | list = ctx_group_list(event, ctx); | ||
323 | list_add_tail(&event->group_entry, list); | ||
324 | } else { | ||
325 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | ||
326 | !is_software_event(event)) | ||
327 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | ||
328 | |||
309 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 329 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
310 | group_leader->nr_siblings++; | 330 | group_leader->nr_siblings++; |
311 | } | 331 | } |
@@ -355,9 +375,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
355 | * to the context list directly: | 375 | * to the context list directly: |
356 | */ | 376 | */ |
357 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { | 377 | list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { |
378 | struct list_head *list; | ||
358 | 379 | ||
359 | list_move_tail(&sibling->group_entry, &ctx->group_list); | 380 | list = ctx_group_list(event, ctx); |
381 | list_move_tail(&sibling->group_entry, list); | ||
360 | sibling->group_leader = sibling; | 382 | sibling->group_leader = sibling; |
383 | |||
384 | /* Inherit group flags from the previous leader */ | ||
385 | sibling->group_flags = event->group_flags; | ||
361 | } | 386 | } |
362 | } | 387 | } |
363 | 388 | ||
@@ -608,14 +633,13 @@ void perf_event_disable(struct perf_event *event) | |||
608 | static int | 633 | static int |
609 | event_sched_in(struct perf_event *event, | 634 | event_sched_in(struct perf_event *event, |
610 | struct perf_cpu_context *cpuctx, | 635 | struct perf_cpu_context *cpuctx, |
611 | struct perf_event_context *ctx, | 636 | struct perf_event_context *ctx) |
612 | int cpu) | ||
613 | { | 637 | { |
614 | if (event->state <= PERF_EVENT_STATE_OFF) | 638 | if (event->state <= PERF_EVENT_STATE_OFF) |
615 | return 0; | 639 | return 0; |
616 | 640 | ||
617 | event->state = PERF_EVENT_STATE_ACTIVE; | 641 | event->state = PERF_EVENT_STATE_ACTIVE; |
618 | event->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | 642 | event->oncpu = smp_processor_id(); |
619 | /* | 643 | /* |
620 | * The new state must be visible before we turn it on in the hardware: | 644 | * The new state must be visible before we turn it on in the hardware: |
621 | */ | 645 | */ |
@@ -642,8 +666,7 @@ event_sched_in(struct perf_event *event, | |||
642 | static int | 666 | static int |
643 | group_sched_in(struct perf_event *group_event, | 667 | group_sched_in(struct perf_event *group_event, |
644 | struct perf_cpu_context *cpuctx, | 668 | struct perf_cpu_context *cpuctx, |
645 | struct perf_event_context *ctx, | 669 | struct perf_event_context *ctx) |
646 | int cpu) | ||
647 | { | 670 | { |
648 | struct perf_event *event, *partial_group; | 671 | struct perf_event *event, *partial_group; |
649 | int ret; | 672 | int ret; |
@@ -651,18 +674,18 @@ group_sched_in(struct perf_event *group_event, | |||
651 | if (group_event->state == PERF_EVENT_STATE_OFF) | 674 | if (group_event->state == PERF_EVENT_STATE_OFF) |
652 | return 0; | 675 | return 0; |
653 | 676 | ||
654 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx, cpu); | 677 | ret = hw_perf_group_sched_in(group_event, cpuctx, ctx); |
655 | if (ret) | 678 | if (ret) |
656 | return ret < 0 ? ret : 0; | 679 | return ret < 0 ? ret : 0; |
657 | 680 | ||
658 | if (event_sched_in(group_event, cpuctx, ctx, cpu)) | 681 | if (event_sched_in(group_event, cpuctx, ctx)) |
659 | return -EAGAIN; | 682 | return -EAGAIN; |
660 | 683 | ||
661 | /* | 684 | /* |
662 | * Schedule in siblings as one group (if any): | 685 | * Schedule in siblings as one group (if any): |
663 | */ | 686 | */ |
664 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 687 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
665 | if (event_sched_in(event, cpuctx, ctx, cpu)) { | 688 | if (event_sched_in(event, cpuctx, ctx)) { |
666 | partial_group = event; | 689 | partial_group = event; |
667 | goto group_error; | 690 | goto group_error; |
668 | } | 691 | } |
@@ -686,24 +709,6 @@ group_error: | |||
686 | } | 709 | } |
687 | 710 | ||
688 | /* | 711 | /* |
689 | * Return 1 for a group consisting entirely of software events, | ||
690 | * 0 if the group contains any hardware events. | ||
691 | */ | ||
692 | static int is_software_only_group(struct perf_event *leader) | ||
693 | { | ||
694 | struct perf_event *event; | ||
695 | |||
696 | if (!is_software_event(leader)) | ||
697 | return 0; | ||
698 | |||
699 | list_for_each_entry(event, &leader->sibling_list, group_entry) | ||
700 | if (!is_software_event(event)) | ||
701 | return 0; | ||
702 | |||
703 | return 1; | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Work out whether we can put this event group on the CPU now. | 712 | * Work out whether we can put this event group on the CPU now. |
708 | */ | 713 | */ |
709 | static int group_can_go_on(struct perf_event *event, | 714 | static int group_can_go_on(struct perf_event *event, |
@@ -713,7 +718,7 @@ static int group_can_go_on(struct perf_event *event, | |||
713 | /* | 718 | /* |
714 | * Groups consisting entirely of software events can always go on. | 719 | * Groups consisting entirely of software events can always go on. |
715 | */ | 720 | */ |
716 | if (is_software_only_group(event)) | 721 | if (event->group_flags & PERF_GROUP_SOFTWARE) |
717 | return 1; | 722 | return 1; |
718 | /* | 723 | /* |
719 | * If an exclusive group is already on, no other hardware | 724 | * If an exclusive group is already on, no other hardware |
@@ -754,7 +759,6 @@ static void __perf_install_in_context(void *info) | |||
754 | struct perf_event *event = info; | 759 | struct perf_event *event = info; |
755 | struct perf_event_context *ctx = event->ctx; | 760 | struct perf_event_context *ctx = event->ctx; |
756 | struct perf_event *leader = event->group_leader; | 761 | struct perf_event *leader = event->group_leader; |
757 | int cpu = smp_processor_id(); | ||
758 | int err; | 762 | int err; |
759 | 763 | ||
760 | /* | 764 | /* |
@@ -801,7 +805,7 @@ static void __perf_install_in_context(void *info) | |||
801 | if (!group_can_go_on(event, cpuctx, 1)) | 805 | if (!group_can_go_on(event, cpuctx, 1)) |
802 | err = -EEXIST; | 806 | err = -EEXIST; |
803 | else | 807 | else |
804 | err = event_sched_in(event, cpuctx, ctx, cpu); | 808 | err = event_sched_in(event, cpuctx, ctx); |
805 | 809 | ||
806 | if (err) { | 810 | if (err) { |
807 | /* | 811 | /* |
@@ -943,11 +947,9 @@ static void __perf_event_enable(void *info) | |||
943 | } else { | 947 | } else { |
944 | perf_disable(); | 948 | perf_disable(); |
945 | if (event == leader) | 949 | if (event == leader) |
946 | err = group_sched_in(event, cpuctx, ctx, | 950 | err = group_sched_in(event, cpuctx, ctx); |
947 | smp_processor_id()); | ||
948 | else | 951 | else |
949 | err = event_sched_in(event, cpuctx, ctx, | 952 | err = event_sched_in(event, cpuctx, ctx); |
950 | smp_processor_id()); | ||
951 | perf_enable(); | 953 | perf_enable(); |
952 | } | 954 | } |
953 | 955 | ||
@@ -1043,8 +1045,15 @@ static int perf_event_refresh(struct perf_event *event, int refresh) | |||
1043 | return 0; | 1045 | return 0; |
1044 | } | 1046 | } |
1045 | 1047 | ||
1046 | void __perf_event_sched_out(struct perf_event_context *ctx, | 1048 | enum event_type_t { |
1047 | struct perf_cpu_context *cpuctx) | 1049 | EVENT_FLEXIBLE = 0x1, |
1050 | EVENT_PINNED = 0x2, | ||
1051 | EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, | ||
1052 | }; | ||
1053 | |||
1054 | static void ctx_sched_out(struct perf_event_context *ctx, | ||
1055 | struct perf_cpu_context *cpuctx, | ||
1056 | enum event_type_t event_type) | ||
1048 | { | 1057 | { |
1049 | struct perf_event *event; | 1058 | struct perf_event *event; |
1050 | 1059 | ||
@@ -1055,10 +1064,18 @@ void __perf_event_sched_out(struct perf_event_context *ctx, | |||
1055 | update_context_time(ctx); | 1064 | update_context_time(ctx); |
1056 | 1065 | ||
1057 | perf_disable(); | 1066 | perf_disable(); |
1058 | if (ctx->nr_active) { | 1067 | if (!ctx->nr_active) |
1059 | list_for_each_entry(event, &ctx->group_list, group_entry) | 1068 | goto out_enable; |
1069 | |||
1070 | if (event_type & EVENT_PINNED) | ||
1071 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) | ||
1060 | group_sched_out(event, cpuctx, ctx); | 1072 | group_sched_out(event, cpuctx, ctx); |
1061 | } | 1073 | |
1074 | if (event_type & EVENT_FLEXIBLE) | ||
1075 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) | ||
1076 | group_sched_out(event, cpuctx, ctx); | ||
1077 | |||
1078 | out_enable: | ||
1062 | perf_enable(); | 1079 | perf_enable(); |
1063 | out: | 1080 | out: |
1064 | raw_spin_unlock(&ctx->lock); | 1081 | raw_spin_unlock(&ctx->lock); |
@@ -1170,9 +1187,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, | |||
1170 | * not restart the event. | 1187 | * not restart the event. |
1171 | */ | 1188 | */ |
1172 | void perf_event_task_sched_out(struct task_struct *task, | 1189 | void perf_event_task_sched_out(struct task_struct *task, |
1173 | struct task_struct *next, int cpu) | 1190 | struct task_struct *next) |
1174 | { | 1191 | { |
1175 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1192 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1176 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1193 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1177 | struct perf_event_context *next_ctx; | 1194 | struct perf_event_context *next_ctx; |
1178 | struct perf_event_context *parent; | 1195 | struct perf_event_context *parent; |
@@ -1220,15 +1237,13 @@ void perf_event_task_sched_out(struct task_struct *task, | |||
1220 | rcu_read_unlock(); | 1237 | rcu_read_unlock(); |
1221 | 1238 | ||
1222 | if (do_switch) { | 1239 | if (do_switch) { |
1223 | __perf_event_sched_out(ctx, cpuctx); | 1240 | ctx_sched_out(ctx, cpuctx, EVENT_ALL); |
1224 | cpuctx->task_ctx = NULL; | 1241 | cpuctx->task_ctx = NULL; |
1225 | } | 1242 | } |
1226 | } | 1243 | } |
1227 | 1244 | ||
1228 | /* | 1245 | static void task_ctx_sched_out(struct perf_event_context *ctx, |
1229 | * Called with IRQs disabled | 1246 | enum event_type_t event_type) |
1230 | */ | ||
1231 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) | ||
1232 | { | 1247 | { |
1233 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 1248 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1234 | 1249 | ||
@@ -1238,47 +1253,41 @@ static void __perf_event_task_sched_out(struct perf_event_context *ctx) | |||
1238 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | 1253 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) |
1239 | return; | 1254 | return; |
1240 | 1255 | ||
1241 | __perf_event_sched_out(ctx, cpuctx); | 1256 | ctx_sched_out(ctx, cpuctx, event_type); |
1242 | cpuctx->task_ctx = NULL; | 1257 | cpuctx->task_ctx = NULL; |
1243 | } | 1258 | } |
1244 | 1259 | ||
1245 | /* | 1260 | /* |
1246 | * Called with IRQs disabled | 1261 | * Called with IRQs disabled |
1247 | */ | 1262 | */ |
1248 | static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx) | 1263 | static void __perf_event_task_sched_out(struct perf_event_context *ctx) |
1264 | { | ||
1265 | task_ctx_sched_out(ctx, EVENT_ALL); | ||
1266 | } | ||
1267 | |||
1268 | /* | ||
1269 | * Called with IRQs disabled | ||
1270 | */ | ||
1271 | static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, | ||
1272 | enum event_type_t event_type) | ||
1249 | { | 1273 | { |
1250 | __perf_event_sched_out(&cpuctx->ctx, cpuctx); | 1274 | ctx_sched_out(&cpuctx->ctx, cpuctx, event_type); |
1251 | } | 1275 | } |
1252 | 1276 | ||
1253 | static void | 1277 | static void |
1254 | __perf_event_sched_in(struct perf_event_context *ctx, | 1278 | ctx_pinned_sched_in(struct perf_event_context *ctx, |
1255 | struct perf_cpu_context *cpuctx, int cpu) | 1279 | struct perf_cpu_context *cpuctx) |
1256 | { | 1280 | { |
1257 | struct perf_event *event; | 1281 | struct perf_event *event; |
1258 | int can_add_hw = 1; | ||
1259 | |||
1260 | raw_spin_lock(&ctx->lock); | ||
1261 | ctx->is_active = 1; | ||
1262 | if (likely(!ctx->nr_events)) | ||
1263 | goto out; | ||
1264 | 1282 | ||
1265 | ctx->timestamp = perf_clock(); | 1283 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1266 | 1284 | if (event->state <= PERF_EVENT_STATE_OFF) | |
1267 | perf_disable(); | ||
1268 | |||
1269 | /* | ||
1270 | * First go through the list and put on any pinned groups | ||
1271 | * in order to give them the best chance of going on. | ||
1272 | */ | ||
1273 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1274 | if (event->state <= PERF_EVENT_STATE_OFF || | ||
1275 | !event->attr.pinned) | ||
1276 | continue; | 1285 | continue; |
1277 | if (event->cpu != -1 && event->cpu != cpu) | 1286 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1278 | continue; | 1287 | continue; |
1279 | 1288 | ||
1280 | if (group_can_go_on(event, cpuctx, 1)) | 1289 | if (group_can_go_on(event, cpuctx, 1)) |
1281 | group_sched_in(event, cpuctx, ctx, cpu); | 1290 | group_sched_in(event, cpuctx, ctx); |
1282 | 1291 | ||
1283 | /* | 1292 | /* |
1284 | * If this pinned group hasn't been scheduled, | 1293 | * If this pinned group hasn't been scheduled, |
@@ -1289,32 +1298,83 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1289 | event->state = PERF_EVENT_STATE_ERROR; | 1298 | event->state = PERF_EVENT_STATE_ERROR; |
1290 | } | 1299 | } |
1291 | } | 1300 | } |
1301 | } | ||
1292 | 1302 | ||
1293 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1303 | static void |
1294 | /* | 1304 | ctx_flexible_sched_in(struct perf_event_context *ctx, |
1295 | * Ignore events in OFF or ERROR state, and | 1305 | struct perf_cpu_context *cpuctx) |
1296 | * ignore pinned events since we did them already. | 1306 | { |
1297 | */ | 1307 | struct perf_event *event; |
1298 | if (event->state <= PERF_EVENT_STATE_OFF || | 1308 | int can_add_hw = 1; |
1299 | event->attr.pinned) | ||
1300 | continue; | ||
1301 | 1309 | ||
1310 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { | ||
1311 | /* Ignore events in OFF or ERROR state */ | ||
1312 | if (event->state <= PERF_EVENT_STATE_OFF) | ||
1313 | continue; | ||
1302 | /* | 1314 | /* |
1303 | * Listen to the 'cpu' scheduling filter constraint | 1315 | * Listen to the 'cpu' scheduling filter constraint |
1304 | * of events: | 1316 | * of events: |
1305 | */ | 1317 | */ |
1306 | if (event->cpu != -1 && event->cpu != cpu) | 1318 | if (event->cpu != -1 && event->cpu != smp_processor_id()) |
1307 | continue; | 1319 | continue; |
1308 | 1320 | ||
1309 | if (group_can_go_on(event, cpuctx, can_add_hw)) | 1321 | if (group_can_go_on(event, cpuctx, can_add_hw)) |
1310 | if (group_sched_in(event, cpuctx, ctx, cpu)) | 1322 | if (group_sched_in(event, cpuctx, ctx)) |
1311 | can_add_hw = 0; | 1323 | can_add_hw = 0; |
1312 | } | 1324 | } |
1325 | } | ||
1326 | |||
1327 | static void | ||
1328 | ctx_sched_in(struct perf_event_context *ctx, | ||
1329 | struct perf_cpu_context *cpuctx, | ||
1330 | enum event_type_t event_type) | ||
1331 | { | ||
1332 | raw_spin_lock(&ctx->lock); | ||
1333 | ctx->is_active = 1; | ||
1334 | if (likely(!ctx->nr_events)) | ||
1335 | goto out; | ||
1336 | |||
1337 | ctx->timestamp = perf_clock(); | ||
1338 | |||
1339 | perf_disable(); | ||
1340 | |||
1341 | /* | ||
1342 | * First go through the list and put on any pinned groups | ||
1343 | * in order to give them the best chance of going on. | ||
1344 | */ | ||
1345 | if (event_type & EVENT_PINNED) | ||
1346 | ctx_pinned_sched_in(ctx, cpuctx); | ||
1347 | |||
1348 | /* Then walk through the lower prio flexible groups */ | ||
1349 | if (event_type & EVENT_FLEXIBLE) | ||
1350 | ctx_flexible_sched_in(ctx, cpuctx); | ||
1351 | |||
1313 | perf_enable(); | 1352 | perf_enable(); |
1314 | out: | 1353 | out: |
1315 | raw_spin_unlock(&ctx->lock); | 1354 | raw_spin_unlock(&ctx->lock); |
1316 | } | 1355 | } |
1317 | 1356 | ||
1357 | static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | ||
1358 | enum event_type_t event_type) | ||
1359 | { | ||
1360 | struct perf_event_context *ctx = &cpuctx->ctx; | ||
1361 | |||
1362 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1363 | } | ||
1364 | |||
1365 | static void task_ctx_sched_in(struct task_struct *task, | ||
1366 | enum event_type_t event_type) | ||
1367 | { | ||
1368 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1369 | struct perf_event_context *ctx = task->perf_event_ctxp; | ||
1370 | |||
1371 | if (likely(!ctx)) | ||
1372 | return; | ||
1373 | if (cpuctx->task_ctx == ctx) | ||
1374 | return; | ||
1375 | ctx_sched_in(ctx, cpuctx, event_type); | ||
1376 | cpuctx->task_ctx = ctx; | ||
1377 | } | ||
1318 | /* | 1378 | /* |
1319 | * Called from scheduler to add the events of the current task | 1379 | * Called from scheduler to add the events of the current task |
1320 | * with interrupts disabled. | 1380 | * with interrupts disabled. |
@@ -1326,38 +1386,128 @@ __perf_event_sched_in(struct perf_event_context *ctx, | |||
1326 | * accessing the event control register. If a NMI hits, then it will | 1386 | * accessing the event control register. If a NMI hits, then it will |
1327 | * keep the event running. | 1387 | * keep the event running. |
1328 | */ | 1388 | */ |
1329 | void perf_event_task_sched_in(struct task_struct *task, int cpu) | 1389 | void perf_event_task_sched_in(struct task_struct *task) |
1330 | { | 1390 | { |
1331 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | 1391 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
1332 | struct perf_event_context *ctx = task->perf_event_ctxp; | 1392 | struct perf_event_context *ctx = task->perf_event_ctxp; |
1333 | 1393 | ||
1334 | if (likely(!ctx)) | 1394 | if (likely(!ctx)) |
1335 | return; | 1395 | return; |
1396 | |||
1336 | if (cpuctx->task_ctx == ctx) | 1397 | if (cpuctx->task_ctx == ctx) |
1337 | return; | 1398 | return; |
1338 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1399 | |
1400 | /* | ||
1401 | * We want to keep the following priority order: | ||
1402 | * cpu pinned (that don't need to move), task pinned, | ||
1403 | * cpu flexible, task flexible. | ||
1404 | */ | ||
1405 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); | ||
1406 | |||
1407 | ctx_sched_in(ctx, cpuctx, EVENT_PINNED); | ||
1408 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); | ||
1409 | ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); | ||
1410 | |||
1339 | cpuctx->task_ctx = ctx; | 1411 | cpuctx->task_ctx = ctx; |
1340 | } | 1412 | } |
1341 | 1413 | ||
1342 | static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | 1414 | #define MAX_INTERRUPTS (~0ULL) |
1415 | |||
1416 | static void perf_log_throttle(struct perf_event *event, int enable); | ||
1417 | |||
1418 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | ||
1343 | { | 1419 | { |
1344 | struct perf_event_context *ctx = &cpuctx->ctx; | 1420 | u64 frequency = event->attr.sample_freq; |
1421 | u64 sec = NSEC_PER_SEC; | ||
1422 | u64 divisor, dividend; | ||
1423 | |||
1424 | int count_fls, nsec_fls, frequency_fls, sec_fls; | ||
1425 | |||
1426 | count_fls = fls64(count); | ||
1427 | nsec_fls = fls64(nsec); | ||
1428 | frequency_fls = fls64(frequency); | ||
1429 | sec_fls = 30; | ||
1345 | 1430 | ||
1346 | __perf_event_sched_in(ctx, cpuctx, cpu); | 1431 | /* |
1432 | * We got @count in @nsec, with a target of sample_freq HZ | ||
1433 | * the target period becomes: | ||
1434 | * | ||
1435 | * @count * 10^9 | ||
1436 | * period = ------------------- | ||
1437 | * @nsec * sample_freq | ||
1438 | * | ||
1439 | */ | ||
1440 | |||
1441 | /* | ||
1442 | * Reduce accuracy by one bit such that @a and @b converge | ||
1443 | * to a similar magnitude. | ||
1444 | */ | ||
1445 | #define REDUCE_FLS(a, b) \ | ||
1446 | do { \ | ||
1447 | if (a##_fls > b##_fls) { \ | ||
1448 | a >>= 1; \ | ||
1449 | a##_fls--; \ | ||
1450 | } else { \ | ||
1451 | b >>= 1; \ | ||
1452 | b##_fls--; \ | ||
1453 | } \ | ||
1454 | } while (0) | ||
1455 | |||
1456 | /* | ||
1457 | * Reduce accuracy until either term fits in a u64, then proceed with | ||
1458 | * the other, so that finally we can do a u64/u64 division. | ||
1459 | */ | ||
1460 | while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) { | ||
1461 | REDUCE_FLS(nsec, frequency); | ||
1462 | REDUCE_FLS(sec, count); | ||
1463 | } | ||
1464 | |||
1465 | if (count_fls + sec_fls > 64) { | ||
1466 | divisor = nsec * frequency; | ||
1467 | |||
1468 | while (count_fls + sec_fls > 64) { | ||
1469 | REDUCE_FLS(count, sec); | ||
1470 | divisor >>= 1; | ||
1471 | } | ||
1472 | |||
1473 | dividend = count * sec; | ||
1474 | } else { | ||
1475 | dividend = count * sec; | ||
1476 | |||
1477 | while (nsec_fls + frequency_fls > 64) { | ||
1478 | REDUCE_FLS(nsec, frequency); | ||
1479 | dividend >>= 1; | ||
1480 | } | ||
1481 | |||
1482 | divisor = nsec * frequency; | ||
1483 | } | ||
1484 | |||
1485 | return div64_u64(dividend, divisor); | ||
1347 | } | 1486 | } |
1348 | 1487 | ||
1349 | #define MAX_INTERRUPTS (~0ULL) | 1488 | static void perf_event_stop(struct perf_event *event) |
1489 | { | ||
1490 | if (!event->pmu->stop) | ||
1491 | return event->pmu->disable(event); | ||
1350 | 1492 | ||
1351 | static void perf_log_throttle(struct perf_event *event, int enable); | 1493 | return event->pmu->stop(event); |
1494 | } | ||
1495 | |||
1496 | static int perf_event_start(struct perf_event *event) | ||
1497 | { | ||
1498 | if (!event->pmu->start) | ||
1499 | return event->pmu->enable(event); | ||
1352 | 1500 | ||
1353 | static void perf_adjust_period(struct perf_event *event, u64 events) | 1501 | return event->pmu->start(event); |
1502 | } | ||
1503 | |||
1504 | static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) | ||
1354 | { | 1505 | { |
1355 | struct hw_perf_event *hwc = &event->hw; | 1506 | struct hw_perf_event *hwc = &event->hw; |
1356 | u64 period, sample_period; | 1507 | u64 period, sample_period; |
1357 | s64 delta; | 1508 | s64 delta; |
1358 | 1509 | ||
1359 | events *= hwc->sample_period; | 1510 | period = perf_calculate_period(event, nsec, count); |
1360 | period = div64_u64(events, event->attr.sample_freq); | ||
1361 | 1511 | ||
1362 | delta = (s64)(period - hwc->sample_period); | 1512 | delta = (s64)(period - hwc->sample_period); |
1363 | delta = (delta + 7) / 8; /* low pass filter */ | 1513 | delta = (delta + 7) / 8; /* low pass filter */ |
@@ -1368,13 +1518,22 @@ static void perf_adjust_period(struct perf_event *event, u64 events) | |||
1368 | sample_period = 1; | 1518 | sample_period = 1; |
1369 | 1519 | ||
1370 | hwc->sample_period = sample_period; | 1520 | hwc->sample_period = sample_period; |
1521 | |||
1522 | if (atomic64_read(&hwc->period_left) > 8*sample_period) { | ||
1523 | perf_disable(); | ||
1524 | perf_event_stop(event); | ||
1525 | atomic64_set(&hwc->period_left, 0); | ||
1526 | perf_event_start(event); | ||
1527 | perf_enable(); | ||
1528 | } | ||
1371 | } | 1529 | } |
1372 | 1530 | ||
1373 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | 1531 | static void perf_ctx_adjust_freq(struct perf_event_context *ctx) |
1374 | { | 1532 | { |
1375 | struct perf_event *event; | 1533 | struct perf_event *event; |
1376 | struct hw_perf_event *hwc; | 1534 | struct hw_perf_event *hwc; |
1377 | u64 interrupts, freq; | 1535 | u64 interrupts, now; |
1536 | s64 delta; | ||
1378 | 1537 | ||
1379 | raw_spin_lock(&ctx->lock); | 1538 | raw_spin_lock(&ctx->lock); |
1380 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { | 1539 | list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { |
@@ -1395,44 +1554,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1395 | if (interrupts == MAX_INTERRUPTS) { | 1554 | if (interrupts == MAX_INTERRUPTS) { |
1396 | perf_log_throttle(event, 1); | 1555 | perf_log_throttle(event, 1); |
1397 | event->pmu->unthrottle(event); | 1556 | event->pmu->unthrottle(event); |
1398 | interrupts = 2*sysctl_perf_event_sample_rate/HZ; | ||
1399 | } | 1557 | } |
1400 | 1558 | ||
1401 | if (!event->attr.freq || !event->attr.sample_freq) | 1559 | if (!event->attr.freq || !event->attr.sample_freq) |
1402 | continue; | 1560 | continue; |
1403 | 1561 | ||
1404 | /* | 1562 | event->pmu->read(event); |
1405 | * if the specified freq < HZ then we need to skip ticks | 1563 | now = atomic64_read(&event->count); |
1406 | */ | 1564 | delta = now - hwc->freq_count_stamp; |
1407 | if (event->attr.sample_freq < HZ) { | 1565 | hwc->freq_count_stamp = now; |
1408 | freq = event->attr.sample_freq; | ||
1409 | |||
1410 | hwc->freq_count += freq; | ||
1411 | hwc->freq_interrupts += interrupts; | ||
1412 | |||
1413 | if (hwc->freq_count < HZ) | ||
1414 | continue; | ||
1415 | |||
1416 | interrupts = hwc->freq_interrupts; | ||
1417 | hwc->freq_interrupts = 0; | ||
1418 | hwc->freq_count -= HZ; | ||
1419 | } else | ||
1420 | freq = HZ; | ||
1421 | |||
1422 | perf_adjust_period(event, freq * interrupts); | ||
1423 | 1566 | ||
1424 | /* | 1567 | if (delta > 0) |
1425 | * In order to avoid being stalled by an (accidental) huge | 1568 | perf_adjust_period(event, TICK_NSEC, delta); |
1426 | * sample period, force reset the sample period if we didn't | ||
1427 | * get any events in this freq period. | ||
1428 | */ | ||
1429 | if (!interrupts) { | ||
1430 | perf_disable(); | ||
1431 | event->pmu->disable(event); | ||
1432 | atomic64_set(&hwc->period_left, 0); | ||
1433 | event->pmu->enable(event); | ||
1434 | perf_enable(); | ||
1435 | } | ||
1436 | } | 1569 | } |
1437 | raw_spin_unlock(&ctx->lock); | 1570 | raw_spin_unlock(&ctx->lock); |
1438 | } | 1571 | } |
@@ -1442,26 +1575,18 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) | |||
1442 | */ | 1575 | */ |
1443 | static void rotate_ctx(struct perf_event_context *ctx) | 1576 | static void rotate_ctx(struct perf_event_context *ctx) |
1444 | { | 1577 | { |
1445 | struct perf_event *event; | ||
1446 | |||
1447 | if (!ctx->nr_events) | 1578 | if (!ctx->nr_events) |
1448 | return; | 1579 | return; |
1449 | 1580 | ||
1450 | raw_spin_lock(&ctx->lock); | 1581 | raw_spin_lock(&ctx->lock); |
1451 | /* | 1582 | |
1452 | * Rotate the first entry last (works just fine for group events too): | 1583 | /* Rotate the first entry last of non-pinned groups */ |
1453 | */ | 1584 | list_rotate_left(&ctx->flexible_groups); |
1454 | perf_disable(); | ||
1455 | list_for_each_entry(event, &ctx->group_list, group_entry) { | ||
1456 | list_move_tail(&event->group_entry, &ctx->group_list); | ||
1457 | break; | ||
1458 | } | ||
1459 | perf_enable(); | ||
1460 | 1585 | ||
1461 | raw_spin_unlock(&ctx->lock); | 1586 | raw_spin_unlock(&ctx->lock); |
1462 | } | 1587 | } |
1463 | 1588 | ||
1464 | void perf_event_task_tick(struct task_struct *curr, int cpu) | 1589 | void perf_event_task_tick(struct task_struct *curr) |
1465 | { | 1590 | { |
1466 | struct perf_cpu_context *cpuctx; | 1591 | struct perf_cpu_context *cpuctx; |
1467 | struct perf_event_context *ctx; | 1592 | struct perf_event_context *ctx; |
@@ -1469,24 +1594,43 @@ void perf_event_task_tick(struct task_struct *curr, int cpu) | |||
1469 | if (!atomic_read(&nr_events)) | 1594 | if (!atomic_read(&nr_events)) |
1470 | return; | 1595 | return; |
1471 | 1596 | ||
1472 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1597 | cpuctx = &__get_cpu_var(perf_cpu_context); |
1473 | ctx = curr->perf_event_ctxp; | 1598 | ctx = curr->perf_event_ctxp; |
1474 | 1599 | ||
1600 | perf_disable(); | ||
1601 | |||
1475 | perf_ctx_adjust_freq(&cpuctx->ctx); | 1602 | perf_ctx_adjust_freq(&cpuctx->ctx); |
1476 | if (ctx) | 1603 | if (ctx) |
1477 | perf_ctx_adjust_freq(ctx); | 1604 | perf_ctx_adjust_freq(ctx); |
1478 | 1605 | ||
1479 | perf_event_cpu_sched_out(cpuctx); | 1606 | cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); |
1480 | if (ctx) | 1607 | if (ctx) |
1481 | __perf_event_task_sched_out(ctx); | 1608 | task_ctx_sched_out(ctx, EVENT_FLEXIBLE); |
1482 | 1609 | ||
1483 | rotate_ctx(&cpuctx->ctx); | 1610 | rotate_ctx(&cpuctx->ctx); |
1484 | if (ctx) | 1611 | if (ctx) |
1485 | rotate_ctx(ctx); | 1612 | rotate_ctx(ctx); |
1486 | 1613 | ||
1487 | perf_event_cpu_sched_in(cpuctx, cpu); | 1614 | cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); |
1488 | if (ctx) | 1615 | if (ctx) |
1489 | perf_event_task_sched_in(curr, cpu); | 1616 | task_ctx_sched_in(curr, EVENT_FLEXIBLE); |
1617 | |||
1618 | perf_enable(); | ||
1619 | } | ||
1620 | |||
1621 | static int event_enable_on_exec(struct perf_event *event, | ||
1622 | struct perf_event_context *ctx) | ||
1623 | { | ||
1624 | if (!event->attr.enable_on_exec) | ||
1625 | return 0; | ||
1626 | |||
1627 | event->attr.enable_on_exec = 0; | ||
1628 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | ||
1629 | return 0; | ||
1630 | |||
1631 | __perf_event_mark_enabled(event, ctx); | ||
1632 | |||
1633 | return 1; | ||
1490 | } | 1634 | } |
1491 | 1635 | ||
1492 | /* | 1636 | /* |
@@ -1499,6 +1643,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1499 | struct perf_event *event; | 1643 | struct perf_event *event; |
1500 | unsigned long flags; | 1644 | unsigned long flags; |
1501 | int enabled = 0; | 1645 | int enabled = 0; |
1646 | int ret; | ||
1502 | 1647 | ||
1503 | local_irq_save(flags); | 1648 | local_irq_save(flags); |
1504 | ctx = task->perf_event_ctxp; | 1649 | ctx = task->perf_event_ctxp; |
@@ -1509,14 +1654,16 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1509 | 1654 | ||
1510 | raw_spin_lock(&ctx->lock); | 1655 | raw_spin_lock(&ctx->lock); |
1511 | 1656 | ||
1512 | list_for_each_entry(event, &ctx->group_list, group_entry) { | 1657 | list_for_each_entry(event, &ctx->pinned_groups, group_entry) { |
1513 | if (!event->attr.enable_on_exec) | 1658 | ret = event_enable_on_exec(event, ctx); |
1514 | continue; | 1659 | if (ret) |
1515 | event->attr.enable_on_exec = 0; | 1660 | enabled = 1; |
1516 | if (event->state >= PERF_EVENT_STATE_INACTIVE) | 1661 | } |
1517 | continue; | 1662 | |
1518 | __perf_event_mark_enabled(event, ctx); | 1663 | list_for_each_entry(event, &ctx->flexible_groups, group_entry) { |
1519 | enabled = 1; | 1664 | ret = event_enable_on_exec(event, ctx); |
1665 | if (ret) | ||
1666 | enabled = 1; | ||
1520 | } | 1667 | } |
1521 | 1668 | ||
1522 | /* | 1669 | /* |
@@ -1527,7 +1674,7 @@ static void perf_event_enable_on_exec(struct task_struct *task) | |||
1527 | 1674 | ||
1528 | raw_spin_unlock(&ctx->lock); | 1675 | raw_spin_unlock(&ctx->lock); |
1529 | 1676 | ||
1530 | perf_event_task_sched_in(task, smp_processor_id()); | 1677 | perf_event_task_sched_in(task); |
1531 | out: | 1678 | out: |
1532 | local_irq_restore(flags); | 1679 | local_irq_restore(flags); |
1533 | } | 1680 | } |
@@ -1590,7 +1737,8 @@ __perf_event_init_context(struct perf_event_context *ctx, | |||
1590 | { | 1737 | { |
1591 | raw_spin_lock_init(&ctx->lock); | 1738 | raw_spin_lock_init(&ctx->lock); |
1592 | mutex_init(&ctx->mutex); | 1739 | mutex_init(&ctx->mutex); |
1593 | INIT_LIST_HEAD(&ctx->group_list); | 1740 | INIT_LIST_HEAD(&ctx->pinned_groups); |
1741 | INIT_LIST_HEAD(&ctx->flexible_groups); | ||
1594 | INIT_LIST_HEAD(&ctx->event_list); | 1742 | INIT_LIST_HEAD(&ctx->event_list); |
1595 | atomic_set(&ctx->refcount, 1); | 1743 | atomic_set(&ctx->refcount, 1); |
1596 | ctx->task = task; | 1744 | ctx->task = task; |
@@ -3608,7 +3756,7 @@ void __perf_event_mmap(struct vm_area_struct *vma) | |||
3608 | /* .tid */ | 3756 | /* .tid */ |
3609 | .start = vma->vm_start, | 3757 | .start = vma->vm_start, |
3610 | .len = vma->vm_end - vma->vm_start, | 3758 | .len = vma->vm_end - vma->vm_start, |
3611 | .pgoff = vma->vm_pgoff, | 3759 | .pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT, |
3612 | }, | 3760 | }, |
3613 | }; | 3761 | }; |
3614 | 3762 | ||
@@ -3688,12 +3836,12 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, | |||
3688 | 3836 | ||
3689 | if (event->attr.freq) { | 3837 | if (event->attr.freq) { |
3690 | u64 now = perf_clock(); | 3838 | u64 now = perf_clock(); |
3691 | s64 delta = now - hwc->freq_stamp; | 3839 | s64 delta = now - hwc->freq_time_stamp; |
3692 | 3840 | ||
3693 | hwc->freq_stamp = now; | 3841 | hwc->freq_time_stamp = now; |
3694 | 3842 | ||
3695 | if (delta > 0 && delta < TICK_NSEC) | 3843 | if (delta > 0 && delta < 2*TICK_NSEC) |
3696 | perf_adjust_period(event, NSEC_PER_SEC / (int)delta); | 3844 | perf_adjust_period(event, delta, hwc->last_period); |
3697 | } | 3845 | } |
3698 | 3846 | ||
3699 | /* | 3847 | /* |
@@ -4184,7 +4332,7 @@ static const struct pmu perf_ops_task_clock = { | |||
4184 | .read = task_clock_perf_event_read, | 4332 | .read = task_clock_perf_event_read, |
4185 | }; | 4333 | }; |
4186 | 4334 | ||
4187 | #ifdef CONFIG_EVENT_PROFILE | 4335 | #ifdef CONFIG_EVENT_TRACING |
4188 | 4336 | ||
4189 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4337 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, |
4190 | int entry_size) | 4338 | int entry_size) |
@@ -4289,7 +4437,7 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4289 | { | 4437 | { |
4290 | } | 4438 | } |
4291 | 4439 | ||
4292 | #endif /* CONFIG_EVENT_PROFILE */ | 4440 | #endif /* CONFIG_EVENT_TRACING */ |
4293 | 4441 | ||
4294 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 4442 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
4295 | static void bp_perf_event_destroy(struct perf_event *event) | 4443 | static void bp_perf_event_destroy(struct perf_event *event) |
@@ -4870,8 +5018,15 @@ inherit_event(struct perf_event *parent_event, | |||
4870 | else | 5018 | else |
4871 | child_event->state = PERF_EVENT_STATE_OFF; | 5019 | child_event->state = PERF_EVENT_STATE_OFF; |
4872 | 5020 | ||
4873 | if (parent_event->attr.freq) | 5021 | if (parent_event->attr.freq) { |
4874 | child_event->hw.sample_period = parent_event->hw.sample_period; | 5022 | u64 sample_period = parent_event->hw.sample_period; |
5023 | struct hw_perf_event *hwc = &child_event->hw; | ||
5024 | |||
5025 | hwc->sample_period = sample_period; | ||
5026 | hwc->last_period = sample_period; | ||
5027 | |||
5028 | atomic64_set(&hwc->period_left, sample_period); | ||
5029 | } | ||
4875 | 5030 | ||
4876 | child_event->overflow_handler = parent_event->overflow_handler; | 5031 | child_event->overflow_handler = parent_event->overflow_handler; |
4877 | 5032 | ||
@@ -5039,7 +5194,11 @@ void perf_event_exit_task(struct task_struct *child) | |||
5039 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); | 5194 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); |
5040 | 5195 | ||
5041 | again: | 5196 | again: |
5042 | list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list, | 5197 | list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups, |
5198 | group_entry) | ||
5199 | __perf_event_exit_task(child_event, child_ctx, child); | ||
5200 | |||
5201 | list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups, | ||
5043 | group_entry) | 5202 | group_entry) |
5044 | __perf_event_exit_task(child_event, child_ctx, child); | 5203 | __perf_event_exit_task(child_event, child_ctx, child); |
5045 | 5204 | ||
@@ -5048,7 +5207,8 @@ again: | |||
5048 | * its siblings to the list, but we obtained 'tmp' before that which | 5207 | * its siblings to the list, but we obtained 'tmp' before that which |
5049 | * will still point to the list head terminating the iteration. | 5208 | * will still point to the list head terminating the iteration. |
5050 | */ | 5209 | */ |
5051 | if (!list_empty(&child_ctx->group_list)) | 5210 | if (!list_empty(&child_ctx->pinned_groups) || |
5211 | !list_empty(&child_ctx->flexible_groups)) | ||
5052 | goto again; | 5212 | goto again; |
5053 | 5213 | ||
5054 | mutex_unlock(&child_ctx->mutex); | 5214 | mutex_unlock(&child_ctx->mutex); |
@@ -5056,6 +5216,24 @@ again: | |||
5056 | put_ctx(child_ctx); | 5216 | put_ctx(child_ctx); |
5057 | } | 5217 | } |
5058 | 5218 | ||
5219 | static void perf_free_event(struct perf_event *event, | ||
5220 | struct perf_event_context *ctx) | ||
5221 | { | ||
5222 | struct perf_event *parent = event->parent; | ||
5223 | |||
5224 | if (WARN_ON_ONCE(!parent)) | ||
5225 | return; | ||
5226 | |||
5227 | mutex_lock(&parent->child_mutex); | ||
5228 | list_del_init(&event->child_list); | ||
5229 | mutex_unlock(&parent->child_mutex); | ||
5230 | |||
5231 | fput(parent->filp); | ||
5232 | |||
5233 | list_del_event(event, ctx); | ||
5234 | free_event(event); | ||
5235 | } | ||
5236 | |||
5059 | /* | 5237 | /* |
5060 | * free an unexposed, unused context as created by inheritance by | 5238 | * free an unexposed, unused context as created by inheritance by |
5061 | * init_task below, used by fork() in case of fail. | 5239 | * init_task below, used by fork() in case of fail. |
@@ -5070,36 +5248,70 @@ void perf_event_free_task(struct task_struct *task) | |||
5070 | 5248 | ||
5071 | mutex_lock(&ctx->mutex); | 5249 | mutex_lock(&ctx->mutex); |
5072 | again: | 5250 | again: |
5073 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) { | 5251 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5074 | struct perf_event *parent = event->parent; | 5252 | perf_free_event(event, ctx); |
5075 | 5253 | ||
5076 | if (WARN_ON_ONCE(!parent)) | 5254 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, |
5077 | continue; | 5255 | group_entry) |
5256 | perf_free_event(event, ctx); | ||
5078 | 5257 | ||
5079 | mutex_lock(&parent->child_mutex); | 5258 | if (!list_empty(&ctx->pinned_groups) || |
5080 | list_del_init(&event->child_list); | 5259 | !list_empty(&ctx->flexible_groups)) |
5081 | mutex_unlock(&parent->child_mutex); | 5260 | goto again; |
5082 | 5261 | ||
5083 | fput(parent->filp); | 5262 | mutex_unlock(&ctx->mutex); |
5084 | 5263 | ||
5085 | list_del_event(event, ctx); | 5264 | put_ctx(ctx); |
5086 | free_event(event); | 5265 | } |
5266 | |||
5267 | static int | ||
5268 | inherit_task_group(struct perf_event *event, struct task_struct *parent, | ||
5269 | struct perf_event_context *parent_ctx, | ||
5270 | struct task_struct *child, | ||
5271 | int *inherited_all) | ||
5272 | { | ||
5273 | int ret; | ||
5274 | struct perf_event_context *child_ctx = child->perf_event_ctxp; | ||
5275 | |||
5276 | if (!event->attr.inherit) { | ||
5277 | *inherited_all = 0; | ||
5278 | return 0; | ||
5087 | } | 5279 | } |
5088 | 5280 | ||
5089 | if (!list_empty(&ctx->group_list)) | 5281 | if (!child_ctx) { |
5090 | goto again; | 5282 | /* |
5283 | * This is executed from the parent task context, so | ||
5284 | * inherit events that have been marked for cloning. | ||
5285 | * First allocate and initialize a context for the | ||
5286 | * child. | ||
5287 | */ | ||
5091 | 5288 | ||
5092 | mutex_unlock(&ctx->mutex); | 5289 | child_ctx = kzalloc(sizeof(struct perf_event_context), |
5290 | GFP_KERNEL); | ||
5291 | if (!child_ctx) | ||
5292 | return -ENOMEM; | ||
5093 | 5293 | ||
5094 | put_ctx(ctx); | 5294 | __perf_event_init_context(child_ctx, child); |
5295 | child->perf_event_ctxp = child_ctx; | ||
5296 | get_task_struct(child); | ||
5297 | } | ||
5298 | |||
5299 | ret = inherit_group(event, parent, parent_ctx, | ||
5300 | child, child_ctx); | ||
5301 | |||
5302 | if (ret) | ||
5303 | *inherited_all = 0; | ||
5304 | |||
5305 | return ret; | ||
5095 | } | 5306 | } |
5096 | 5307 | ||
5308 | |||
5097 | /* | 5309 | /* |
5098 | * Initialize the perf_event context in task_struct | 5310 | * Initialize the perf_event context in task_struct |
5099 | */ | 5311 | */ |
5100 | int perf_event_init_task(struct task_struct *child) | 5312 | int perf_event_init_task(struct task_struct *child) |
5101 | { | 5313 | { |
5102 | struct perf_event_context *child_ctx = NULL, *parent_ctx; | 5314 | struct perf_event_context *child_ctx, *parent_ctx; |
5103 | struct perf_event_context *cloned_ctx; | 5315 | struct perf_event_context *cloned_ctx; |
5104 | struct perf_event *event; | 5316 | struct perf_event *event; |
5105 | struct task_struct *parent = current; | 5317 | struct task_struct *parent = current; |
@@ -5137,41 +5349,22 @@ int perf_event_init_task(struct task_struct *child) | |||
5137 | * We dont have to disable NMIs - we are only looking at | 5349 | * We dont have to disable NMIs - we are only looking at |
5138 | * the list, not manipulating it: | 5350 | * the list, not manipulating it: |
5139 | */ | 5351 | */ |
5140 | list_for_each_entry(event, &parent_ctx->group_list, group_entry) { | 5352 | list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { |
5141 | 5353 | ret = inherit_task_group(event, parent, parent_ctx, child, | |
5142 | if (!event->attr.inherit) { | 5354 | &inherited_all); |
5143 | inherited_all = 0; | 5355 | if (ret) |
5144 | continue; | 5356 | break; |
5145 | } | 5357 | } |
5146 | |||
5147 | if (!child->perf_event_ctxp) { | ||
5148 | /* | ||
5149 | * This is executed from the parent task context, so | ||
5150 | * inherit events that have been marked for cloning. | ||
5151 | * First allocate and initialize a context for the | ||
5152 | * child. | ||
5153 | */ | ||
5154 | |||
5155 | child_ctx = kzalloc(sizeof(struct perf_event_context), | ||
5156 | GFP_KERNEL); | ||
5157 | if (!child_ctx) { | ||
5158 | ret = -ENOMEM; | ||
5159 | break; | ||
5160 | } | ||
5161 | |||
5162 | __perf_event_init_context(child_ctx, child); | ||
5163 | child->perf_event_ctxp = child_ctx; | ||
5164 | get_task_struct(child); | ||
5165 | } | ||
5166 | 5358 | ||
5167 | ret = inherit_group(event, parent, parent_ctx, | 5359 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
5168 | child, child_ctx); | 5360 | ret = inherit_task_group(event, parent, parent_ctx, child, |
5169 | if (ret) { | 5361 | &inherited_all); |
5170 | inherited_all = 0; | 5362 | if (ret) |
5171 | break; | 5363 | break; |
5172 | } | ||
5173 | } | 5364 | } |
5174 | 5365 | ||
5366 | child_ctx = child->perf_event_ctxp; | ||
5367 | |||
5175 | if (child_ctx && inherited_all) { | 5368 | if (child_ctx && inherited_all) { |
5176 | /* | 5369 | /* |
5177 | * Mark the child context as a clone of the parent | 5370 | * Mark the child context as a clone of the parent |
@@ -5220,7 +5413,9 @@ static void __perf_event_exit_cpu(void *info) | |||
5220 | struct perf_event_context *ctx = &cpuctx->ctx; | 5413 | struct perf_event_context *ctx = &cpuctx->ctx; |
5221 | struct perf_event *event, *tmp; | 5414 | struct perf_event *event, *tmp; |
5222 | 5415 | ||
5223 | list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) | 5416 | list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) |
5417 | __perf_event_remove_from_context(event); | ||
5418 | list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) | ||
5224 | __perf_event_remove_from_context(event); | 5419 | __perf_event_remove_from_context(event); |
5225 | } | 5420 | } |
5226 | static void perf_event_exit_cpu(int cpu) | 5421 | static void perf_event_exit_cpu(int cpu) |
@@ -5258,6 +5453,10 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
5258 | perf_event_exit_cpu(cpu); | 5453 | perf_event_exit_cpu(cpu); |
5259 | break; | 5454 | break; |
5260 | 5455 | ||
5456 | case CPU_DEAD: | ||
5457 | hw_perf_event_setup_offline(cpu); | ||
5458 | break; | ||
5459 | |||
5261 | default: | 5460 | default: |
5262 | break; | 5461 | break; |
5263 | } | 5462 | } |
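Taken together, the perf_event.c hunks replace the single ctx->group_list with ctx->pinned_groups and ctx->flexible_groups (ctx_group_list() picks the list from event->attr.pinned) and thread an event_type_t mask through every scheduling path so pinned and flexible groups can be switched independently. The payoff is the priority order described in the comment inside perf_event_task_sched_in(): cpu pinned events never move, then task pinned, then cpu flexible, then task flexible. Condensed from the hunk above, the sched-in path reads:

/* Condensed restatement of perf_event_task_sched_in() from the diff above. */
void perf_event_task_sched_in(struct task_struct *task)
{
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	struct perf_event_context *ctx = task->perf_event_ctxp;

	if (!ctx || cpuctx->task_ctx == ctx)
		return;

	/* Only the flexible CPU groups are pushed out to make room. */
	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);

	/* Task pinned first, then CPU flexible, then task flexible. */
	ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
	cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
	ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);

	cpuctx->task_ctx = ctx;
}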
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a8fb30a91b1..3e71ebb101c2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2794,7 +2794,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2794 | */ | 2794 | */ |
2795 | prev_state = prev->state; | 2795 | prev_state = prev->state; |
2796 | finish_arch_switch(prev); | 2796 | finish_arch_switch(prev); |
2797 | perf_event_task_sched_in(current, cpu_of(rq)); | 2797 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2798 | local_irq_disable(); | ||
2799 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
2800 | perf_event_task_sched_in(current); | ||
2801 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
2802 | local_irq_enable(); | ||
2803 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | ||
2798 | finish_lock_switch(rq, prev); | 2804 | finish_lock_switch(rq, prev); |
2799 | 2805 | ||
2800 | fire_sched_in_preempt_notifiers(current); | 2806 | fire_sched_in_preempt_notifiers(current); |
@@ -5309,7 +5315,7 @@ void scheduler_tick(void) | |||
5309 | curr->sched_class->task_tick(rq, curr, 0); | 5315 | curr->sched_class->task_tick(rq, curr, 0); |
5310 | raw_spin_unlock(&rq->lock); | 5316 | raw_spin_unlock(&rq->lock); |
5311 | 5317 | ||
5312 | perf_event_task_tick(curr, cpu); | 5318 | perf_event_task_tick(curr); |
5313 | 5319 | ||
5314 | #ifdef CONFIG_SMP | 5320 | #ifdef CONFIG_SMP |
5315 | rq->idle_at_tick = idle_cpu(cpu); | 5321 | rq->idle_at_tick = idle_cpu(cpu); |
@@ -5523,7 +5529,7 @@ need_resched_nonpreemptible: | |||
5523 | 5529 | ||
5524 | if (likely(prev != next)) { | 5530 | if (likely(prev != next)) { |
5525 | sched_info_switch(prev, next); | 5531 | sched_info_switch(prev, next); |
5526 | perf_event_task_sched_out(prev, next, cpu); | 5532 | perf_event_task_sched_out(prev, next); |
5527 | 5533 | ||
5528 | rq->nr_switches++; | 5534 | rq->nr_switches++; |
5529 | rq->curr = next; | 5535 | rq->curr = next; |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif | |||
51 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o | 51 | obj-$(CONFIG_EVENT_TRACING) += trace_events.o |
52 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o | 52 | obj-$(CONFIG_EVENT_TRACING) += trace_export.o |
53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o | 53 | obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o |
54 | obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o | 54 | ifeq ($(CONFIG_PERF_EVENTS),y) |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o | ||
56 | endif | ||
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 57 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 58 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
57 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o | 59 | obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..1904797f4a8a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/kthread.h> | 23 | #include <linux/kthread.h> |
24 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
25 | #include <linux/kprobes.h> | ||
26 | #include <linux/ftrace.h> | 25 | #include <linux/ftrace.h> |
27 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
28 | #include <linux/ctype.h> | 27 | #include <linux/ctype.h> |
@@ -898,36 +897,6 @@ static struct dyn_ftrace *ftrace_free_records; | |||
898 | } \ | 897 | } \ |
899 | } | 898 | } |
900 | 899 | ||
901 | #ifdef CONFIG_KPROBES | ||
902 | |||
903 | static int frozen_record_count; | ||
904 | |||
905 | static inline void freeze_record(struct dyn_ftrace *rec) | ||
906 | { | ||
907 | if (!(rec->flags & FTRACE_FL_FROZEN)) { | ||
908 | rec->flags |= FTRACE_FL_FROZEN; | ||
909 | frozen_record_count++; | ||
910 | } | ||
911 | } | ||
912 | |||
913 | static inline void unfreeze_record(struct dyn_ftrace *rec) | ||
914 | { | ||
915 | if (rec->flags & FTRACE_FL_FROZEN) { | ||
916 | rec->flags &= ~FTRACE_FL_FROZEN; | ||
917 | frozen_record_count--; | ||
918 | } | ||
919 | } | ||
920 | |||
921 | static inline int record_frozen(struct dyn_ftrace *rec) | ||
922 | { | ||
923 | return rec->flags & FTRACE_FL_FROZEN; | ||
924 | } | ||
925 | #else | ||
926 | # define freeze_record(rec) ({ 0; }) | ||
927 | # define unfreeze_record(rec) ({ 0; }) | ||
928 | # define record_frozen(rec) ({ 0; }) | ||
929 | #endif /* CONFIG_KPROBES */ | ||
930 | |||
931 | static void ftrace_free_rec(struct dyn_ftrace *rec) | 900 | static void ftrace_free_rec(struct dyn_ftrace *rec) |
932 | { | 901 | { |
933 | rec->freelist = ftrace_free_records; | 902 | rec->freelist = ftrace_free_records; |
@@ -1025,6 +994,21 @@ static void ftrace_bug(int failed, unsigned long ip) | |||
1025 | } | 994 | } |
1026 | 995 | ||
1027 | 996 | ||
997 | /* Return 1 if the address range is reserved for ftrace */ | ||
998 | int ftrace_text_reserved(void *start, void *end) | ||
999 | { | ||
1000 | struct dyn_ftrace *rec; | ||
1001 | struct ftrace_page *pg; | ||
1002 | |||
1003 | do_for_each_ftrace_rec(pg, rec) { | ||
1004 | if (rec->ip <= (unsigned long)end && | ||
1005 | rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start) | ||
1006 | return 1; | ||
1007 | } while_for_each_ftrace_rec(); | ||
1008 | return 0; | ||
1009 | } | ||
1010 | |||
1011 | |||
1028 | static int | 1012 | static int |
1029 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) | 1013 | __ftrace_replace_code(struct dyn_ftrace *rec, int enable) |
1030 | { | 1014 | { |
@@ -1076,14 +1060,6 @@ static void ftrace_replace_code(int enable) | |||
1076 | !(rec->flags & FTRACE_FL_CONVERTED)) | 1060 | !(rec->flags & FTRACE_FL_CONVERTED)) |
1077 | continue; | 1061 | continue; |
1078 | 1062 | ||
1079 | /* ignore updates to this record's mcount site */ | ||
1080 | if (get_kprobe((void *)rec->ip)) { | ||
1081 | freeze_record(rec); | ||
1082 | continue; | ||
1083 | } else { | ||
1084 | unfreeze_record(rec); | ||
1085 | } | ||
1086 | |||
1087 | failed = __ftrace_replace_code(rec, enable); | 1063 | failed = __ftrace_replace_code(rec, enable); |
1088 | if (failed) { | 1064 | if (failed) { |
1089 | rec->flags |= FTRACE_FL_FAILED; | 1065 | rec->flags |= FTRACE_FL_FAILED; |
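ftrace_text_reserved(), added above, walks every dyn_ftrace record and reports a hit when the patched call at rec->ip overlaps the queried range, i.e. rec->ip <= end && rec->ip + MCOUNT_INSN_SIZE > start. A standalone version of just that predicate with a worked example; MCOUNT_INSN_SIZE is 5 here only because that is the x86 call size, the real value is per-arch:

#include <stdbool.h>
#include <stdio.h>

#define MCOUNT_INSN_SIZE 5	/* x86 example value; per-arch in the kernel */

/* The per-record interval test used by ftrace_text_reserved(). */
static bool mcount_site_overlaps(unsigned long ip, unsigned long start,
				 unsigned long end)
{
	return ip <= end && ip + MCOUNT_INSN_SIZE > start;
}

int main(void)
{
	unsigned long ip = 0x1000;	/* patched call occupies 0x1000..0x1004 */

	printf("%d\n", mcount_site_overlaps(ip, 0x1004, 0x1004));	/* 1: inside */
	printf("%d\n", mcount_site_overlaps(ip, 0x1005, 0x1005));	/* 0: past it */
	return 0;
}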
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 9e25573242cf..f0d693005075 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -6,14 +6,12 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/kprobes.h> | ||
9 | #include "trace.h" | 10 | #include "trace.h" |
10 | 11 | ||
11 | 12 | ||
12 | char *perf_trace_buf; | 13 | static char *perf_trace_buf; |
13 | EXPORT_SYMBOL_GPL(perf_trace_buf); | 14 | static char *perf_trace_buf_nmi; |
14 | |||
15 | char *perf_trace_buf_nmi; | ||
16 | EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); | ||
17 | 15 | ||
18 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; | 16 | typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; |
19 | 17 | ||
@@ -120,3 +118,47 @@ void ftrace_profile_disable(int event_id) | |||
120 | } | 118 | } |
121 | mutex_unlock(&event_mutex); | 119 | mutex_unlock(&event_mutex); |
122 | } | 120 | } |
121 | |||
122 | __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, | ||
123 | int *rctxp, unsigned long *irq_flags) | ||
124 | { | ||
125 | struct trace_entry *entry; | ||
126 | char *trace_buf, *raw_data; | ||
127 | int pc, cpu; | ||
128 | |||
129 | pc = preempt_count(); | ||
130 | |||
131 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
132 | local_irq_save(*irq_flags); | ||
133 | |||
134 | *rctxp = perf_swevent_get_recursion_context(); | ||
135 | if (*rctxp < 0) | ||
136 | goto err_recursion; | ||
137 | |||
138 | cpu = smp_processor_id(); | ||
139 | |||
140 | if (in_nmi()) | ||
141 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
142 | else | ||
143 | trace_buf = rcu_dereference(perf_trace_buf); | ||
144 | |||
145 | if (!trace_buf) | ||
146 | goto err; | ||
147 | |||
148 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
149 | |||
150 | /* zero the dead bytes from align to not leak stack to user */ | ||
151 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
152 | |||
153 | entry = (struct trace_entry *)raw_data; | ||
154 | tracing_generic_entry_update(entry, *irq_flags, pc); | ||
155 | entry->type = type; | ||
156 | |||
157 | return raw_data; | ||
158 | err: | ||
159 | perf_swevent_put_recursion_context(*rctxp); | ||
160 | err_recursion: | ||
161 | local_irq_restore(*irq_flags); | ||
162 | return NULL; | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); | ||
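ftrace_perf_buf_prepare() is meant to be paired with ftrace_perf_buf_submit(), which the converted callers below feed the returned record, its size, the recursion context, an address, a count and the saved irq flags. A minimal sketch of that pairing, assuming a hypothetical event with a single extra field (my_profile_entry and my_profile_func are made up; the helpers, FTRACE_MAX_PROFILE_SIZE and the size rounding follow the callers converted later in this patch):

	static void my_profile_func(struct ftrace_event_call *call, unsigned long ip)
	{
		struct my_profile_entry {
			struct trace_entry	ent;	/* header filled in by ftrace_perf_buf_prepare() */
			unsigned long		ip;
		} *entry;
		unsigned long irq_flags;
		int rctx, size;

		/* Round the record so that, together with a leading u32, it stays u64 aligned,
		 * exactly as the converted kprobe and syscall callers do. */
		size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64)) - sizeof(u32);
		if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
			      "profile buffer not large enough"))
			return;

		entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags);
		if (!entry)
			return;	/* recursion or missing buffer: nothing to undo */

		entry->ip = ip;
		ftrace_perf_buf_submit(entry, size, rctx, ip, 1, irq_flags);
	}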
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e42af9aad69f..4615f62a04f1 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -1371,7 +1371,7 @@ out_unlock: | |||
1371 | return err; | 1371 | return err; |
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | #ifdef CONFIG_EVENT_PROFILE | 1374 | #ifdef CONFIG_PERF_EVENTS |
1375 | 1375 | ||
1376 | void ftrace_profile_free_filter(struct perf_event *event) | 1376 | void ftrace_profile_free_filter(struct perf_event *event) |
1377 | { | 1377 | { |
@@ -1439,5 +1439,5 @@ out_unlock: | |||
1439 | return err; | 1439 | return err; |
1440 | } | 1440 | } |
1441 | 1441 | ||
1442 | #endif /* CONFIG_EVENT_PROFILE */ | 1442 | #endif /* CONFIG_PERF_EVENTS */ |
1443 | 1443 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 50b1b8239806..356c10227c98 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) | |||
91 | return retval; | 91 | return retval; |
92 | } | 92 | } |
93 | 93 | ||
94 | static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) | ||
95 | { | ||
96 | return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); | ||
97 | } | ||
98 | |||
99 | static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, | 94 | static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, |
100 | void *dummy) | 95 | void *dummy) |
101 | { | 96 | { |
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) | |||
231 | { | 226 | { |
232 | int ret = -EINVAL; | 227 | int ret = -EINVAL; |
233 | 228 | ||
234 | if (ff->func == fetch_argument) | 229 | if (ff->func == fetch_register) { |
235 | ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); | ||
236 | else if (ff->func == fetch_register) { | ||
237 | const char *name; | 230 | const char *name; |
238 | name = regs_query_register_name((unsigned int)((long)ff->data)); | 231 | name = regs_query_register_name((unsigned int)((long)ff->data)); |
239 | ret = snprintf(buf, n, "%%%s", name); | 232 | ret = snprintf(buf, n, "%%%s", name); |
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) | |||
489 | } | 482 | } |
490 | } else | 483 | } else |
491 | ret = -EINVAL; | 484 | ret = -EINVAL; |
492 | } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { | ||
493 | ret = strict_strtoul(arg + 3, 10, ¶m); | ||
494 | if (ret || param > PARAM_MAX_ARGS) | ||
495 | ret = -EINVAL; | ||
496 | else { | ||
497 | ff->func = fetch_argument; | ||
498 | ff->data = (void *)param; | ||
499 | } | ||
500 | } else | 485 | } else |
501 | ret = -EINVAL; | 486 | ret = -EINVAL; |
502 | return ret; | 487 | return ret; |
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv) | |||
611 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] | 596 | * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] |
612 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] | 597 | * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] |
613 | * Fetch args: | 598 | * Fetch args: |
614 | * $argN : fetch Nth of function argument. (N:0-) | ||
615 | * $retval : fetch return value | 599 | * $retval : fetch return value |
616 | * $stack : fetch stack address | 600 | * $stack : fetch stack address |
617 | * $stackN : fetch Nth of stack (N:0-) | 601 | * $stackN : fetch Nth of stack (N:0-) |
@@ -958,7 +942,7 @@ static const struct file_operations kprobe_profile_ops = { | |||
958 | }; | 942 | }; |
959 | 943 | ||
960 | /* Kprobe handler */ | 944 | /* Kprobe handler */ |
961 | static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | 945 | static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) |
962 | { | 946 | { |
963 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 947 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
964 | struct kprobe_trace_entry *entry; | 948 | struct kprobe_trace_entry *entry; |
@@ -978,7 +962,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
978 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | 962 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, |
979 | irq_flags, pc); | 963 | irq_flags, pc); |
980 | if (!event) | 964 | if (!event) |
981 | return 0; | 965 | return; |
982 | 966 | ||
983 | entry = ring_buffer_event_data(event); | 967 | entry = ring_buffer_event_data(event); |
984 | entry->nargs = tp->nr_args; | 968 | entry->nargs = tp->nr_args; |
@@ -988,11 +972,10 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) | |||
988 | 972 | ||
989 | if (!filter_current_check_discard(buffer, call, entry, event)) | 973 | if (!filter_current_check_discard(buffer, call, entry, event)) |
990 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 974 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); |
991 | return 0; | ||
992 | } | 975 | } |
993 | 976 | ||
994 | /* Kretprobe handler */ | 977 | /* Kretprobe handler */ |
995 | static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | 978 | static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, |
996 | struct pt_regs *regs) | 979 | struct pt_regs *regs) |
997 | { | 980 | { |
998 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 981 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
@@ -1011,7 +994,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | |||
1011 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, | 994 | event = trace_current_buffer_lock_reserve(&buffer, call->id, size, |
1012 | irq_flags, pc); | 995 | irq_flags, pc); |
1013 | if (!event) | 996 | if (!event) |
1014 | return 0; | 997 | return; |
1015 | 998 | ||
1016 | entry = ring_buffer_event_data(event); | 999 | entry = ring_buffer_event_data(event); |
1017 | entry->nargs = tp->nr_args; | 1000 | entry->nargs = tp->nr_args; |
@@ -1022,8 +1005,6 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, | |||
1022 | 1005 | ||
1023 | if (!filter_current_check_discard(buffer, call, entry, event)) | 1006 | if (!filter_current_check_discard(buffer, call, entry, event)) |
1024 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); | 1007 | trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); |
1025 | |||
1026 | return 0; | ||
1027 | } | 1008 | } |
1028 | 1009 | ||
1029 | /* Event entry printers */ | 1010 | /* Event entry printers */ |
@@ -1250,137 +1231,67 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, | |||
1250 | ", REC->" FIELD_STRING_RETIP); | 1231 | ", REC->" FIELD_STRING_RETIP); |
1251 | } | 1232 | } |
1252 | 1233 | ||
1253 | #ifdef CONFIG_EVENT_PROFILE | 1234 | #ifdef CONFIG_PERF_EVENTS |
1254 | 1235 | ||
1255 | /* Kprobe profile handler */ | 1236 | /* Kprobe profile handler */ |
1256 | static __kprobes int kprobe_profile_func(struct kprobe *kp, | 1237 | static __kprobes void kprobe_profile_func(struct kprobe *kp, |
1257 | struct pt_regs *regs) | 1238 | struct pt_regs *regs) |
1258 | { | 1239 | { |
1259 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1240 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1260 | struct ftrace_event_call *call = &tp->call; | 1241 | struct ftrace_event_call *call = &tp->call; |
1261 | struct kprobe_trace_entry *entry; | 1242 | struct kprobe_trace_entry *entry; |
1262 | struct trace_entry *ent; | 1243 | int size, __size, i; |
1263 | int size, __size, i, pc, __cpu; | ||
1264 | unsigned long irq_flags; | 1244 | unsigned long irq_flags; |
1265 | char *trace_buf; | ||
1266 | char *raw_data; | ||
1267 | int rctx; | 1245 | int rctx; |
1268 | 1246 | ||
1269 | pc = preempt_count(); | ||
1270 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); | 1247 | __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); |
1271 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1248 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1272 | size -= sizeof(u32); | 1249 | size -= sizeof(u32); |
1273 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | 1250 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
1274 | "profile buffer not large enough")) | 1251 | "profile buffer not large enough")) |
1275 | return 0; | 1252 | return; |
1276 | |||
1277 | /* | ||
1278 | * Protect the non nmi buffer | ||
1279 | * This also protects the rcu read side | ||
1280 | */ | ||
1281 | local_irq_save(irq_flags); | ||
1282 | 1253 | ||
1283 | rctx = perf_swevent_get_recursion_context(); | 1254 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1284 | if (rctx < 0) | 1255 | if (!entry) |
1285 | goto end_recursion; | 1256 | return; |
1286 | |||
1287 | __cpu = smp_processor_id(); | ||
1288 | |||
1289 | if (in_nmi()) | ||
1290 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1291 | else | ||
1292 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1293 | |||
1294 | if (!trace_buf) | ||
1295 | goto end; | ||
1296 | |||
1297 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1298 | |||
1299 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | ||
1300 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
1301 | entry = (struct kprobe_trace_entry *)raw_data; | ||
1302 | ent = &entry->ent; | ||
1303 | 1257 | ||
1304 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1305 | ent->type = call->id; | ||
1306 | entry->nargs = tp->nr_args; | 1258 | entry->nargs = tp->nr_args; |
1307 | entry->ip = (unsigned long)kp->addr; | 1259 | entry->ip = (unsigned long)kp->addr; |
1308 | for (i = 0; i < tp->nr_args; i++) | 1260 | for (i = 0; i < tp->nr_args; i++) |
1309 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1261 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1310 | perf_tp_event(call->id, entry->ip, 1, entry, size); | ||
1311 | 1262 | ||
1312 | end: | 1263 | ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); |
1313 | perf_swevent_put_recursion_context(rctx); | ||
1314 | end_recursion: | ||
1315 | local_irq_restore(irq_flags); | ||
1316 | |||
1317 | return 0; | ||
1318 | } | 1264 | } |
1319 | 1265 | ||
1320 | /* Kretprobe profile handler */ | 1266 | /* Kretprobe profile handler */ |
1321 | static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, | 1267 | static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, |
1322 | struct pt_regs *regs) | 1268 | struct pt_regs *regs) |
1323 | { | 1269 | { |
1324 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1270 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1325 | struct ftrace_event_call *call = &tp->call; | 1271 | struct ftrace_event_call *call = &tp->call; |
1326 | struct kretprobe_trace_entry *entry; | 1272 | struct kretprobe_trace_entry *entry; |
1327 | struct trace_entry *ent; | 1273 | int size, __size, i; |
1328 | int size, __size, i, pc, __cpu; | ||
1329 | unsigned long irq_flags; | 1274 | unsigned long irq_flags; |
1330 | char *trace_buf; | ||
1331 | char *raw_data; | ||
1332 | int rctx; | 1275 | int rctx; |
1333 | 1276 | ||
1334 | pc = preempt_count(); | ||
1335 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); | 1277 | __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); |
1336 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); | 1278 | size = ALIGN(__size + sizeof(u32), sizeof(u64)); |
1337 | size -= sizeof(u32); | 1279 | size -= sizeof(u32); |
1338 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, | 1280 | if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, |
1339 | "profile buffer not large enough")) | 1281 | "profile buffer not large enough")) |
1340 | return 0; | 1282 | return; |
1341 | |||
1342 | /* | ||
1343 | * Protect the non nmi buffer | ||
1344 | * This also protects the rcu read side | ||
1345 | */ | ||
1346 | local_irq_save(irq_flags); | ||
1347 | |||
1348 | rctx = perf_swevent_get_recursion_context(); | ||
1349 | if (rctx < 0) | ||
1350 | goto end_recursion; | ||
1351 | |||
1352 | __cpu = smp_processor_id(); | ||
1353 | |||
1354 | if (in_nmi()) | ||
1355 | trace_buf = rcu_dereference(perf_trace_buf_nmi); | ||
1356 | else | ||
1357 | trace_buf = rcu_dereference(perf_trace_buf); | ||
1358 | |||
1359 | if (!trace_buf) | ||
1360 | goto end; | ||
1361 | |||
1362 | raw_data = per_cpu_ptr(trace_buf, __cpu); | ||
1363 | 1283 | ||
1364 | /* Zero dead bytes from alignment to avoid buffer leak to userspace */ | 1284 | entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); |
1365 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | 1285 | if (!entry) |
1366 | entry = (struct kretprobe_trace_entry *)raw_data; | 1286 | return; |
1367 | ent = &entry->ent; | ||
1368 | 1287 | ||
1369 | tracing_generic_entry_update(ent, irq_flags, pc); | ||
1370 | ent->type = call->id; | ||
1371 | entry->nargs = tp->nr_args; | 1288 | entry->nargs = tp->nr_args; |
1372 | entry->func = (unsigned long)tp->rp.kp.addr; | 1289 | entry->func = (unsigned long)tp->rp.kp.addr; |
1373 | entry->ret_ip = (unsigned long)ri->ret_addr; | 1290 | entry->ret_ip = (unsigned long)ri->ret_addr; |
1374 | for (i = 0; i < tp->nr_args; i++) | 1291 | for (i = 0; i < tp->nr_args; i++) |
1375 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); | 1292 | entry->args[i] = call_fetch(&tp->args[i].fetch, regs); |
1376 | perf_tp_event(call->id, entry->ret_ip, 1, entry, size); | ||
1377 | |||
1378 | end: | ||
1379 | perf_swevent_put_recursion_context(rctx); | ||
1380 | end_recursion: | ||
1381 | local_irq_restore(irq_flags); | ||
1382 | 1293 | ||
1383 | return 0; | 1294 | ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); |
1384 | } | 1295 | } |
1385 | 1296 | ||
1386 | static int probe_profile_enable(struct ftrace_event_call *call) | 1297 | static int probe_profile_enable(struct ftrace_event_call *call) |
@@ -1408,7 +1319,7 @@ static void probe_profile_disable(struct ftrace_event_call *call) | |||
1408 | disable_kprobe(&tp->rp.kp); | 1319 | disable_kprobe(&tp->rp.kp); |
1409 | } | 1320 | } |
1410 | } | 1321 | } |
1411 | #endif /* CONFIG_EVENT_PROFILE */ | 1322 | #endif /* CONFIG_PERF_EVENTS */ |
1412 | 1323 | ||
1413 | 1324 | ||
1414 | static __kprobes | 1325 | static __kprobes |
@@ -1418,10 +1329,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) | |||
1418 | 1329 | ||
1419 | if (tp->flags & TP_FLAG_TRACE) | 1330 | if (tp->flags & TP_FLAG_TRACE) |
1420 | kprobe_trace_func(kp, regs); | 1331 | kprobe_trace_func(kp, regs); |
1421 | #ifdef CONFIG_EVENT_PROFILE | 1332 | #ifdef CONFIG_PERF_EVENTS |
1422 | if (tp->flags & TP_FLAG_PROFILE) | 1333 | if (tp->flags & TP_FLAG_PROFILE) |
1423 | kprobe_profile_func(kp, regs); | 1334 | kprobe_profile_func(kp, regs); |
1424 | #endif /* CONFIG_EVENT_PROFILE */ | 1335 | #endif |
1425 | return 0; /* We don't tweak the kernel, so just return 0 */ | 1336 | return 0; /* We don't tweak the kernel, so just return 0 */ |
1426 | } | 1337 | } |
1427 | 1338 | ||
@@ -1432,10 +1343,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) | |||
1432 | 1343 | ||
1433 | if (tp->flags & TP_FLAG_TRACE) | 1344 | if (tp->flags & TP_FLAG_TRACE) |
1434 | kretprobe_trace_func(ri, regs); | 1345 | kretprobe_trace_func(ri, regs); |
1435 | #ifdef CONFIG_EVENT_PROFILE | 1346 | #ifdef CONFIG_PERF_EVENTS |
1436 | if (tp->flags & TP_FLAG_PROFILE) | 1347 | if (tp->flags & TP_FLAG_PROFILE) |
1437 | kretprobe_profile_func(ri, regs); | 1348 | kretprobe_profile_func(ri, regs); |
1438 | #endif /* CONFIG_EVENT_PROFILE */ | 1349 | #endif |
1439 | return 0; /* We don't tweak the kernel, so just return 0 */ | 1350 | return 0; /* We don't tweak the kernel, so just return 0 */ |
1440 | } | 1351 | } |
1441 | 1352 | ||
@@ -1464,7 +1375,7 @@ static int register_probe_event(struct trace_probe *tp) | |||
1464 | call->regfunc = probe_event_enable; | 1375 | call->regfunc = probe_event_enable; |
1465 | call->unregfunc = probe_event_disable; | 1376 | call->unregfunc = probe_event_disable; |
1466 | 1377 | ||
1467 | #ifdef CONFIG_EVENT_PROFILE | 1378 | #ifdef CONFIG_PERF_EVENTS |
1468 | call->profile_enable = probe_profile_enable; | 1379 | call->profile_enable = probe_profile_enable; |
1469 | call->profile_disable = probe_profile_disable; | 1380 | call->profile_disable = probe_profile_disable; |
1470 | #endif | 1381 | #endif |
@@ -1523,28 +1434,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3, | |||
1523 | 1434 | ||
1524 | static __init int kprobe_trace_self_tests_init(void) | 1435 | static __init int kprobe_trace_self_tests_init(void) |
1525 | { | 1436 | { |
1526 | int ret; | 1437 | int ret, warn = 0; |
1527 | int (*target)(int, int, int, int, int, int); | 1438 | int (*target)(int, int, int, int, int, int); |
1439 | struct trace_probe *tp; | ||
1528 | 1440 | ||
1529 | target = kprobe_trace_selftest_target; | 1441 | target = kprobe_trace_selftest_target; |
1530 | 1442 | ||
1531 | pr_info("Testing kprobe tracing: "); | 1443 | pr_info("Testing kprobe tracing: "); |
1532 | 1444 | ||
1533 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " | 1445 | ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " |
1534 | "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); | 1446 | "$stack $stack0 +0($stack)"); |
1535 | if (WARN_ON_ONCE(ret)) | 1447 | if (WARN_ON_ONCE(ret)) { |
1536 | pr_warning("error enabling function entry\n"); | 1448 | pr_warning("error on probing function entry.\n"); |
1449 | warn++; | ||
1450 | } else { | ||
1451 | /* Enable trace point */ | ||
1452 | tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM); | ||
1453 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1454 | pr_warning("error on getting new probe.\n"); | ||
1455 | warn++; | ||
1456 | } else | ||
1457 | probe_event_enable(&tp->call); | ||
1458 | } | ||
1537 | 1459 | ||
1538 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " | 1460 | ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " |
1539 | "$retval"); | 1461 | "$retval"); |
1540 | if (WARN_ON_ONCE(ret)) | 1462 | if (WARN_ON_ONCE(ret)) { |
1541 | pr_warning("error enabling function return\n"); | 1463 | pr_warning("error on probing function return.\n"); |
1464 | warn++; | ||
1465 | } else { | ||
1466 | /* Enable trace point */ | ||
1467 | tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM); | ||
1468 | if (WARN_ON_ONCE(tp == NULL)) { | ||
1469 | pr_warning("error on getting new probe.\n"); | ||
1470 | warn++; | ||
1471 | } else | ||
1472 | probe_event_enable(&tp->call); | ||
1473 | } | ||
1474 | |||
1475 | if (warn) | ||
1476 | goto end; | ||
1542 | 1477 | ||
1543 | ret = target(1, 2, 3, 4, 5, 6); | 1478 | ret = target(1, 2, 3, 4, 5, 6); |
1544 | 1479 | ||
1545 | cleanup_all_probes(); | 1480 | ret = command_trace_probe("-:testprobe"); |
1481 | if (WARN_ON_ONCE(ret)) { | ||
1482 | pr_warning("error on deleting a probe.\n"); | ||
1483 | warn++; | ||
1484 | } | ||
1485 | |||
1486 | ret = command_trace_probe("-:testprobe2"); | ||
1487 | if (WARN_ON_ONCE(ret)) { | ||
1488 | pr_warning("error on deleting a probe.\n"); | ||
1489 | warn++; | ||
1490 | } | ||
1546 | 1491 | ||
1547 | pr_cont("OK\n"); | 1492 | end: |
1493 | cleanup_all_probes(); | ||
1494 | if (warn) | ||
1495 | pr_cont("NG: Some tests failed. Please check them.\n"); | ||
1496 | else | ||
1497 | pr_cont("OK\n"); | ||
1548 | return 0; | 1498 | return 0; |
1549 | } | 1499 | } |
1550 | 1500 | ||
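The reworked self-test above also doubles as a reference for the lifecycle of a probe event inside trace_kprobe.c: define it with command_trace_probe(), look it up with find_probe_event(), arm it with probe_event_enable(), then delete it by name. A condensed sketch of that flow (all helpers are file-local to trace_kprobe.c, "demoprobe" is made up, and error handling is trimmed):

	static int __init demoprobe_lifecycle(void)
	{
		struct trace_probe *tp;

		/* 1. Define a kprobe event fetching two stack slots. */
		if (command_trace_probe("p:demoprobe kprobe_trace_selftest_target $stack $stack0"))
			return -EINVAL;

		/* 2. Find the new event and arm its kprobe. */
		tp = find_probe_event("demoprobe", KPROBE_EVENT_SYSTEM);
		if (tp)
			probe_event_enable(&tp->call);

		/* 3. ... exercise kprobe_trace_selftest_target() here ... */

		/* 4. Delete the probe by name, as the self-test now does. */
		return command_trace_probe("-:demoprobe");
	}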
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..4e332b9e449c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void) | |||
421 | } | 421 | } |
422 | core_initcall(init_ftrace_syscalls); | 422 | core_initcall(init_ftrace_syscalls); |
423 | 423 | ||
424 | #ifdef CONFIG_EVENT_PROFILE | 424 | #ifdef CONFIG_PERF_EVENTS |
425 | 425 | ||
426 | static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); | 426 | static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); |
427 | static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); | 427 | static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); |
@@ -433,12 +433,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
433 | struct syscall_metadata *sys_data; | 433 | struct syscall_metadata *sys_data; |
434 | struct syscall_trace_enter *rec; | 434 | struct syscall_trace_enter *rec; |
435 | unsigned long flags; | 435 | unsigned long flags; |
436 | char *trace_buf; | ||
437 | char *raw_data; | ||
438 | int syscall_nr; | 436 | int syscall_nr; |
439 | int rctx; | 437 | int rctx; |
440 | int size; | 438 | int size; |
441 | int cpu; | ||
442 | 439 | ||
443 | syscall_nr = syscall_get_nr(current, regs); | 440 | syscall_nr = syscall_get_nr(current, regs); |
444 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) | 441 | if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) |
@@ -457,37 +454,15 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) | |||
457 | "profile buffer not large enough")) | 454 | "profile buffer not large enough")) |
458 | return; | 455 | return; |
459 | 456 | ||
460 | /* Protect the per cpu buffer, begin the rcu read side */ | 457 | rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, |
461 | local_irq_save(flags); | 458 | sys_data->enter_event->id, &rctx, &flags); |
462 | 459 | if (!rec) | |
463 | rctx = perf_swevent_get_recursion_context(); | 460 | return; |
464 | if (rctx < 0) | ||
465 | goto end_recursion; | ||
466 | |||
467 | cpu = smp_processor_id(); | ||
468 | |||
469 | trace_buf = rcu_dereference(perf_trace_buf); | ||
470 | |||
471 | if (!trace_buf) | ||
472 | goto end; | ||
473 | |||
474 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
475 | |||
476 | /* zero the dead bytes from align to not leak stack to user */ | ||
477 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
478 | 461 | ||
479 | rec = (struct syscall_trace_enter *) raw_data; | ||
480 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
481 | rec->ent.type = sys_data->enter_event->id; | ||
482 | rec->nr = syscall_nr; | 462 | rec->nr = syscall_nr; |
483 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 463 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
484 | (unsigned long *)&rec->args); | 464 | (unsigned long *)&rec->args); |
485 | perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); | 465 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
486 | |||
487 | end: | ||
488 | perf_swevent_put_recursion_context(rctx); | ||
489 | end_recursion: | ||
490 | local_irq_restore(flags); | ||
491 | } | 466 | } |
492 | 467 | ||
493 | int prof_sysenter_enable(struct ftrace_event_call *call) | 468 | int prof_sysenter_enable(struct ftrace_event_call *call) |
@@ -531,11 +506,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
531 | struct syscall_trace_exit *rec; | 506 | struct syscall_trace_exit *rec; |
532 | unsigned long flags; | 507 | unsigned long flags; |
533 | int syscall_nr; | 508 | int syscall_nr; |
534 | char *trace_buf; | ||
535 | char *raw_data; | ||
536 | int rctx; | 509 | int rctx; |
537 | int size; | 510 | int size; |
538 | int cpu; | ||
539 | 511 | ||
540 | syscall_nr = syscall_get_nr(current, regs); | 512 | syscall_nr = syscall_get_nr(current, regs); |
541 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) | 513 | if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) |
@@ -557,38 +529,15 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) | |||
557 | "exit event has grown above profile buffer size")) | 529 | "exit event has grown above profile buffer size")) |
558 | return; | 530 | return; |
559 | 531 | ||
560 | /* Protect the per cpu buffer, begin the rcu read side */ | 532 | rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, |
561 | local_irq_save(flags); | 533 | sys_data->exit_event->id, &rctx, &flags); |
562 | 534 | if (!rec) | |
563 | rctx = perf_swevent_get_recursion_context(); | 535 | return; |
564 | if (rctx < 0) | ||
565 | goto end_recursion; | ||
566 | |||
567 | cpu = smp_processor_id(); | ||
568 | |||
569 | trace_buf = rcu_dereference(perf_trace_buf); | ||
570 | |||
571 | if (!trace_buf) | ||
572 | goto end; | ||
573 | |||
574 | raw_data = per_cpu_ptr(trace_buf, cpu); | ||
575 | |||
576 | /* zero the dead bytes from align to not leak stack to user */ | ||
577 | *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; | ||
578 | |||
579 | rec = (struct syscall_trace_exit *)raw_data; | ||
580 | 536 | ||
581 | tracing_generic_entry_update(&rec->ent, 0, 0); | ||
582 | rec->ent.type = sys_data->exit_event->id; | ||
583 | rec->nr = syscall_nr; | 537 | rec->nr = syscall_nr; |
584 | rec->ret = syscall_get_return_value(current, regs); | 538 | rec->ret = syscall_get_return_value(current, regs); |
585 | 539 | ||
586 | perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); | 540 | ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); |
587 | |||
588 | end: | ||
589 | perf_swevent_put_recursion_context(rctx); | ||
590 | end_recursion: | ||
591 | local_irq_restore(flags); | ||
592 | } | 541 | } |
593 | 542 | ||
594 | int prof_sysexit_enable(struct ftrace_event_call *call) | 543 | int prof_sysexit_enable(struct ftrace_event_call *call) |
@@ -626,6 +575,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call) | |||
626 | mutex_unlock(&syscall_trace_lock); | 575 | mutex_unlock(&syscall_trace_lock); |
627 | } | 576 | } |
628 | 577 | ||
629 | #endif | 578 | #endif /* CONFIG_PERF_EVENTS */ |
630 | |||
631 | 579 | ||