Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/syscall.c          |   7
-rw-r--r-- | kernel/events/core.c          | 752
-rw-r--r-- | kernel/events/hw_breakpoint.c |   8
-rw-r--r-- | kernel/events/internal.h      |  33
-rw-r--r-- | kernel/events/ring_buffer.c   | 327
-rw-r--r-- | kernel/trace/Kconfig          |   8
-rw-r--r-- | kernel/trace/Makefile         |   1
-rw-r--r-- | kernel/trace/bpf_trace.c      | 222
-rw-r--r-- | kernel/trace/trace_kprobe.c   |  10
-rw-r--r-- | kernel/trace/trace_uprobe.c   |  10
-rw-r--r-- | kernel/watchdog.c             |  28
11 files changed, 1226 insertions, 180 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 536edc2be307..504c10b990ef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/file.h> | 16 | #include <linux/file.h> |
17 | #include <linux/license.h> | 17 | #include <linux/license.h> |
18 | #include <linux/filter.h> | 18 | #include <linux/filter.h> |
19 | #include <linux/version.h> | ||
19 | 20 | ||
20 | static LIST_HEAD(bpf_map_types); | 21 | static LIST_HEAD(bpf_map_types); |
21 | 22 | ||
@@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) | |||
467 | } | 468 | } |
468 | 469 | ||
469 | /* last field in 'union bpf_attr' used by this command */ | 470 | /* last field in 'union bpf_attr' used by this command */ |
470 | #define BPF_PROG_LOAD_LAST_FIELD log_buf | 471 | #define BPF_PROG_LOAD_LAST_FIELD kern_version |
471 | 472 | ||
472 | static int bpf_prog_load(union bpf_attr *attr) | 473 | static int bpf_prog_load(union bpf_attr *attr) |
473 | { | 474 | { |
@@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr) | |||
492 | if (attr->insn_cnt >= BPF_MAXINSNS) | 493 | if (attr->insn_cnt >= BPF_MAXINSNS) |
493 | return -EINVAL; | 494 | return -EINVAL; |
494 | 495 | ||
496 | if (type == BPF_PROG_TYPE_KPROBE && | ||
497 | attr->kern_version != LINUX_VERSION_CODE) | ||
498 | return -EINVAL; | ||
499 | |||
495 | /* plain bpf_prog allocation */ | 500 | /* plain bpf_prog allocation */ |
496 | prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); | 501 | prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); |
497 | if (!prog) | 502 | if (!prog) |
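The new test above refuses to load a BPF_PROG_TYPE_KPROBE program unless the kern_version supplied by userspace matches the running kernel's LINUX_VERSION_CODE, since kprobe programs reach into internal data structures that change from release to release. A minimal loader sketch, assuming headers that carry the new bpf_attr field (the helper name is illustrative):

#include <linux/bpf.h>
#include <linux/version.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Load a kprobe program; kern_version must equal the running kernel's
 * LINUX_VERSION_CODE or BPF_PROG_LOAD now fails with -EINVAL. */
static int load_kprobe_prog(const struct bpf_insn *insns, __u32 insn_cnt,
			    const char *license)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.prog_type    = BPF_PROG_TYPE_KPROBE;
	attr.insns        = (__u64)(unsigned long)insns;
	attr.insn_cnt     = insn_cnt;
	attr.license      = (__u64)(unsigned long)license;
	attr.kern_version = LINUX_VERSION_CODE;	/* rebuild per kernel */

	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}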
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2fabc0627165..06917d537302 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -34,14 +34,16 @@ | |||
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/anon_inodes.h> | 35 | #include <linux/anon_inodes.h> |
36 | #include <linux/kernel_stat.h> | 36 | #include <linux/kernel_stat.h> |
37 | #include <linux/cgroup.h> | ||
37 | #include <linux/perf_event.h> | 38 | #include <linux/perf_event.h> |
38 | #include <linux/ftrace_event.h> | 39 | #include <linux/ftrace_event.h> |
39 | #include <linux/hw_breakpoint.h> | 40 | #include <linux/hw_breakpoint.h> |
40 | #include <linux/mm_types.h> | 41 | #include <linux/mm_types.h> |
41 | #include <linux/cgroup.h> | ||
42 | #include <linux/module.h> | 42 | #include <linux/module.h> |
43 | #include <linux/mman.h> | 43 | #include <linux/mman.h> |
44 | #include <linux/compat.h> | 44 | #include <linux/compat.h> |
45 | #include <linux/bpf.h> | ||
46 | #include <linux/filter.h> | ||
45 | 47 | ||
46 | #include "internal.h" | 48 | #include "internal.h" |
47 | 49 | ||
@@ -153,7 +155,7 @@ enum event_type_t { | |||
153 | */ | 155 | */ |
154 | struct static_key_deferred perf_sched_events __read_mostly; | 156 | struct static_key_deferred perf_sched_events __read_mostly; |
155 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 157 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
156 | static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); | 158 | static DEFINE_PER_CPU(int, perf_sched_cb_usages); |
157 | 159 | ||
158 | static atomic_t nr_mmap_events __read_mostly; | 160 | static atomic_t nr_mmap_events __read_mostly; |
159 | static atomic_t nr_comm_events __read_mostly; | 161 | static atomic_t nr_comm_events __read_mostly; |
@@ -327,6 +329,11 @@ static inline u64 perf_clock(void) | |||
327 | return local_clock(); | 329 | return local_clock(); |
328 | } | 330 | } |
329 | 331 | ||
332 | static inline u64 perf_event_clock(struct perf_event *event) | ||
333 | { | ||
334 | return event->clock(); | ||
335 | } | ||
336 | |||
330 | static inline struct perf_cpu_context * | 337 | static inline struct perf_cpu_context * |
331 | __get_cpu_context(struct perf_event_context *ctx) | 338 | __get_cpu_context(struct perf_event_context *ctx) |
332 | { | 339 | { |
@@ -351,32 +358,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, | |||
351 | 358 | ||
352 | #ifdef CONFIG_CGROUP_PERF | 359 | #ifdef CONFIG_CGROUP_PERF |
353 | 360 | ||
354 | /* | ||
355 | * perf_cgroup_info keeps track of time_enabled for a cgroup. | ||
356 | * This is a per-cpu dynamically allocated data structure. | ||
357 | */ | ||
358 | struct perf_cgroup_info { | ||
359 | u64 time; | ||
360 | u64 timestamp; | ||
361 | }; | ||
362 | |||
363 | struct perf_cgroup { | ||
364 | struct cgroup_subsys_state css; | ||
365 | struct perf_cgroup_info __percpu *info; | ||
366 | }; | ||
367 | |||
368 | /* | ||
369 | * Must ensure cgroup is pinned (css_get) before calling | ||
370 | * this function. In other words, we cannot call this function | ||
371 | * if there is no cgroup event for the current CPU context. | ||
372 | */ | ||
373 | static inline struct perf_cgroup * | ||
374 | perf_cgroup_from_task(struct task_struct *task) | ||
375 | { | ||
376 | return container_of(task_css(task, perf_event_cgrp_id), | ||
377 | struct perf_cgroup, css); | ||
378 | } | ||
379 | |||
380 | static inline bool | 361 | static inline bool |
381 | perf_cgroup_match(struct perf_event *event) | 362 | perf_cgroup_match(struct perf_event *event) |
382 | { | 363 | { |
@@ -905,6 +886,15 @@ static void get_ctx(struct perf_event_context *ctx) | |||
905 | WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); | 886 | WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); |
906 | } | 887 | } |
907 | 888 | ||
889 | static void free_ctx(struct rcu_head *head) | ||
890 | { | ||
891 | struct perf_event_context *ctx; | ||
892 | |||
893 | ctx = container_of(head, struct perf_event_context, rcu_head); | ||
894 | kfree(ctx->task_ctx_data); | ||
895 | kfree(ctx); | ||
896 | } | ||
897 | |||
908 | static void put_ctx(struct perf_event_context *ctx) | 898 | static void put_ctx(struct perf_event_context *ctx) |
909 | { | 899 | { |
910 | if (atomic_dec_and_test(&ctx->refcount)) { | 900 | if (atomic_dec_and_test(&ctx->refcount)) { |
@@ -912,7 +902,7 @@ static void put_ctx(struct perf_event_context *ctx) | |||
912 | put_ctx(ctx->parent_ctx); | 902 | put_ctx(ctx->parent_ctx); |
913 | if (ctx->task) | 903 | if (ctx->task) |
914 | put_task_struct(ctx->task); | 904 | put_task_struct(ctx->task); |
915 | kfree_rcu(ctx, rcu_head); | 905 | call_rcu(&ctx->rcu_head, free_ctx); |
916 | } | 906 | } |
917 | } | 907 | } |
918 | 908 | ||
@@ -1239,9 +1229,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1239 | if (is_cgroup_event(event)) | 1229 | if (is_cgroup_event(event)) |
1240 | ctx->nr_cgroups++; | 1230 | ctx->nr_cgroups++; |
1241 | 1231 | ||
1242 | if (has_branch_stack(event)) | ||
1243 | ctx->nr_branch_stack++; | ||
1244 | |||
1245 | list_add_rcu(&event->event_entry, &ctx->event_list); | 1232 | list_add_rcu(&event->event_entry, &ctx->event_list); |
1246 | ctx->nr_events++; | 1233 | ctx->nr_events++; |
1247 | if (event->attr.inherit_stat) | 1234 | if (event->attr.inherit_stat) |
@@ -1408,9 +1395,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1408 | cpuctx->cgrp = NULL; | 1395 | cpuctx->cgrp = NULL; |
1409 | } | 1396 | } |
1410 | 1397 | ||
1411 | if (has_branch_stack(event)) | ||
1412 | ctx->nr_branch_stack--; | ||
1413 | |||
1414 | ctx->nr_events--; | 1398 | ctx->nr_events--; |
1415 | if (event->attr.inherit_stat) | 1399 | if (event->attr.inherit_stat) |
1416 | ctx->nr_stat--; | 1400 | ctx->nr_stat--; |
@@ -1847,6 +1831,7 @@ static void perf_set_shadow_time(struct perf_event *event, | |||
1847 | #define MAX_INTERRUPTS (~0ULL) | 1831 | #define MAX_INTERRUPTS (~0ULL) |
1848 | 1832 | ||
1849 | static void perf_log_throttle(struct perf_event *event, int enable); | 1833 | static void perf_log_throttle(struct perf_event *event, int enable); |
1834 | static void perf_log_itrace_start(struct perf_event *event); | ||
1850 | 1835 | ||
1851 | static int | 1836 | static int |
1852 | event_sched_in(struct perf_event *event, | 1837 | event_sched_in(struct perf_event *event, |
@@ -1881,6 +1866,12 @@ event_sched_in(struct perf_event *event, | |||
1881 | 1866 | ||
1882 | perf_pmu_disable(event->pmu); | 1867 | perf_pmu_disable(event->pmu); |
1883 | 1868 | ||
1869 | event->tstamp_running += tstamp - event->tstamp_stopped; | ||
1870 | |||
1871 | perf_set_shadow_time(event, ctx, tstamp); | ||
1872 | |||
1873 | perf_log_itrace_start(event); | ||
1874 | |||
1884 | if (event->pmu->add(event, PERF_EF_START)) { | 1875 | if (event->pmu->add(event, PERF_EF_START)) { |
1885 | event->state = PERF_EVENT_STATE_INACTIVE; | 1876 | event->state = PERF_EVENT_STATE_INACTIVE; |
1886 | event->oncpu = -1; | 1877 | event->oncpu = -1; |
@@ -1888,10 +1879,6 @@ event_sched_in(struct perf_event *event, | |||
1888 | goto out; | 1879 | goto out; |
1889 | } | 1880 | } |
1890 | 1881 | ||
1891 | event->tstamp_running += tstamp - event->tstamp_stopped; | ||
1892 | |||
1893 | perf_set_shadow_time(event, ctx, tstamp); | ||
1894 | |||
1895 | if (!is_software_event(event)) | 1882 | if (!is_software_event(event)) |
1896 | cpuctx->active_oncpu++; | 1883 | cpuctx->active_oncpu++; |
1897 | if (!ctx->nr_active++) | 1884 | if (!ctx->nr_active++) |
@@ -2559,6 +2546,9 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, | |||
2559 | next->perf_event_ctxp[ctxn] = ctx; | 2546 | next->perf_event_ctxp[ctxn] = ctx; |
2560 | ctx->task = next; | 2547 | ctx->task = next; |
2561 | next_ctx->task = task; | 2548 | next_ctx->task = task; |
2549 | |||
2550 | swap(ctx->task_ctx_data, next_ctx->task_ctx_data); | ||
2551 | |||
2562 | do_switch = 0; | 2552 | do_switch = 0; |
2563 | 2553 | ||
2564 | perf_event_sync_stat(ctx, next_ctx); | 2554 | perf_event_sync_stat(ctx, next_ctx); |
@@ -2577,6 +2567,56 @@ unlock: | |||
2577 | } | 2567 | } |
2578 | } | 2568 | } |
2579 | 2569 | ||
2570 | void perf_sched_cb_dec(struct pmu *pmu) | ||
2571 | { | ||
2572 | this_cpu_dec(perf_sched_cb_usages); | ||
2573 | } | ||
2574 | |||
2575 | void perf_sched_cb_inc(struct pmu *pmu) | ||
2576 | { | ||
2577 | this_cpu_inc(perf_sched_cb_usages); | ||
2578 | } | ||
2579 | |||
2580 | /* | ||
2581 | * This function provides the context switch callback to the lower code | ||
2582 | * layer. It is invoked ONLY when the context switch callback is enabled. | ||
2583 | */ | ||
2584 | static void perf_pmu_sched_task(struct task_struct *prev, | ||
2585 | struct task_struct *next, | ||
2586 | bool sched_in) | ||
2587 | { | ||
2588 | struct perf_cpu_context *cpuctx; | ||
2589 | struct pmu *pmu; | ||
2590 | unsigned long flags; | ||
2591 | |||
2592 | if (prev == next) | ||
2593 | return; | ||
2594 | |||
2595 | local_irq_save(flags); | ||
2596 | |||
2597 | rcu_read_lock(); | ||
2598 | |||
2599 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
2600 | if (pmu->sched_task) { | ||
2601 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
2602 | |||
2603 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
2604 | |||
2605 | perf_pmu_disable(pmu); | ||
2606 | |||
2607 | pmu->sched_task(cpuctx->task_ctx, sched_in); | ||
2608 | |||
2609 | perf_pmu_enable(pmu); | ||
2610 | |||
2611 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
2612 | } | ||
2613 | } | ||
2614 | |||
2615 | rcu_read_unlock(); | ||
2616 | |||
2617 | local_irq_restore(flags); | ||
2618 | } | ||
2619 | |||
2580 | #define for_each_task_context_nr(ctxn) \ | 2620 | #define for_each_task_context_nr(ctxn) \ |
2581 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) | 2621 | for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) |
2582 | 2622 | ||
@@ -2596,6 +2636,9 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
2596 | { | 2636 | { |
2597 | int ctxn; | 2637 | int ctxn; |
2598 | 2638 | ||
2639 | if (__this_cpu_read(perf_sched_cb_usages)) | ||
2640 | perf_pmu_sched_task(task, next, false); | ||
2641 | |||
2599 | for_each_task_context_nr(ctxn) | 2642 | for_each_task_context_nr(ctxn) |
2600 | perf_event_context_sched_out(task, ctxn, next); | 2643 | perf_event_context_sched_out(task, ctxn, next); |
2601 | 2644 | ||
@@ -2755,64 +2798,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2755 | } | 2798 | } |
2756 | 2799 | ||
2757 | /* | 2800 | /* |
2758 | * When sampling the branck stack in system-wide, it may be necessary | ||
2759 | * to flush the stack on context switch. This happens when the branch | ||
2760 | * stack does not tag its entries with the pid of the current task. | ||
2761 | * Otherwise it becomes impossible to associate a branch entry with a | ||
2762 | * task. This ambiguity is more likely to appear when the branch stack | ||
2763 | * supports priv level filtering and the user sets it to monitor only | ||
2764 | * at the user level (which could be a useful measurement in system-wide | ||
2765 | * mode). In that case, the risk is high of having a branch stack with | ||
2766 | * branch from multiple tasks. Flushing may mean dropping the existing | ||
2767 | * entries or stashing them somewhere in the PMU specific code layer. | ||
2768 | * | ||
2769 | * This function provides the context switch callback to the lower code | ||
2770 | * layer. It is invoked ONLY when there is at least one system-wide context | ||
2771 | * with at least one active event using taken branch sampling. | ||
2772 | */ | ||
2773 | static void perf_branch_stack_sched_in(struct task_struct *prev, | ||
2774 | struct task_struct *task) | ||
2775 | { | ||
2776 | struct perf_cpu_context *cpuctx; | ||
2777 | struct pmu *pmu; | ||
2778 | unsigned long flags; | ||
2779 | |||
2780 | /* no need to flush branch stack if not changing task */ | ||
2781 | if (prev == task) | ||
2782 | return; | ||
2783 | |||
2784 | local_irq_save(flags); | ||
2785 | |||
2786 | rcu_read_lock(); | ||
2787 | |||
2788 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
2789 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
2790 | |||
2791 | /* | ||
2792 | * check if the context has at least one | ||
2793 | * event using PERF_SAMPLE_BRANCH_STACK | ||
2794 | */ | ||
2795 | if (cpuctx->ctx.nr_branch_stack > 0 | ||
2796 | && pmu->flush_branch_stack) { | ||
2797 | |||
2798 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
2799 | |||
2800 | perf_pmu_disable(pmu); | ||
2801 | |||
2802 | pmu->flush_branch_stack(); | ||
2803 | |||
2804 | perf_pmu_enable(pmu); | ||
2805 | |||
2806 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
2807 | } | ||
2808 | } | ||
2809 | |||
2810 | rcu_read_unlock(); | ||
2811 | |||
2812 | local_irq_restore(flags); | ||
2813 | } | ||
2814 | |||
2815 | /* | ||
2816 | * Called from scheduler to add the events of the current task | 2801 | * Called from scheduler to add the events of the current task |
2817 | * with interrupts disabled. | 2802 | * with interrupts disabled. |
2818 | * | 2803 | * |
@@ -2844,9 +2829,8 @@ void __perf_event_task_sched_in(struct task_struct *prev, | |||
2844 | if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) | 2829 | if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) |
2845 | perf_cgroup_sched_in(prev, task); | 2830 | perf_cgroup_sched_in(prev, task); |
2846 | 2831 | ||
2847 | /* check for system-wide branch_stack events */ | 2832 | if (__this_cpu_read(perf_sched_cb_usages)) |
2848 | if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) | 2833 | perf_pmu_sched_task(prev, task, true); |
2849 | perf_branch_stack_sched_in(prev, task); | ||
2850 | } | 2834 | } |
2851 | 2835 | ||
2852 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2836 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
@@ -3220,7 +3204,10 @@ static void __perf_event_read(void *info) | |||
3220 | 3204 | ||
3221 | static inline u64 perf_event_count(struct perf_event *event) | 3205 | static inline u64 perf_event_count(struct perf_event *event) |
3222 | { | 3206 | { |
3223 | return local64_read(&event->count) + atomic64_read(&event->child_count); | 3207 | if (event->pmu->count) |
3208 | return event->pmu->count(event); | ||
3209 | |||
3210 | return __perf_event_count(event); | ||
3224 | } | 3211 | } |
3225 | 3212 | ||
3226 | static u64 perf_event_read(struct perf_event *event) | 3213 | static u64 perf_event_read(struct perf_event *event) |
@@ -3321,12 +3308,15 @@ errout: | |||
3321 | * Returns a matching context with refcount and pincount. | 3308 | * Returns a matching context with refcount and pincount. |
3322 | */ | 3309 | */ |
3323 | static struct perf_event_context * | 3310 | static struct perf_event_context * |
3324 | find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | 3311 | find_get_context(struct pmu *pmu, struct task_struct *task, |
3312 | struct perf_event *event) | ||
3325 | { | 3313 | { |
3326 | struct perf_event_context *ctx, *clone_ctx = NULL; | 3314 | struct perf_event_context *ctx, *clone_ctx = NULL; |
3327 | struct perf_cpu_context *cpuctx; | 3315 | struct perf_cpu_context *cpuctx; |
3316 | void *task_ctx_data = NULL; | ||
3328 | unsigned long flags; | 3317 | unsigned long flags; |
3329 | int ctxn, err; | 3318 | int ctxn, err; |
3319 | int cpu = event->cpu; | ||
3330 | 3320 | ||
3331 | if (!task) { | 3321 | if (!task) { |
3332 | /* Must be root to operate on a CPU event: */ | 3322 | /* Must be root to operate on a CPU event: */ |
@@ -3354,11 +3344,24 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
3354 | if (ctxn < 0) | 3344 | if (ctxn < 0) |
3355 | goto errout; | 3345 | goto errout; |
3356 | 3346 | ||
3347 | if (event->attach_state & PERF_ATTACH_TASK_DATA) { | ||
3348 | task_ctx_data = kzalloc(pmu->task_ctx_size, GFP_KERNEL); | ||
3349 | if (!task_ctx_data) { | ||
3350 | err = -ENOMEM; | ||
3351 | goto errout; | ||
3352 | } | ||
3353 | } | ||
3354 | |||
3357 | retry: | 3355 | retry: |
3358 | ctx = perf_lock_task_context(task, ctxn, &flags); | 3356 | ctx = perf_lock_task_context(task, ctxn, &flags); |
3359 | if (ctx) { | 3357 | if (ctx) { |
3360 | clone_ctx = unclone_ctx(ctx); | 3358 | clone_ctx = unclone_ctx(ctx); |
3361 | ++ctx->pin_count; | 3359 | ++ctx->pin_count; |
3360 | |||
3361 | if (task_ctx_data && !ctx->task_ctx_data) { | ||
3362 | ctx->task_ctx_data = task_ctx_data; | ||
3363 | task_ctx_data = NULL; | ||
3364 | } | ||
3362 | raw_spin_unlock_irqrestore(&ctx->lock, flags); | 3365 | raw_spin_unlock_irqrestore(&ctx->lock, flags); |
3363 | 3366 | ||
3364 | if (clone_ctx) | 3367 | if (clone_ctx) |
@@ -3369,6 +3372,11 @@ retry: | |||
3369 | if (!ctx) | 3372 | if (!ctx) |
3370 | goto errout; | 3373 | goto errout; |
3371 | 3374 | ||
3375 | if (task_ctx_data) { | ||
3376 | ctx->task_ctx_data = task_ctx_data; | ||
3377 | task_ctx_data = NULL; | ||
3378 | } | ||
3379 | |||
3372 | err = 0; | 3380 | err = 0; |
3373 | mutex_lock(&task->perf_event_mutex); | 3381 | mutex_lock(&task->perf_event_mutex); |
3374 | /* | 3382 | /* |
@@ -3395,13 +3403,16 @@ retry: | |||
3395 | } | 3403 | } |
3396 | } | 3404 | } |
3397 | 3405 | ||
3406 | kfree(task_ctx_data); | ||
3398 | return ctx; | 3407 | return ctx; |
3399 | 3408 | ||
3400 | errout: | 3409 | errout: |
3410 | kfree(task_ctx_data); | ||
3401 | return ERR_PTR(err); | 3411 | return ERR_PTR(err); |
3402 | } | 3412 | } |
3403 | 3413 | ||
3404 | static void perf_event_free_filter(struct perf_event *event); | 3414 | static void perf_event_free_filter(struct perf_event *event); |
3415 | static void perf_event_free_bpf_prog(struct perf_event *event); | ||
3405 | 3416 | ||
3406 | static void free_event_rcu(struct rcu_head *head) | 3417 | static void free_event_rcu(struct rcu_head *head) |
3407 | { | 3418 | { |
@@ -3411,10 +3422,10 @@ static void free_event_rcu(struct rcu_head *head) | |||
3411 | if (event->ns) | 3422 | if (event->ns) |
3412 | put_pid_ns(event->ns); | 3423 | put_pid_ns(event->ns); |
3413 | perf_event_free_filter(event); | 3424 | perf_event_free_filter(event); |
3425 | perf_event_free_bpf_prog(event); | ||
3414 | kfree(event); | 3426 | kfree(event); |
3415 | } | 3427 | } |
3416 | 3428 | ||
3417 | static void ring_buffer_put(struct ring_buffer *rb); | ||
3418 | static void ring_buffer_attach(struct perf_event *event, | 3429 | static void ring_buffer_attach(struct perf_event *event, |
3419 | struct ring_buffer *rb); | 3430 | struct ring_buffer *rb); |
3420 | 3431 | ||
@@ -3423,10 +3434,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) | |||
3423 | if (event->parent) | 3434 | if (event->parent) |
3424 | return; | 3435 | return; |
3425 | 3436 | ||
3426 | if (has_branch_stack(event)) { | ||
3427 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
3428 | atomic_dec(&per_cpu(perf_branch_stack_events, cpu)); | ||
3429 | } | ||
3430 | if (is_cgroup_event(event)) | 3437 | if (is_cgroup_event(event)) |
3431 | atomic_dec(&per_cpu(perf_cgroup_events, cpu)); | 3438 | atomic_dec(&per_cpu(perf_cgroup_events, cpu)); |
3432 | } | 3439 | } |
@@ -3454,6 +3461,91 @@ static void unaccount_event(struct perf_event *event) | |||
3454 | unaccount_event_cpu(event, event->cpu); | 3461 | unaccount_event_cpu(event, event->cpu); |
3455 | } | 3462 | } |
3456 | 3463 | ||
3464 | /* | ||
3465 | * The following implement mutual exclusion of events on "exclusive" pmus | ||
3466 | * (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled | ||
3467 | * at a time, so we disallow creating events that might conflict, namely: | ||
3468 | * | ||
3469 | * 1) cpu-wide events in the presence of per-task events, | ||
3470 | * 2) per-task events in the presence of cpu-wide events, | ||
3471 | * 3) two matching events on the same context. | ||
3472 | * | ||
3473 | * The former two cases are handled in the allocation path (perf_event_alloc(), | ||
3474 | * __free_event()), the latter -- before the first perf_install_in_context(). | ||
3475 | */ | ||
3476 | static int exclusive_event_init(struct perf_event *event) | ||
3477 | { | ||
3478 | struct pmu *pmu = event->pmu; | ||
3479 | |||
3480 | if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) | ||
3481 | return 0; | ||
3482 | |||
3483 | /* | ||
3484 | * Prevent co-existence of per-task and cpu-wide events on the | ||
3485 | * same exclusive pmu. | ||
3486 | * | ||
3487 | * Negative pmu::exclusive_cnt means there are cpu-wide | ||
3488 | * events on this "exclusive" pmu, positive means there are | ||
3489 | * per-task events. | ||
3490 | * | ||
3491 | * Since this is called in perf_event_alloc() path, event::ctx | ||
3492 | * doesn't exist yet; it is, however, safe to use PERF_ATTACH_TASK | ||
3493 | * to mean "per-task event", because unlike other attach states it | ||
3494 | * never gets cleared. | ||
3495 | */ | ||
3496 | if (event->attach_state & PERF_ATTACH_TASK) { | ||
3497 | if (!atomic_inc_unless_negative(&pmu->exclusive_cnt)) | ||
3498 | return -EBUSY; | ||
3499 | } else { | ||
3500 | if (!atomic_dec_unless_positive(&pmu->exclusive_cnt)) | ||
3501 | return -EBUSY; | ||
3502 | } | ||
3503 | |||
3504 | return 0; | ||
3505 | } | ||
3506 | |||
3507 | static void exclusive_event_destroy(struct perf_event *event) | ||
3508 | { | ||
3509 | struct pmu *pmu = event->pmu; | ||
3510 | |||
3511 | if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) | ||
3512 | return; | ||
3513 | |||
3514 | /* see comment in exclusive_event_init() */ | ||
3515 | if (event->attach_state & PERF_ATTACH_TASK) | ||
3516 | atomic_dec(&pmu->exclusive_cnt); | ||
3517 | else | ||
3518 | atomic_inc(&pmu->exclusive_cnt); | ||
3519 | } | ||
3520 | |||
3521 | static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2) | ||
3522 | { | ||
3523 | if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && | ||
3524 | (e1->cpu == e2->cpu || | ||
3525 | e1->cpu == -1 || | ||
3526 | e2->cpu == -1)) | ||
3527 | return true; | ||
3528 | return false; | ||
3529 | } | ||
3530 | |||
3531 | /* Called under the same ctx::mutex as perf_install_in_context() */ | ||
3532 | static bool exclusive_event_installable(struct perf_event *event, | ||
3533 | struct perf_event_context *ctx) | ||
3534 | { | ||
3535 | struct perf_event *iter_event; | ||
3536 | struct pmu *pmu = event->pmu; | ||
3537 | |||
3538 | if (!(pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE)) | ||
3539 | return true; | ||
3540 | |||
3541 | list_for_each_entry(iter_event, &ctx->event_list, event_entry) { | ||
3542 | if (exclusive_event_match(iter_event, event)) | ||
3543 | return false; | ||
3544 | } | ||
3545 | |||
3546 | return true; | ||
3547 | } | ||
3548 | |||
3457 | static void __free_event(struct perf_event *event) | 3549 | static void __free_event(struct perf_event *event) |
3458 | { | 3550 | { |
3459 | if (!event->parent) { | 3551 | if (!event->parent) { |
@@ -3467,8 +3559,10 @@ static void __free_event(struct perf_event *event) | |||
3467 | if (event->ctx) | 3559 | if (event->ctx) |
3468 | put_ctx(event->ctx); | 3560 | put_ctx(event->ctx); |
3469 | 3561 | ||
3470 | if (event->pmu) | 3562 | if (event->pmu) { |
3563 | exclusive_event_destroy(event); | ||
3471 | module_put(event->pmu->module); | 3564 | module_put(event->pmu->module); |
3565 | } | ||
3472 | 3566 | ||
3473 | call_rcu(&event->rcu_head, free_event_rcu); | 3567 | call_rcu(&event->rcu_head, free_event_rcu); |
3474 | } | 3568 | } |
@@ -3927,6 +4021,7 @@ static inline int perf_fget_light(int fd, struct fd *p) | |||
3927 | static int perf_event_set_output(struct perf_event *event, | 4021 | static int perf_event_set_output(struct perf_event *event, |
3928 | struct perf_event *output_event); | 4022 | struct perf_event *output_event); |
3929 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | 4023 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); |
4024 | static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd); | ||
3930 | 4025 | ||
3931 | static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) | 4026 | static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) |
3932 | { | 4027 | { |
@@ -3980,6 +4075,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon | |||
3980 | case PERF_EVENT_IOC_SET_FILTER: | 4075 | case PERF_EVENT_IOC_SET_FILTER: |
3981 | return perf_event_set_filter(event, (void __user *)arg); | 4076 | return perf_event_set_filter(event, (void __user *)arg); |
3982 | 4077 | ||
4078 | case PERF_EVENT_IOC_SET_BPF: | ||
4079 | return perf_event_set_bpf_prog(event, arg); | ||
4080 | |||
3983 | default: | 4081 | default: |
3984 | return -ENOTTY; | 4082 | return -ENOTTY; |
3985 | } | 4083 | } |
@@ -4096,6 +4194,8 @@ static void perf_event_init_userpage(struct perf_event *event) | |||
4096 | /* Allow new userspace to detect that bit 0 is deprecated */ | 4194 | /* Allow new userspace to detect that bit 0 is deprecated */ |
4097 | userpg->cap_bit0_is_deprecated = 1; | 4195 | userpg->cap_bit0_is_deprecated = 1; |
4098 | userpg->size = offsetof(struct perf_event_mmap_page, __reserved); | 4196 | userpg->size = offsetof(struct perf_event_mmap_page, __reserved); |
4197 | userpg->data_offset = PAGE_SIZE; | ||
4198 | userpg->data_size = perf_data_size(rb); | ||
4099 | 4199 | ||
4100 | unlock: | 4200 | unlock: |
4101 | rcu_read_unlock(); | 4201 | rcu_read_unlock(); |
@@ -4263,7 +4363,7 @@ static void rb_free_rcu(struct rcu_head *rcu_head) | |||
4263 | rb_free(rb); | 4363 | rb_free(rb); |
4264 | } | 4364 | } |
4265 | 4365 | ||
4266 | static struct ring_buffer *ring_buffer_get(struct perf_event *event) | 4366 | struct ring_buffer *ring_buffer_get(struct perf_event *event) |
4267 | { | 4367 | { |
4268 | struct ring_buffer *rb; | 4368 | struct ring_buffer *rb; |
4269 | 4369 | ||
@@ -4278,7 +4378,7 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) | |||
4278 | return rb; | 4378 | return rb; |
4279 | } | 4379 | } |
4280 | 4380 | ||
4281 | static void ring_buffer_put(struct ring_buffer *rb) | 4381 | void ring_buffer_put(struct ring_buffer *rb) |
4282 | { | 4382 | { |
4283 | if (!atomic_dec_and_test(&rb->refcount)) | 4383 | if (!atomic_dec_and_test(&rb->refcount)) |
4284 | return; | 4384 | return; |
@@ -4295,6 +4395,9 @@ static void perf_mmap_open(struct vm_area_struct *vma) | |||
4295 | atomic_inc(&event->mmap_count); | 4395 | atomic_inc(&event->mmap_count); |
4296 | atomic_inc(&event->rb->mmap_count); | 4396 | atomic_inc(&event->rb->mmap_count); |
4297 | 4397 | ||
4398 | if (vma->vm_pgoff) | ||
4399 | atomic_inc(&event->rb->aux_mmap_count); | ||
4400 | |||
4298 | if (event->pmu->event_mapped) | 4401 | if (event->pmu->event_mapped) |
4299 | event->pmu->event_mapped(event); | 4402 | event->pmu->event_mapped(event); |
4300 | } | 4403 | } |
@@ -4319,6 +4422,20 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
4319 | if (event->pmu->event_unmapped) | 4422 | if (event->pmu->event_unmapped) |
4320 | event->pmu->event_unmapped(event); | 4423 | event->pmu->event_unmapped(event); |
4321 | 4424 | ||
4425 | /* | ||
4426 | * rb->aux_mmap_count will always drop before rb->mmap_count and | ||
4427 | * event->mmap_count, so it is ok to use event->mmap_mutex to | ||
4428 | * serialize with perf_mmap here. | ||
4429 | */ | ||
4430 | if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && | ||
4431 | atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { | ||
4432 | atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); | ||
4433 | vma->vm_mm->pinned_vm -= rb->aux_mmap_locked; | ||
4434 | |||
4435 | rb_free_aux(rb); | ||
4436 | mutex_unlock(&event->mmap_mutex); | ||
4437 | } | ||
4438 | |||
4322 | atomic_dec(&rb->mmap_count); | 4439 | atomic_dec(&rb->mmap_count); |
4323 | 4440 | ||
4324 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | 4441 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) |
@@ -4392,7 +4509,7 @@ out_put: | |||
4392 | 4509 | ||
4393 | static const struct vm_operations_struct perf_mmap_vmops = { | 4510 | static const struct vm_operations_struct perf_mmap_vmops = { |
4394 | .open = perf_mmap_open, | 4511 | .open = perf_mmap_open, |
4395 | .close = perf_mmap_close, | 4512 | .close = perf_mmap_close, /* non mergable */ |
4396 | .fault = perf_mmap_fault, | 4513 | .fault = perf_mmap_fault, |
4397 | .page_mkwrite = perf_mmap_fault, | 4514 | .page_mkwrite = perf_mmap_fault, |
4398 | }; | 4515 | }; |
@@ -4403,10 +4520,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
4403 | unsigned long user_locked, user_lock_limit; | 4520 | unsigned long user_locked, user_lock_limit; |
4404 | struct user_struct *user = current_user(); | 4521 | struct user_struct *user = current_user(); |
4405 | unsigned long locked, lock_limit; | 4522 | unsigned long locked, lock_limit; |
4406 | struct ring_buffer *rb; | 4523 | struct ring_buffer *rb = NULL; |
4407 | unsigned long vma_size; | 4524 | unsigned long vma_size; |
4408 | unsigned long nr_pages; | 4525 | unsigned long nr_pages; |
4409 | long user_extra, extra; | 4526 | long user_extra = 0, extra = 0; |
4410 | int ret = 0, flags = 0; | 4527 | int ret = 0, flags = 0; |
4411 | 4528 | ||
4412 | /* | 4529 | /* |
@@ -4421,7 +4538,66 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
4421 | return -EINVAL; | 4538 | return -EINVAL; |
4422 | 4539 | ||
4423 | vma_size = vma->vm_end - vma->vm_start; | 4540 | vma_size = vma->vm_end - vma->vm_start; |
4424 | nr_pages = (vma_size / PAGE_SIZE) - 1; | 4541 | |
4542 | if (vma->vm_pgoff == 0) { | ||
4543 | nr_pages = (vma_size / PAGE_SIZE) - 1; | ||
4544 | } else { | ||
4545 | /* | ||
4546 | * AUX area mapping: if rb->aux_nr_pages != 0, it's already | ||
4547 | * mapped, all subsequent mappings should have the same size | ||
4548 | * and offset. Must be above the normal perf buffer. | ||
4549 | */ | ||
4550 | u64 aux_offset, aux_size; | ||
4551 | |||
4552 | if (!event->rb) | ||
4553 | return -EINVAL; | ||
4554 | |||
4555 | nr_pages = vma_size / PAGE_SIZE; | ||
4556 | |||
4557 | mutex_lock(&event->mmap_mutex); | ||
4558 | ret = -EINVAL; | ||
4559 | |||
4560 | rb = event->rb; | ||
4561 | if (!rb) | ||
4562 | goto aux_unlock; | ||
4563 | |||
4564 | aux_offset = ACCESS_ONCE(rb->user_page->aux_offset); | ||
4565 | aux_size = ACCESS_ONCE(rb->user_page->aux_size); | ||
4566 | |||
4567 | if (aux_offset < perf_data_size(rb) + PAGE_SIZE) | ||
4568 | goto aux_unlock; | ||
4569 | |||
4570 | if (aux_offset != vma->vm_pgoff << PAGE_SHIFT) | ||
4571 | goto aux_unlock; | ||
4572 | |||
4573 | /* already mapped with a different offset */ | ||
4574 | if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff) | ||
4575 | goto aux_unlock; | ||
4576 | |||
4577 | if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE) | ||
4578 | goto aux_unlock; | ||
4579 | |||
4580 | /* already mapped with a different size */ | ||
4581 | if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages) | ||
4582 | goto aux_unlock; | ||
4583 | |||
4584 | if (!is_power_of_2(nr_pages)) | ||
4585 | goto aux_unlock; | ||
4586 | |||
4587 | if (!atomic_inc_not_zero(&rb->mmap_count)) | ||
4588 | goto aux_unlock; | ||
4589 | |||
4590 | if (rb_has_aux(rb)) { | ||
4591 | atomic_inc(&rb->aux_mmap_count); | ||
4592 | ret = 0; | ||
4593 | goto unlock; | ||
4594 | } | ||
4595 | |||
4596 | atomic_set(&rb->aux_mmap_count, 1); | ||
4597 | user_extra = nr_pages; | ||
4598 | |||
4599 | goto accounting; | ||
4600 | } | ||
4425 | 4601 | ||
4426 | /* | 4602 | /* |
4427 | * If we have rb pages ensure they're a power-of-two number, so we | 4603 | * If we have rb pages ensure they're a power-of-two number, so we |
@@ -4433,9 +4609,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
4433 | if (vma_size != PAGE_SIZE * (1 + nr_pages)) | 4609 | if (vma_size != PAGE_SIZE * (1 + nr_pages)) |
4434 | return -EINVAL; | 4610 | return -EINVAL; |
4435 | 4611 | ||
4436 | if (vma->vm_pgoff != 0) | ||
4437 | return -EINVAL; | ||
4438 | |||
4439 | WARN_ON_ONCE(event->ctx->parent_ctx); | 4612 | WARN_ON_ONCE(event->ctx->parent_ctx); |
4440 | again: | 4613 | again: |
4441 | mutex_lock(&event->mmap_mutex); | 4614 | mutex_lock(&event->mmap_mutex); |
@@ -4459,6 +4632,8 @@ again: | |||
4459 | } | 4632 | } |
4460 | 4633 | ||
4461 | user_extra = nr_pages + 1; | 4634 | user_extra = nr_pages + 1; |
4635 | |||
4636 | accounting: | ||
4462 | user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); | 4637 | user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10); |
4463 | 4638 | ||
4464 | /* | 4639 | /* |
@@ -4468,7 +4643,6 @@ again: | |||
4468 | 4643 | ||
4469 | user_locked = atomic_long_read(&user->locked_vm) + user_extra; | 4644 | user_locked = atomic_long_read(&user->locked_vm) + user_extra; |
4470 | 4645 | ||
4471 | extra = 0; | ||
4472 | if (user_locked > user_lock_limit) | 4646 | if (user_locked > user_lock_limit) |
4473 | extra = user_locked - user_lock_limit; | 4647 | extra = user_locked - user_lock_limit; |
4474 | 4648 | ||
@@ -4482,35 +4656,46 @@ again: | |||
4482 | goto unlock; | 4656 | goto unlock; |
4483 | } | 4657 | } |
4484 | 4658 | ||
4485 | WARN_ON(event->rb); | 4659 | WARN_ON(!rb && event->rb); |
4486 | 4660 | ||
4487 | if (vma->vm_flags & VM_WRITE) | 4661 | if (vma->vm_flags & VM_WRITE) |
4488 | flags |= RING_BUFFER_WRITABLE; | 4662 | flags |= RING_BUFFER_WRITABLE; |
4489 | 4663 | ||
4490 | rb = rb_alloc(nr_pages, | ||
4491 | event->attr.watermark ? event->attr.wakeup_watermark : 0, | ||
4492 | event->cpu, flags); | ||
4493 | |||
4494 | if (!rb) { | 4664 | if (!rb) { |
4495 | ret = -ENOMEM; | 4665 | rb = rb_alloc(nr_pages, |
4496 | goto unlock; | 4666 | event->attr.watermark ? event->attr.wakeup_watermark : 0, |
4497 | } | 4667 | event->cpu, flags); |
4498 | 4668 | ||
4499 | atomic_set(&rb->mmap_count, 1); | 4669 | if (!rb) { |
4500 | rb->mmap_locked = extra; | 4670 | ret = -ENOMEM; |
4501 | rb->mmap_user = get_current_user(); | 4671 | goto unlock; |
4672 | } | ||
4502 | 4673 | ||
4503 | atomic_long_add(user_extra, &user->locked_vm); | 4674 | atomic_set(&rb->mmap_count, 1); |
4504 | vma->vm_mm->pinned_vm += extra; | 4675 | rb->mmap_user = get_current_user(); |
4676 | rb->mmap_locked = extra; | ||
4505 | 4677 | ||
4506 | ring_buffer_attach(event, rb); | 4678 | ring_buffer_attach(event, rb); |
4507 | 4679 | ||
4508 | perf_event_init_userpage(event); | 4680 | perf_event_init_userpage(event); |
4509 | perf_event_update_userpage(event); | 4681 | perf_event_update_userpage(event); |
4682 | } else { | ||
4683 | ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, | ||
4684 | event->attr.aux_watermark, flags); | ||
4685 | if (!ret) | ||
4686 | rb->aux_mmap_locked = extra; | ||
4687 | } | ||
4510 | 4688 | ||
4511 | unlock: | 4689 | unlock: |
4512 | if (!ret) | 4690 | if (!ret) { |
4691 | atomic_long_add(user_extra, &user->locked_vm); | ||
4692 | vma->vm_mm->pinned_vm += extra; | ||
4693 | |||
4513 | atomic_inc(&event->mmap_count); | 4694 | atomic_inc(&event->mmap_count); |
4695 | } else if (rb) { | ||
4696 | atomic_dec(&rb->mmap_count); | ||
4697 | } | ||
4698 | aux_unlock: | ||
4514 | mutex_unlock(&event->mmap_mutex); | 4699 | mutex_unlock(&event->mmap_mutex); |
4515 | 4700 | ||
4516 | /* | 4701 | /* |
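The hunks above extend perf_mmap() to accept a second mapping at a non-zero page offset for the AUX area: userspace writes the intended offset and size into the mmap control page, then maps exactly that range, which must start above the data pages and cover a power-of-two number of pages. A sketch of the userspace side, assuming the aux_offset/aux_size fields from the uapi half of this series (the helper name is illustrative):

#include <linux/perf_event.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* 'base' is the existing mapping of 1 + data_pages pages at offset 0. */
static void *map_aux_area(int perf_fd, void *base,
			  size_t data_pages, size_t aux_pages)
{
	struct perf_event_mmap_page *pc = base;
	long psize = sysconf(_SC_PAGESIZE);
	void *aux;

	/* Tell the kernel where the AUX mapping starts and how big it is;
	 * the offset must lie above the data area and aux_pages must be a
	 * power of two, otherwise the mmap() below fails with EINVAL. */
	pc->aux_offset = (data_pages + 1) * psize;
	pc->aux_size   = aux_pages * psize;

	aux = mmap(NULL, pc->aux_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   perf_fd, pc->aux_offset);
	if (aux == MAP_FAILED)
		perror("mmap aux");

	return aux;
}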
@@ -4766,7 +4951,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header, | |||
4766 | } | 4951 | } |
4767 | 4952 | ||
4768 | if (sample_type & PERF_SAMPLE_TIME) | 4953 | if (sample_type & PERF_SAMPLE_TIME) |
4769 | data->time = perf_clock(); | 4954 | data->time = perf_event_clock(event); |
4770 | 4955 | ||
4771 | if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) | 4956 | if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) |
4772 | data->id = primary_event_id(event); | 4957 | data->id = primary_event_id(event); |
@@ -5344,6 +5529,8 @@ static void perf_event_task_output(struct perf_event *event, | |||
5344 | task_event->event_id.tid = perf_event_tid(event, task); | 5529 | task_event->event_id.tid = perf_event_tid(event, task); |
5345 | task_event->event_id.ptid = perf_event_tid(event, current); | 5530 | task_event->event_id.ptid = perf_event_tid(event, current); |
5346 | 5531 | ||
5532 | task_event->event_id.time = perf_event_clock(event); | ||
5533 | |||
5347 | perf_output_put(&handle, task_event->event_id); | 5534 | perf_output_put(&handle, task_event->event_id); |
5348 | 5535 | ||
5349 | perf_event__output_id_sample(event, &handle, &sample); | 5536 | perf_event__output_id_sample(event, &handle, &sample); |
@@ -5377,7 +5564,7 @@ static void perf_event_task(struct task_struct *task, | |||
5377 | /* .ppid */ | 5564 | /* .ppid */ |
5378 | /* .tid */ | 5565 | /* .tid */ |
5379 | /* .ptid */ | 5566 | /* .ptid */ |
5380 | .time = perf_clock(), | 5567 | /* .time */ |
5381 | }, | 5568 | }, |
5382 | }; | 5569 | }; |
5383 | 5570 | ||
@@ -5732,6 +5919,40 @@ void perf_event_mmap(struct vm_area_struct *vma) | |||
5732 | perf_event_mmap_event(&mmap_event); | 5919 | perf_event_mmap_event(&mmap_event); |
5733 | } | 5920 | } |
5734 | 5921 | ||
5922 | void perf_event_aux_event(struct perf_event *event, unsigned long head, | ||
5923 | unsigned long size, u64 flags) | ||
5924 | { | ||
5925 | struct perf_output_handle handle; | ||
5926 | struct perf_sample_data sample; | ||
5927 | struct perf_aux_event { | ||
5928 | struct perf_event_header header; | ||
5929 | u64 offset; | ||
5930 | u64 size; | ||
5931 | u64 flags; | ||
5932 | } rec = { | ||
5933 | .header = { | ||
5934 | .type = PERF_RECORD_AUX, | ||
5935 | .misc = 0, | ||
5936 | .size = sizeof(rec), | ||
5937 | }, | ||
5938 | .offset = head, | ||
5939 | .size = size, | ||
5940 | .flags = flags, | ||
5941 | }; | ||
5942 | int ret; | ||
5943 | |||
5944 | perf_event_header__init_id(&rec.header, &sample, event); | ||
5945 | ret = perf_output_begin(&handle, event, rec.header.size); | ||
5946 | |||
5947 | if (ret) | ||
5948 | return; | ||
5949 | |||
5950 | perf_output_put(&handle, rec); | ||
5951 | perf_event__output_id_sample(event, &handle, &sample); | ||
5952 | |||
5953 | perf_output_end(&handle); | ||
5954 | } | ||
5955 | |||
5735 | /* | 5956 | /* |
5736 | * IRQ throttle logging | 5957 | * IRQ throttle logging |
5737 | */ | 5958 | */ |
@@ -5753,7 +5974,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
5753 | .misc = 0, | 5974 | .misc = 0, |
5754 | .size = sizeof(throttle_event), | 5975 | .size = sizeof(throttle_event), |
5755 | }, | 5976 | }, |
5756 | .time = perf_clock(), | 5977 | .time = perf_event_clock(event), |
5757 | .id = primary_event_id(event), | 5978 | .id = primary_event_id(event), |
5758 | .stream_id = event->id, | 5979 | .stream_id = event->id, |
5759 | }; | 5980 | }; |
@@ -5773,6 +5994,44 @@ static void perf_log_throttle(struct perf_event *event, int enable) | |||
5773 | perf_output_end(&handle); | 5994 | perf_output_end(&handle); |
5774 | } | 5995 | } |
5775 | 5996 | ||
5997 | static void perf_log_itrace_start(struct perf_event *event) | ||
5998 | { | ||
5999 | struct perf_output_handle handle; | ||
6000 | struct perf_sample_data sample; | ||
6001 | struct perf_aux_event { | ||
6002 | struct perf_event_header header; | ||
6003 | u32 pid; | ||
6004 | u32 tid; | ||
6005 | } rec; | ||
6006 | int ret; | ||
6007 | |||
6008 | if (event->parent) | ||
6009 | event = event->parent; | ||
6010 | |||
6011 | if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) || | ||
6012 | event->hw.itrace_started) | ||
6013 | return; | ||
6014 | |||
6015 | event->hw.itrace_started = 1; | ||
6016 | |||
6017 | rec.header.type = PERF_RECORD_ITRACE_START; | ||
6018 | rec.header.misc = 0; | ||
6019 | rec.header.size = sizeof(rec); | ||
6020 | rec.pid = perf_event_pid(event, current); | ||
6021 | rec.tid = perf_event_tid(event, current); | ||
6022 | |||
6023 | perf_event_header__init_id(&rec.header, &sample, event); | ||
6024 | ret = perf_output_begin(&handle, event, rec.header.size); | ||
6025 | |||
6026 | if (ret) | ||
6027 | return; | ||
6028 | |||
6029 | perf_output_put(&handle, rec); | ||
6030 | perf_event__output_id_sample(event, &handle, &sample); | ||
6031 | |||
6032 | perf_output_end(&handle); | ||
6033 | } | ||
6034 | |||
5776 | /* | 6035 | /* |
5777 | * Generic event overflow handling, sampling. | 6036 | * Generic event overflow handling, sampling. |
5778 | */ | 6037 | */ |
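For ring-buffer consumers, the two record types generated above have the following bodies, followed by the usual sample_id fields appended by perf_event__output_id_sample(); the PERF_RECORD_AUX and PERF_RECORD_ITRACE_START constants come from the uapi half of this series, and the struct names below are illustrative:

#include <linux/perf_event.h>
#include <linux/types.h>

/* Written by perf_event_aux_event() when data lands in the AUX area. */
struct perf_record_aux {
	struct perf_event_header header;	/* header.type == PERF_RECORD_AUX */
	__u64 aux_offset;			/* where the new data starts in the AUX area */
	__u64 aux_size;				/* how much was written */
	__u64 flags;				/* e.g. the data was truncated */
};

/* Written by perf_log_itrace_start() the first time an instruction-trace
 * event is scheduled in. */
struct perf_record_itrace_start {
	struct perf_event_header header;	/* header.type == PERF_RECORD_ITRACE_START */
	__u32 pid;
	__u32 tid;
};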
@@ -6133,6 +6392,7 @@ static int perf_swevent_add(struct perf_event *event, int flags) | |||
6133 | } | 6392 | } |
6134 | 6393 | ||
6135 | hlist_add_head_rcu(&event->hlist_entry, head); | 6394 | hlist_add_head_rcu(&event->hlist_entry, head); |
6395 | perf_event_update_userpage(event); | ||
6136 | 6396 | ||
6137 | return 0; | 6397 | return 0; |
6138 | } | 6398 | } |
@@ -6296,6 +6556,8 @@ static int perf_swevent_init(struct perf_event *event) | |||
6296 | static struct pmu perf_swevent = { | 6556 | static struct pmu perf_swevent = { |
6297 | .task_ctx_nr = perf_sw_context, | 6557 | .task_ctx_nr = perf_sw_context, |
6298 | 6558 | ||
6559 | .capabilities = PERF_PMU_CAP_NO_NMI, | ||
6560 | |||
6299 | .event_init = perf_swevent_init, | 6561 | .event_init = perf_swevent_init, |
6300 | .add = perf_swevent_add, | 6562 | .add = perf_swevent_add, |
6301 | .del = perf_swevent_del, | 6563 | .del = perf_swevent_del, |
@@ -6449,6 +6711,49 @@ static void perf_event_free_filter(struct perf_event *event) | |||
6449 | ftrace_profile_free_filter(event); | 6711 | ftrace_profile_free_filter(event); |
6450 | } | 6712 | } |
6451 | 6713 | ||
6714 | static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) | ||
6715 | { | ||
6716 | struct bpf_prog *prog; | ||
6717 | |||
6718 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||
6719 | return -EINVAL; | ||
6720 | |||
6721 | if (event->tp_event->prog) | ||
6722 | return -EEXIST; | ||
6723 | |||
6724 | if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) | ||
6725 | /* bpf programs can only be attached to kprobes */ | ||
6726 | return -EINVAL; | ||
6727 | |||
6728 | prog = bpf_prog_get(prog_fd); | ||
6729 | if (IS_ERR(prog)) | ||
6730 | return PTR_ERR(prog); | ||
6731 | |||
6732 | if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) { | ||
6733 | /* valid fd, but invalid bpf program type */ | ||
6734 | bpf_prog_put(prog); | ||
6735 | return -EINVAL; | ||
6736 | } | ||
6737 | |||
6738 | event->tp_event->prog = prog; | ||
6739 | |||
6740 | return 0; | ||
6741 | } | ||
6742 | |||
6743 | static void perf_event_free_bpf_prog(struct perf_event *event) | ||
6744 | { | ||
6745 | struct bpf_prog *prog; | ||
6746 | |||
6747 | if (!event->tp_event) | ||
6748 | return; | ||
6749 | |||
6750 | prog = event->tp_event->prog; | ||
6751 | if (prog) { | ||
6752 | event->tp_event->prog = NULL; | ||
6753 | bpf_prog_put(prog); | ||
6754 | } | ||
6755 | } | ||
6756 | |||
6452 | #else | 6757 | #else |
6453 | 6758 | ||
6454 | static inline void perf_tp_register(void) | 6759 | static inline void perf_tp_register(void) |
@@ -6464,6 +6769,14 @@ static void perf_event_free_filter(struct perf_event *event) | |||
6464 | { | 6769 | { |
6465 | } | 6770 | } |
6466 | 6771 | ||
6772 | static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) | ||
6773 | { | ||
6774 | return -ENOENT; | ||
6775 | } | ||
6776 | |||
6777 | static void perf_event_free_bpf_prog(struct perf_event *event) | ||
6778 | { | ||
6779 | } | ||
6467 | #endif /* CONFIG_EVENT_TRACING */ | 6780 | #endif /* CONFIG_EVENT_TRACING */ |
6468 | 6781 | ||
6469 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 6782 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
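The matching userspace step for perf_event_set_bpf_prog() above is a single ioctl on the perf event fd once the program has been loaded with BPF_PROG_LOAD; a sketch, assuming the PERF_EVENT_IOC_SET_BPF definition from the uapi half of this series:

#include <linux/perf_event.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Attach an already-loaded kprobe BPF program to a kprobe-backed
 * tracepoint event.  Only one program may be attached per tracepoint
 * (-EEXIST), and only kprobe events and BPF_PROG_TYPE_KPROBE programs
 * are accepted (-EINVAL). */
static int attach_bpf_prog(int perf_fd, int bpf_prog_fd)
{
	if (ioctl(perf_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd) < 0) {
		perror("PERF_EVENT_IOC_SET_BPF");
		return -1;
	}
	return 0;
}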
@@ -6602,6 +6915,7 @@ static int cpu_clock_event_add(struct perf_event *event, int flags) | |||
6602 | { | 6915 | { |
6603 | if (flags & PERF_EF_START) | 6916 | if (flags & PERF_EF_START) |
6604 | cpu_clock_event_start(event, flags); | 6917 | cpu_clock_event_start(event, flags); |
6918 | perf_event_update_userpage(event); | ||
6605 | 6919 | ||
6606 | return 0; | 6920 | return 0; |
6607 | } | 6921 | } |
@@ -6638,6 +6952,8 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
6638 | static struct pmu perf_cpu_clock = { | 6952 | static struct pmu perf_cpu_clock = { |
6639 | .task_ctx_nr = perf_sw_context, | 6953 | .task_ctx_nr = perf_sw_context, |
6640 | 6954 | ||
6955 | .capabilities = PERF_PMU_CAP_NO_NMI, | ||
6956 | |||
6641 | .event_init = cpu_clock_event_init, | 6957 | .event_init = cpu_clock_event_init, |
6642 | .add = cpu_clock_event_add, | 6958 | .add = cpu_clock_event_add, |
6643 | .del = cpu_clock_event_del, | 6959 | .del = cpu_clock_event_del, |
@@ -6676,6 +6992,7 @@ static int task_clock_event_add(struct perf_event *event, int flags) | |||
6676 | { | 6992 | { |
6677 | if (flags & PERF_EF_START) | 6993 | if (flags & PERF_EF_START) |
6678 | task_clock_event_start(event, flags); | 6994 | task_clock_event_start(event, flags); |
6995 | perf_event_update_userpage(event); | ||
6679 | 6996 | ||
6680 | return 0; | 6997 | return 0; |
6681 | } | 6998 | } |
@@ -6716,6 +7033,8 @@ static int task_clock_event_init(struct perf_event *event) | |||
6716 | static struct pmu perf_task_clock = { | 7033 | static struct pmu perf_task_clock = { |
6717 | .task_ctx_nr = perf_sw_context, | 7034 | .task_ctx_nr = perf_sw_context, |
6718 | 7035 | ||
7036 | .capabilities = PERF_PMU_CAP_NO_NMI, | ||
7037 | |||
6719 | .event_init = task_clock_event_init, | 7038 | .event_init = task_clock_event_init, |
6720 | .add = task_clock_event_add, | 7039 | .add = task_clock_event_add, |
6721 | .del = task_clock_event_del, | 7040 | .del = task_clock_event_del, |
@@ -6993,6 +7312,7 @@ got_cpu_context: | |||
6993 | pmu->event_idx = perf_event_idx_default; | 7312 | pmu->event_idx = perf_event_idx_default; |
6994 | 7313 | ||
6995 | list_add_rcu(&pmu->entry, &pmus); | 7314 | list_add_rcu(&pmu->entry, &pmus); |
7315 | atomic_set(&pmu->exclusive_cnt, 0); | ||
6996 | ret = 0; | 7316 | ret = 0; |
6997 | unlock: | 7317 | unlock: |
6998 | mutex_unlock(&pmus_lock); | 7318 | mutex_unlock(&pmus_lock); |
@@ -7037,12 +7357,23 @@ EXPORT_SYMBOL_GPL(perf_pmu_unregister); | |||
7037 | 7357 | ||
7038 | static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) | 7358 | static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) |
7039 | { | 7359 | { |
7360 | struct perf_event_context *ctx = NULL; | ||
7040 | int ret; | 7361 | int ret; |
7041 | 7362 | ||
7042 | if (!try_module_get(pmu->module)) | 7363 | if (!try_module_get(pmu->module)) |
7043 | return -ENODEV; | 7364 | return -ENODEV; |
7365 | |||
7366 | if (event->group_leader != event) { | ||
7367 | ctx = perf_event_ctx_lock(event->group_leader); | ||
7368 | BUG_ON(!ctx); | ||
7369 | } | ||
7370 | |||
7044 | event->pmu = pmu; | 7371 | event->pmu = pmu; |
7045 | ret = pmu->event_init(event); | 7372 | ret = pmu->event_init(event); |
7373 | |||
7374 | if (ctx) | ||
7375 | perf_event_ctx_unlock(event->group_leader, ctx); | ||
7376 | |||
7046 | if (ret) | 7377 | if (ret) |
7047 | module_put(pmu->module); | 7378 | module_put(pmu->module); |
7048 | 7379 | ||
@@ -7089,10 +7420,6 @@ static void account_event_cpu(struct perf_event *event, int cpu) | |||
7089 | if (event->parent) | 7420 | if (event->parent) |
7090 | return; | 7421 | return; |
7091 | 7422 | ||
7092 | if (has_branch_stack(event)) { | ||
7093 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
7094 | atomic_inc(&per_cpu(perf_branch_stack_events, cpu)); | ||
7095 | } | ||
7096 | if (is_cgroup_event(event)) | 7423 | if (is_cgroup_event(event)) |
7097 | atomic_inc(&per_cpu(perf_cgroup_events, cpu)); | 7424 | atomic_inc(&per_cpu(perf_cgroup_events, cpu)); |
7098 | } | 7425 | } |
@@ -7131,7 +7458,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
7131 | struct perf_event *group_leader, | 7458 | struct perf_event *group_leader, |
7132 | struct perf_event *parent_event, | 7459 | struct perf_event *parent_event, |
7133 | perf_overflow_handler_t overflow_handler, | 7460 | perf_overflow_handler_t overflow_handler, |
7134 | void *context) | 7461 | void *context, int cgroup_fd) |
7135 | { | 7462 | { |
7136 | struct pmu *pmu; | 7463 | struct pmu *pmu; |
7137 | struct perf_event *event; | 7464 | struct perf_event *event; |
@@ -7186,18 +7513,18 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
7186 | 7513 | ||
7187 | if (task) { | 7514 | if (task) { |
7188 | event->attach_state = PERF_ATTACH_TASK; | 7515 | event->attach_state = PERF_ATTACH_TASK; |
7189 | |||
7190 | if (attr->type == PERF_TYPE_TRACEPOINT) | ||
7191 | event->hw.tp_target = task; | ||
7192 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
7193 | /* | 7516 | /* |
7194 | * hw_breakpoint is a bit difficult here.. | 7517 | * XXX pmu::event_init needs to know what task to account to |
7518 | * and we cannot use the ctx information because we need the | ||
7519 | * pmu before we get a ctx. | ||
7195 | */ | 7520 | */ |
7196 | else if (attr->type == PERF_TYPE_BREAKPOINT) | 7521 | event->hw.target = task; |
7197 | event->hw.bp_target = task; | ||
7198 | #endif | ||
7199 | } | 7522 | } |
7200 | 7523 | ||
7524 | event->clock = &local_clock; | ||
7525 | if (parent_event) | ||
7526 | event->clock = parent_event->clock; | ||
7527 | |||
7201 | if (!overflow_handler && parent_event) { | 7528 | if (!overflow_handler && parent_event) { |
7202 | overflow_handler = parent_event->overflow_handler; | 7529 | overflow_handler = parent_event->overflow_handler; |
7203 | context = parent_event->overflow_handler_context; | 7530 | context = parent_event->overflow_handler_context; |
@@ -7224,6 +7551,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
7224 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) | 7551 | if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) |
7225 | goto err_ns; | 7552 | goto err_ns; |
7226 | 7553 | ||
7554 | if (!has_branch_stack(event)) | ||
7555 | event->attr.branch_sample_type = 0; | ||
7556 | |||
7557 | if (cgroup_fd != -1) { | ||
7558 | err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); | ||
7559 | if (err) | ||
7560 | goto err_ns; | ||
7561 | } | ||
7562 | |||
7227 | pmu = perf_init_event(event); | 7563 | pmu = perf_init_event(event); |
7228 | if (!pmu) | 7564 | if (!pmu) |
7229 | goto err_ns; | 7565 | goto err_ns; |
@@ -7232,21 +7568,30 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
7232 | goto err_ns; | 7568 | goto err_ns; |
7233 | } | 7569 | } |
7234 | 7570 | ||
7571 | err = exclusive_event_init(event); | ||
7572 | if (err) | ||
7573 | goto err_pmu; | ||
7574 | |||
7235 | if (!event->parent) { | 7575 | if (!event->parent) { |
7236 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { | 7576 | if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { |
7237 | err = get_callchain_buffers(); | 7577 | err = get_callchain_buffers(); |
7238 | if (err) | 7578 | if (err) |
7239 | goto err_pmu; | 7579 | goto err_per_task; |
7240 | } | 7580 | } |
7241 | } | 7581 | } |
7242 | 7582 | ||
7243 | return event; | 7583 | return event; |
7244 | 7584 | ||
7585 | err_per_task: | ||
7586 | exclusive_event_destroy(event); | ||
7587 | |||
7245 | err_pmu: | 7588 | err_pmu: |
7246 | if (event->destroy) | 7589 | if (event->destroy) |
7247 | event->destroy(event); | 7590 | event->destroy(event); |
7248 | module_put(pmu->module); | 7591 | module_put(pmu->module); |
7249 | err_ns: | 7592 | err_ns: |
7593 | if (is_cgroup_event(event)) | ||
7594 | perf_detach_cgroup(event); | ||
7250 | if (event->ns) | 7595 | if (event->ns) |
7251 | put_pid_ns(event->ns); | 7596 | put_pid_ns(event->ns); |
7252 | kfree(event); | 7597 | kfree(event); |
@@ -7409,6 +7754,19 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) | |||
7409 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) | 7754 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) |
7410 | goto out; | 7755 | goto out; |
7411 | 7756 | ||
7757 | /* | ||
7758 | * Mixing clocks in the same buffer is trouble you don't need. | ||
7759 | */ | ||
7760 | if (output_event->clock != event->clock) | ||
7761 | goto out; | ||
7762 | |||
7763 | /* | ||
7764 | * If both events generate aux data, they must be on the same PMU | ||
7765 | */ | ||
7766 | if (has_aux(event) && has_aux(output_event) && | ||
7767 | event->pmu != output_event->pmu) | ||
7768 | goto out; | ||
7769 | |||
7412 | set: | 7770 | set: |
7413 | mutex_lock(&event->mmap_mutex); | 7771 | mutex_lock(&event->mmap_mutex); |
7414 | /* Can't redirect output if we've got an active mmap() */ | 7772 | /* Can't redirect output if we've got an active mmap() */ |
@@ -7441,6 +7799,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b) | |||
7441 | mutex_lock_nested(b, SINGLE_DEPTH_NESTING); | 7799 | mutex_lock_nested(b, SINGLE_DEPTH_NESTING); |
7442 | } | 7800 | } |
7443 | 7801 | ||
7802 | static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) | ||
7803 | { | ||
7804 | bool nmi_safe = false; | ||
7805 | |||
7806 | switch (clk_id) { | ||
7807 | case CLOCK_MONOTONIC: | ||
7808 | event->clock = &ktime_get_mono_fast_ns; | ||
7809 | nmi_safe = true; | ||
7810 | break; | ||
7811 | |||
7812 | case CLOCK_MONOTONIC_RAW: | ||
7813 | event->clock = &ktime_get_raw_fast_ns; | ||
7814 | nmi_safe = true; | ||
7815 | break; | ||
7816 | |||
7817 | case CLOCK_REALTIME: | ||
7818 | event->clock = &ktime_get_real_ns; | ||
7819 | break; | ||
7820 | |||
7821 | case CLOCK_BOOTTIME: | ||
7822 | event->clock = &ktime_get_boot_ns; | ||
7823 | break; | ||
7824 | |||
7825 | case CLOCK_TAI: | ||
7826 | event->clock = &ktime_get_tai_ns; | ||
7827 | break; | ||
7828 | |||
7829 | default: | ||
7830 | return -EINVAL; | ||
7831 | } | ||
7832 | |||
7833 | if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI)) | ||
7834 | return -EINVAL; | ||
7835 | |||
7836 | return 0; | ||
7837 | } | ||
7838 | |||
7444 | /** | 7839 | /** |
7445 | * sys_perf_event_open - open a performance event, associate it to a task/cpu | 7840 | * sys_perf_event_open - open a performance event, associate it to a task/cpu |
7446 | * | 7841 | * |
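From userspace, the clock handled by perf_event_set_clock() above is chosen at perf_event_open() time through two new perf_event_attr fields (use_clockid and clockid, from the uapi half of this series); clocks that are not NMI-safe are only accepted on PMUs flagged PERF_PMU_CAP_NO_NMI, such as the software PMUs above. A minimal sketch:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

/* Open a software event whose PERF_SAMPLE_TIME stamps come from
 * CLOCK_MONOTONIC instead of the default local clock. */
static int open_monotonic_task_clock(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size        = sizeof(attr);
	attr.type        = PERF_TYPE_SOFTWARE;
	attr.config      = PERF_COUNT_SW_TASK_CLOCK;
	attr.sample_type = PERF_SAMPLE_TIME;
	attr.use_clockid = 1;
	attr.clockid     = CLOCK_MONOTONIC;	/* NMI-safe, so any PMU accepts it */

	return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}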
@@ -7465,6 +7860,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7465 | int move_group = 0; | 7860 | int move_group = 0; |
7466 | int err; | 7861 | int err; |
7467 | int f_flags = O_RDWR; | 7862 | int f_flags = O_RDWR; |
7863 | int cgroup_fd = -1; | ||
7468 | 7864 | ||
7469 | /* for future expandability... */ | 7865 | /* for future expandability... */ |
7470 | if (flags & ~PERF_FLAG_ALL) | 7866 | if (flags & ~PERF_FLAG_ALL) |
@@ -7530,21 +7926,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7530 | 7926 | ||
7531 | get_online_cpus(); | 7927 | get_online_cpus(); |
7532 | 7928 | ||
7929 | if (flags & PERF_FLAG_PID_CGROUP) | ||
7930 | cgroup_fd = pid; | ||
7931 | |||
7533 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, | 7932 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
7534 | NULL, NULL); | 7933 | NULL, NULL, cgroup_fd); |
7535 | if (IS_ERR(event)) { | 7934 | if (IS_ERR(event)) { |
7536 | err = PTR_ERR(event); | 7935 | err = PTR_ERR(event); |
7537 | goto err_cpus; | 7936 | goto err_cpus; |
7538 | } | 7937 | } |
7539 | 7938 | ||
7540 | if (flags & PERF_FLAG_PID_CGROUP) { | ||
7541 | err = perf_cgroup_connect(pid, event, &attr, group_leader); | ||
7542 | if (err) { | ||
7543 | __free_event(event); | ||
7544 | goto err_cpus; | ||
7545 | } | ||
7546 | } | ||
7547 | |||
7548 | if (is_sampling_event(event)) { | 7939 | if (is_sampling_event(event)) { |
7549 | if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { | 7940 | if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { |
7550 | err = -ENOTSUPP; | 7941 | err = -ENOTSUPP; |
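Here the cgroup attachment moves out of the syscall body: when PERF_FLAG_PID_CGROUP is set, the pid argument is reinterpreted as a cgroup-directory file descriptor and handed to perf_event_alloc() as cgroup_fd, so the error path no longer needs the special __free_event() call. A sketch of the user-space calling convention (the cgroup mount path is an illustrative assumption):

#include <fcntl.h>
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int open_cgroup_counter(void)
{
	struct perf_event_attr attr;
	int cgrp_fd, ev_fd;

	cgrp_fd = open("/sys/fs/cgroup/perf_event/mygroup", O_RDONLY);
	if (cgrp_fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* cgroup events must name a CPU, hence cpu = 0 and pid = cgrp_fd */
	ev_fd = syscall(__NR_perf_event_open, &attr, cgrp_fd, 0, -1,
			PERF_FLAG_PID_CGROUP);
	return ev_fd;
}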
@@ -7560,6 +7951,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7560 | */ | 7951 | */ |
7561 | pmu = event->pmu; | 7952 | pmu = event->pmu; |
7562 | 7953 | ||
7954 | if (attr.use_clockid) { | ||
7955 | err = perf_event_set_clock(event, attr.clockid); | ||
7956 | if (err) | ||
7957 | goto err_alloc; | ||
7958 | } | ||
7959 | |||
7563 | if (group_leader && | 7960 | if (group_leader && |
7564 | (is_software_event(event) != is_software_event(group_leader))) { | 7961 | (is_software_event(event) != is_software_event(group_leader))) { |
7565 | if (is_software_event(event)) { | 7962 | if (is_software_event(event)) { |
@@ -7586,12 +7983,17 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7586 | /* | 7983 | /* |
7587 | * Get the target context (task or percpu): | 7984 | * Get the target context (task or percpu): |
7588 | */ | 7985 | */ |
7589 | ctx = find_get_context(pmu, task, event->cpu); | 7986 | ctx = find_get_context(pmu, task, event); |
7590 | if (IS_ERR(ctx)) { | 7987 | if (IS_ERR(ctx)) { |
7591 | err = PTR_ERR(ctx); | 7988 | err = PTR_ERR(ctx); |
7592 | goto err_alloc; | 7989 | goto err_alloc; |
7593 | } | 7990 | } |
7594 | 7991 | ||
7992 | if ((pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) && group_leader) { | ||
7993 | err = -EBUSY; | ||
7994 | goto err_context; | ||
7995 | } | ||
7996 | |||
7595 | if (task) { | 7997 | if (task) { |
7596 | put_task_struct(task); | 7998 | put_task_struct(task); |
7597 | task = NULL; | 7999 | task = NULL; |
@@ -7609,6 +8011,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7609 | */ | 8011 | */ |
7610 | if (group_leader->group_leader != group_leader) | 8012 | if (group_leader->group_leader != group_leader) |
7611 | goto err_context; | 8013 | goto err_context; |
8014 | |||
8015 | /* All events in a group should have the same clock */ | ||
8016 | if (group_leader->clock != event->clock) | ||
8017 | goto err_context; | ||
8018 | |||
7612 | /* | 8019 | /* |
7613 | * Do not allow to attach to a group in a different | 8020 | * Do not allow to attach to a group in a different |
7614 | * task or CPU context: | 8021 | * task or CPU context: |
@@ -7709,6 +8116,13 @@ SYSCALL_DEFINE5(perf_event_open, | |||
7709 | get_ctx(ctx); | 8116 | get_ctx(ctx); |
7710 | } | 8117 | } |
7711 | 8118 | ||
8119 | if (!exclusive_event_installable(event, ctx)) { | ||
8120 | err = -EBUSY; | ||
8121 | mutex_unlock(&ctx->mutex); | ||
8122 | fput(event_file); | ||
8123 | goto err_context; | ||
8124 | } | ||
8125 | |||
7712 | perf_install_in_context(ctx, event, event->cpu); | 8126 | perf_install_in_context(ctx, event, event->cpu); |
7713 | perf_unpin_context(ctx); | 8127 | perf_unpin_context(ctx); |
7714 | 8128 | ||
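Both new -EBUSY paths guard "exclusive" PMUs, typically AUX/trace hardware that cannot service two events at once: such an event may not be opened as part of a group, and exclusive_event_installable() must agree before it is installed into a context. That helper is not part of the hunks shown here; the sketch below is an assumption about roughly what it checks, not the actual implementation:

#include <linux/perf_event.h>

/* assumed shape of the check: with PERF_PMU_CAP_EXCLUSIVE set, reject
 * the new event if any event already in the context sits on the same
 * PMU and overlaps it in CPU scope */
static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
{
	return e1->pmu == e2->pmu &&
	       (e1->cpu == e2->cpu || e1->cpu == -1 || e2->cpu == -1);
}

static bool exclusive_event_installable(struct perf_event *event,
					struct perf_event_context *ctx)
{
	struct perf_event *iter;

	if (!(event->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE))
		return true;

	list_for_each_entry(iter, &ctx->event_list, event_entry) {
		if (exclusive_event_match(iter, event))
			return false;
	}

	return true;
}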
@@ -7781,7 +8195,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
7781 | */ | 8195 | */ |
7782 | 8196 | ||
7783 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, | 8197 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, |
7784 | overflow_handler, context); | 8198 | overflow_handler, context, -1); |
7785 | if (IS_ERR(event)) { | 8199 | if (IS_ERR(event)) { |
7786 | err = PTR_ERR(event); | 8200 | err = PTR_ERR(event); |
7787 | goto err; | 8201 | goto err; |
@@ -7792,7 +8206,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
7792 | 8206 | ||
7793 | account_event(event); | 8207 | account_event(event); |
7794 | 8208 | ||
7795 | ctx = find_get_context(event->pmu, task, cpu); | 8209 | ctx = find_get_context(event->pmu, task, event); |
7796 | if (IS_ERR(ctx)) { | 8210 | if (IS_ERR(ctx)) { |
7797 | err = PTR_ERR(ctx); | 8211 | err = PTR_ERR(ctx); |
7798 | goto err_free; | 8212 | goto err_free; |
@@ -7800,6 +8214,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
7800 | 8214 | ||
7801 | WARN_ON_ONCE(ctx->parent_ctx); | 8215 | WARN_ON_ONCE(ctx->parent_ctx); |
7802 | mutex_lock(&ctx->mutex); | 8216 | mutex_lock(&ctx->mutex); |
8217 | if (!exclusive_event_installable(event, ctx)) { | ||
8218 | mutex_unlock(&ctx->mutex); | ||
8219 | perf_unpin_context(ctx); | ||
8220 | put_ctx(ctx); | ||
8221 | err = -EBUSY; | ||
8222 | goto err_free; | ||
8223 | } | ||
8224 | |||
7803 | perf_install_in_context(ctx, event, cpu); | 8225 | perf_install_in_context(ctx, event, cpu); |
7804 | perf_unpin_context(ctx); | 8226 | perf_unpin_context(ctx); |
7805 | mutex_unlock(&ctx->mutex); | 8227 | mutex_unlock(&ctx->mutex); |
@@ -8142,7 +8564,7 @@ inherit_event(struct perf_event *parent_event, | |||
8142 | parent_event->cpu, | 8564 | parent_event->cpu, |
8143 | child, | 8565 | child, |
8144 | group_leader, parent_event, | 8566 | group_leader, parent_event, |
8145 | NULL, NULL); | 8567 | NULL, NULL, -1); |
8146 | if (IS_ERR(child_event)) | 8568 | if (IS_ERR(child_event)) |
8147 | return child_event; | 8569 | return child_event; |
8148 | 8570 | ||
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 9803a6600d49..92ce5f4ccc26 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) | |||
116 | */ | 116 | */ |
117 | static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) | 117 | static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) |
118 | { | 118 | { |
119 | struct task_struct *tsk = bp->hw.bp_target; | 119 | struct task_struct *tsk = bp->hw.target; |
120 | struct perf_event *iter; | 120 | struct perf_event *iter; |
121 | int count = 0; | 121 | int count = 0; |
122 | 122 | ||
123 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { | 123 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { |
124 | if (iter->hw.bp_target == tsk && | 124 | if (iter->hw.target == tsk && |
125 | find_slot_idx(iter) == type && | 125 | find_slot_idx(iter) == type && |
126 | (iter->cpu < 0 || cpu == iter->cpu)) | 126 | (iter->cpu < 0 || cpu == iter->cpu)) |
127 | count += hw_breakpoint_weight(iter); | 127 | count += hw_breakpoint_weight(iter); |
@@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, | |||
153 | int nr; | 153 | int nr; |
154 | 154 | ||
155 | nr = info->cpu_pinned; | 155 | nr = info->cpu_pinned; |
156 | if (!bp->hw.bp_target) | 156 | if (!bp->hw.target) |
157 | nr += max_task_bp_pinned(cpu, type); | 157 | nr += max_task_bp_pinned(cpu, type); |
158 | else | 158 | else |
159 | nr += task_bp_pinned(cpu, bp, type); | 159 | nr += task_bp_pinned(cpu, bp, type); |
@@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, | |||
210 | weight = -weight; | 210 | weight = -weight; |
211 | 211 | ||
212 | /* Pinned counter cpu profiling */ | 212 | /* Pinned counter cpu profiling */ |
213 | if (!bp->hw.bp_target) { | 213 | if (!bp->hw.target) { |
214 | get_bp_info(bp->cpu, type)->cpu_pinned += weight; | 214 | get_bp_info(bp->cpu, type)->cpu_pinned += weight; |
215 | return; | 215 | return; |
216 | } | 216 | } |
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 569b218782ad..9f6ce9ba4a04 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h | |||
@@ -27,6 +27,7 @@ struct ring_buffer { | |||
27 | local_t lost; /* nr records lost */ | 27 | local_t lost; /* nr records lost */ |
28 | 28 | ||
29 | long watermark; /* wakeup watermark */ | 29 | long watermark; /* wakeup watermark */ |
30 | long aux_watermark; | ||
30 | /* poll crap */ | 31 | /* poll crap */ |
31 | spinlock_t event_lock; | 32 | spinlock_t event_lock; |
32 | struct list_head event_list; | 33 | struct list_head event_list; |
@@ -35,6 +36,20 @@ struct ring_buffer { | |||
35 | unsigned long mmap_locked; | 36 | unsigned long mmap_locked; |
36 | struct user_struct *mmap_user; | 37 | struct user_struct *mmap_user; |
37 | 38 | ||
39 | /* AUX area */ | ||
40 | local_t aux_head; | ||
41 | local_t aux_nest; | ||
42 | local_t aux_wakeup; | ||
43 | unsigned long aux_pgoff; | ||
44 | int aux_nr_pages; | ||
45 | int aux_overwrite; | ||
46 | atomic_t aux_mmap_count; | ||
47 | unsigned long aux_mmap_locked; | ||
48 | void (*free_aux)(void *); | ||
49 | atomic_t aux_refcount; | ||
50 | void **aux_pages; | ||
51 | void *aux_priv; | ||
52 | |||
38 | struct perf_event_mmap_page *user_page; | 53 | struct perf_event_mmap_page *user_page; |
39 | void *data_pages[0]; | 54 | void *data_pages[0]; |
40 | }; | 55 | }; |
@@ -43,6 +58,19 @@ extern void rb_free(struct ring_buffer *rb); | |||
43 | extern struct ring_buffer * | 58 | extern struct ring_buffer * |
44 | rb_alloc(int nr_pages, long watermark, int cpu, int flags); | 59 | rb_alloc(int nr_pages, long watermark, int cpu, int flags); |
45 | extern void perf_event_wakeup(struct perf_event *event); | 60 | extern void perf_event_wakeup(struct perf_event *event); |
61 | extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, | ||
62 | pgoff_t pgoff, int nr_pages, long watermark, int flags); | ||
63 | extern void rb_free_aux(struct ring_buffer *rb); | ||
64 | extern struct ring_buffer *ring_buffer_get(struct perf_event *event); | ||
65 | extern void ring_buffer_put(struct ring_buffer *rb); | ||
66 | |||
67 | static inline bool rb_has_aux(struct ring_buffer *rb) | ||
68 | { | ||
69 | return !!rb->aux_nr_pages; | ||
70 | } | ||
71 | |||
72 | void perf_event_aux_event(struct perf_event *event, unsigned long head, | ||
73 | unsigned long size, u64 flags); | ||
46 | 74 | ||
47 | extern void | 75 | extern void |
48 | perf_event_header__init_id(struct perf_event_header *header, | 76 | perf_event_header__init_id(struct perf_event_header *header, |
@@ -81,6 +109,11 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) | |||
81 | return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); | 109 | return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); |
82 | } | 110 | } |
83 | 111 | ||
112 | static inline unsigned long perf_aux_size(struct ring_buffer *rb) | ||
113 | { | ||
114 | return rb->aux_nr_pages << PAGE_SHIFT; | ||
115 | } | ||
116 | |||
84 | #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ | 117 | #define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ |
85 | static inline unsigned long \ | 118 | static inline unsigned long \ |
86 | func_name(struct perf_output_handle *handle, \ | 119 | func_name(struct perf_output_handle *handle, \ |
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index eadb95ce7aac..232f00f273cb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c | |||
@@ -243,14 +243,317 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) | |||
243 | spin_lock_init(&rb->event_lock); | 243 | spin_lock_init(&rb->event_lock); |
244 | } | 244 | } |
245 | 245 | ||
246 | /* | ||
247 | * This is called before hardware starts writing to the AUX area to | ||
248 | * obtain an output handle and make sure there's room in the buffer. | ||
249 | * When the capture completes, call perf_aux_output_end() to commit | ||
250 | * the recorded data to the buffer. | ||
251 | * | ||
252 | * The ordering is similar to that of perf_output_{begin,end}, with | ||
253 | * the exception of (B), which should be taken care of by the pmu | ||
254 | * driver, since ordering rules will differ depending on hardware. | ||
255 | */ | ||
256 | void *perf_aux_output_begin(struct perf_output_handle *handle, | ||
257 | struct perf_event *event) | ||
258 | { | ||
259 | struct perf_event *output_event = event; | ||
260 | unsigned long aux_head, aux_tail; | ||
261 | struct ring_buffer *rb; | ||
262 | |||
263 | if (output_event->parent) | ||
264 | output_event = output_event->parent; | ||
265 | |||
266 | /* | ||
267 | * Since this will typically be open across pmu::add/pmu::del, we | ||
268 | * grab ring_buffer's refcount instead of holding rcu read lock | ||
269 | * to make sure it doesn't disappear under us. | ||
270 | */ | ||
271 | rb = ring_buffer_get(output_event); | ||
272 | if (!rb) | ||
273 | return NULL; | ||
274 | |||
275 | if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) | ||
276 | goto err; | ||
277 | |||
278 | /* | ||
279 | * Nesting is not supported for AUX area, make sure nested | ||
280 | * writers are caught early | ||
281 | */ | ||
282 | if (WARN_ON_ONCE(local_xchg(&rb->aux_nest, 1))) | ||
283 | goto err_put; | ||
284 | |||
285 | aux_head = local_read(&rb->aux_head); | ||
286 | |||
287 | handle->rb = rb; | ||
288 | handle->event = event; | ||
289 | handle->head = aux_head; | ||
290 | handle->size = 0; | ||
291 | |||
292 | /* | ||
293 | * In overwrite mode, AUX data stores do not depend on aux_tail, | ||
294 | * therefore (A) control dependency barrier does not exist. The | ||
295 | * (B) <-> (C) ordering is still observed by the pmu driver. | ||
296 | */ | ||
297 | if (!rb->aux_overwrite) { | ||
298 | aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); | ||
299 | handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark; | ||
300 | if (aux_head - aux_tail < perf_aux_size(rb)) | ||
301 | handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); | ||
302 | |||
303 | /* | ||
304 | * handle->size computation depends on aux_tail load; this forms a | ||
305 | * control dependency barrier separating aux_tail load from aux data | ||
306 | * store that will be enabled on successful return | ||
307 | */ | ||
308 | if (!handle->size) { /* A, matches D */ | ||
309 | event->pending_disable = 1; | ||
310 | perf_output_wakeup(handle); | ||
311 | local_set(&rb->aux_nest, 0); | ||
312 | goto err_put; | ||
313 | } | ||
314 | } | ||
315 | |||
316 | return handle->rb->aux_priv; | ||
317 | |||
318 | err_put: | ||
319 | rb_free_aux(rb); | ||
320 | |||
321 | err: | ||
322 | ring_buffer_put(rb); | ||
323 | handle->event = NULL; | ||
324 | |||
325 | return NULL; | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * Commit the data written by hardware into the ring buffer by adjusting | ||
330 | * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the | ||
331 | * pmu driver's responsibility to observe ordering rules of the hardware, | ||
332 | * so that all the data is externally visible before this is called. | ||
333 | */ | ||
334 | void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, | ||
335 | bool truncated) | ||
336 | { | ||
337 | struct ring_buffer *rb = handle->rb; | ||
338 | unsigned long aux_head; | ||
339 | u64 flags = 0; | ||
340 | |||
341 | if (truncated) | ||
342 | flags |= PERF_AUX_FLAG_TRUNCATED; | ||
343 | |||
344 | /* in overwrite mode, driver provides aux_head via handle */ | ||
345 | if (rb->aux_overwrite) { | ||
346 | flags |= PERF_AUX_FLAG_OVERWRITE; | ||
347 | |||
348 | aux_head = handle->head; | ||
349 | local_set(&rb->aux_head, aux_head); | ||
350 | } else { | ||
351 | aux_head = local_read(&rb->aux_head); | ||
352 | local_add(size, &rb->aux_head); | ||
353 | } | ||
354 | |||
355 | if (size || flags) { | ||
356 | /* | ||
357 | * Only send RECORD_AUX if we have something useful to communicate | ||
358 | */ | ||
359 | |||
360 | perf_event_aux_event(handle->event, aux_head, size, flags); | ||
361 | } | ||
362 | |||
363 | aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); | ||
364 | |||
365 | if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { | ||
366 | perf_output_wakeup(handle); | ||
367 | local_add(rb->aux_watermark, &rb->aux_wakeup); | ||
368 | } | ||
369 | handle->event = NULL; | ||
370 | |||
371 | local_set(&rb->aux_nest, 0); | ||
372 | rb_free_aux(rb); | ||
373 | ring_buffer_put(rb); | ||
374 | } | ||
375 | |||
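Taken together, perf_aux_output_begin()/perf_aux_output_end() form the driver-facing transaction API for the AUX area: begin reserves space and keeps the ring buffer pinned across pmu::add/pmu::del, end publishes the new aux_head, emits PERF_RECORD_AUX and wakes the consumer. A hedged sketch of how a hypothetical AUX-capable driver might use it; my_pmu_run_trace() stands in for the hardware programming and is not a real function, and a real driver would usually open the handle when the event starts and close it from its overflow interrupt rather than in one routine:

#include <linux/perf_event.h>

static void my_pmu_flush_aux(struct perf_event *event)
{
	struct perf_output_handle handle;
	unsigned long written;
	void *priv;

	/* reserve space; NULL means no AUX buffer or no room (consumer behind) */
	priv = perf_aux_output_begin(&handle, event);
	if (!priv)
		return;

	/* hypothetical: let the hardware fill at most handle.size bytes
	 * starting at offset handle.head of the AUX area */
	written = my_pmu_run_trace(priv, handle.head, handle.size);

	/* publish aux_head, emit PERF_RECORD_AUX, wake up the reader */
	perf_aux_output_end(&handle, written, false);
}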
376 | /* | ||
377 | * Skip over a given number of bytes in the AUX buffer, due to, for example, | ||
378 | * hardware's alignment constraints. | ||
379 | */ | ||
380 | int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) | ||
381 | { | ||
382 | struct ring_buffer *rb = handle->rb; | ||
383 | unsigned long aux_head; | ||
384 | |||
385 | if (size > handle->size) | ||
386 | return -ENOSPC; | ||
387 | |||
388 | local_add(size, &rb->aux_head); | ||
389 | |||
390 | aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); | ||
391 | if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { | ||
392 | perf_output_wakeup(handle); | ||
393 | local_add(rb->aux_watermark, &rb->aux_wakeup); | ||
394 | handle->wakeup = local_read(&rb->aux_wakeup) + | ||
395 | rb->aux_watermark; | ||
396 | } | ||
397 | |||
398 | handle->head = aux_head; | ||
399 | handle->size -= size; | ||
400 | |||
401 | return 0; | ||
402 | } | ||
403 | |||
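perf_aux_output_skip() lets a driver burn part of its reservation without writing data, for example to satisfy hardware alignment of the output pointer. A hypothetical helper built on it (the 64-byte alignment is an assumed hardware constraint, not something required by the code above):

#include <linux/perf_event.h>

static int my_pmu_align_aux(struct perf_output_handle *handle)
{
	/* pad the current AUX position up to the next 64-byte boundary */
	unsigned long pad = (64 - (handle->head & 63)) & 63;

	return pad ? perf_aux_output_skip(handle, pad) : 0;
}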
404 | void *perf_get_aux(struct perf_output_handle *handle) | ||
405 | { | ||
406 | /* this is only valid between perf_aux_output_begin and *_end */ | ||
407 | if (!handle->event) | ||
408 | return NULL; | ||
409 | |||
410 | return handle->rb->aux_priv; | ||
411 | } | ||
412 | |||
413 | #define PERF_AUX_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY) | ||
414 | |||
415 | static struct page *rb_alloc_aux_page(int node, int order) | ||
416 | { | ||
417 | struct page *page; | ||
418 | |||
419 | if (order > MAX_ORDER) | ||
420 | order = MAX_ORDER; | ||
421 | |||
422 | do { | ||
423 | page = alloc_pages_node(node, PERF_AUX_GFP, order); | ||
424 | } while (!page && order--); | ||
425 | |||
426 | if (page && order) { | ||
427 | /* | ||
428 | * Communicate the allocation size to the driver | ||
429 | */ | ||
430 | split_page(page, order); | ||
431 | SetPagePrivate(page); | ||
432 | set_page_private(page, order); | ||
433 | } | ||
434 | |||
435 | return page; | ||
436 | } | ||
437 | |||
438 | static void rb_free_aux_page(struct ring_buffer *rb, int idx) | ||
439 | { | ||
440 | struct page *page = virt_to_page(rb->aux_pages[idx]); | ||
441 | |||
442 | ClearPagePrivate(page); | ||
443 | page->mapping = NULL; | ||
444 | __free_page(page); | ||
445 | } | ||
446 | |||
447 | int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, | ||
448 | pgoff_t pgoff, int nr_pages, long watermark, int flags) | ||
449 | { | ||
450 | bool overwrite = !(flags & RING_BUFFER_WRITABLE); | ||
451 | int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu); | ||
452 | int ret = -ENOMEM, max_order = 0; | ||
453 | |||
454 | if (!has_aux(event)) | ||
455 | return -ENOTSUPP; | ||
456 | |||
457 | if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) { | ||
458 | /* | ||
459 | * We need to start with the max_order that fits in nr_pages, | ||
460 | * not the other way around, hence ilog2() and not get_order. | ||
461 | */ | ||
462 | max_order = ilog2(nr_pages); | ||
463 | |||
464 | /* | ||
465 | * PMU requests more than one contiguous chunk of memory | ||
466 | * for SW double buffering | ||
467 | */ | ||
468 | if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) && | ||
469 | !overwrite) { | ||
470 | if (!max_order) | ||
471 | return -EINVAL; | ||
472 | |||
473 | max_order--; | ||
474 | } | ||
475 | } | ||
476 | |||
477 | rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node); | ||
478 | if (!rb->aux_pages) | ||
479 | return -ENOMEM; | ||
480 | |||
481 | rb->free_aux = event->pmu->free_aux; | ||
482 | for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) { | ||
483 | struct page *page; | ||
484 | int last, order; | ||
485 | |||
486 | order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages)); | ||
487 | page = rb_alloc_aux_page(node, order); | ||
488 | if (!page) | ||
489 | goto out; | ||
490 | |||
491 | for (last = rb->aux_nr_pages + (1 << page_private(page)); | ||
492 | last > rb->aux_nr_pages; rb->aux_nr_pages++) | ||
493 | rb->aux_pages[rb->aux_nr_pages] = page_address(page++); | ||
494 | } | ||
495 | |||
496 | rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, | ||
497 | overwrite); | ||
498 | if (!rb->aux_priv) | ||
499 | goto out; | ||
500 | |||
501 | ret = 0; | ||
502 | |||
503 | /* | ||
504 | * aux_pages (and pmu driver's private data, aux_priv) will be | ||
505 | * referenced in both producer's and consumer's contexts, thus | ||
506 | * we keep a refcount here to make sure either of the two can | ||
507 | * reference them safely. | ||
508 | */ | ||
509 | atomic_set(&rb->aux_refcount, 1); | ||
510 | |||
511 | rb->aux_overwrite = overwrite; | ||
512 | rb->aux_watermark = watermark; | ||
513 | |||
514 | if (!rb->aux_watermark && !rb->aux_overwrite) | ||
515 | rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1); | ||
516 | |||
517 | out: | ||
518 | if (!ret) | ||
519 | rb->aux_pgoff = pgoff; | ||
520 | else | ||
521 | rb_free_aux(rb); | ||
522 | |||
523 | return ret; | ||
524 | } | ||
525 | |||
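The sizing dance at the top of rb_alloc_aux() only matters for PMUs that cannot scatter-gather (PERF_PMU_CAP_AUX_NO_SG): max_order caps the largest physically contiguous chunk, and PERF_PMU_CAP_AUX_SW_DOUBLEBUF halves it in non-overwrite mode so the driver ends up with at least two equal chunks to ping-pong between. An illustration of the arithmetic, re-derived outside the allocator (aux_max_chunk_order() is not a real kernel function, just a restatement of the logic above):

#include <linux/log2.h>
#include <linux/types.h>

/* Illustration: the chunk-size cap for a 64-page AUX area on a PMU with
 * both AUX_NO_SG and AUX_SW_DOUBLEBUF, mapped writable (non-overwrite).
 */
static int aux_max_chunk_order(int nr_pages, bool no_sg, bool sw_doublebuf,
			       bool overwrite)
{
	int max_order = 0;

	if (no_sg) {
		max_order = ilog2(nr_pages);	/* 64 pages -> order 6 */
		if (sw_doublebuf && !overwrite)
			max_order--;		/* halve for double buffering -> 5 */
	}

	/* rb_alloc_aux() then grabs chunks of order min(max_order, remaining):
	 * here, two physically contiguous 32-page chunks. */
	return max_order;
}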
526 | static void __rb_free_aux(struct ring_buffer *rb) | ||
527 | { | ||
528 | int pg; | ||
529 | |||
530 | if (rb->aux_priv) { | ||
531 | rb->free_aux(rb->aux_priv); | ||
532 | rb->free_aux = NULL; | ||
533 | rb->aux_priv = NULL; | ||
534 | } | ||
535 | |||
536 | for (pg = 0; pg < rb->aux_nr_pages; pg++) | ||
537 | rb_free_aux_page(rb, pg); | ||
538 | |||
539 | kfree(rb->aux_pages); | ||
540 | rb->aux_nr_pages = 0; | ||
541 | } | ||
542 | |||
543 | void rb_free_aux(struct ring_buffer *rb) | ||
544 | { | ||
545 | if (atomic_dec_and_test(&rb->aux_refcount)) | ||
546 | __rb_free_aux(rb); | ||
547 | } | ||
548 | |||
246 | #ifndef CONFIG_PERF_USE_VMALLOC | 549 | #ifndef CONFIG_PERF_USE_VMALLOC |
247 | 550 | ||
248 | /* | 551 | /* |
249 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. | 552 | * Back perf_mmap() with regular GFP_KERNEL-0 pages. |
250 | */ | 553 | */ |
251 | 554 | ||
252 | struct page * | 555 | static struct page * |
253 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | 556 | __perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) |
254 | { | 557 | { |
255 | if (pgoff > rb->nr_pages) | 558 | if (pgoff > rb->nr_pages) |
256 | return NULL; | 559 | return NULL; |
@@ -340,8 +643,8 @@ static int data_page_nr(struct ring_buffer *rb) | |||
340 | return rb->nr_pages << page_order(rb); | 643 | return rb->nr_pages << page_order(rb); |
341 | } | 644 | } |
342 | 645 | ||
343 | struct page * | 646 | static struct page * |
344 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | 647 | __perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) |
345 | { | 648 | { |
346 | /* The '>' counts in the user page. */ | 649 | /* The '>' counts in the user page. */ |
347 | if (pgoff > data_page_nr(rb)) | 650 | if (pgoff > data_page_nr(rb)) |
@@ -416,3 +719,19 @@ fail: | |||
416 | } | 719 | } |
417 | 720 | ||
418 | #endif | 721 | #endif |
722 | |||
723 | struct page * | ||
724 | perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) | ||
725 | { | ||
726 | if (rb->aux_nr_pages) { | ||
727 | /* above AUX space */ | ||
728 | if (pgoff > rb->aux_pgoff + rb->aux_nr_pages) | ||
729 | return NULL; | ||
730 | |||
731 | /* AUX space */ | ||
732 | if (pgoff >= rb->aux_pgoff) | ||
733 | return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); | ||
734 | } | ||
735 | |||
736 | return __perf_mmap_to_page(rb, pgoff); | ||
737 | } | ||
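perf_mmap_to_page() now routes page offsets at or above aux_pgoff into the AUX pages, which implies the user-space mapping convention used by the rest of this series: one mmap() for the user page plus the data buffer, then a second mmap() at the offset chosen via the user page's aux_offset/aux_size fields (those fields are assumed here; they are not visible in these hunks). A sketch with minimal error handling; mapping the AUX area read-only instead would request overwrite mode:

#include <linux/perf_event.h>
#include <sys/mman.h>
#include <unistd.h>

void *map_aux_area(int perf_fd, size_t data_pages, size_t aux_pages)
{
	long psz = sysconf(_SC_PAGESIZE);
	struct perf_event_mmap_page *up;
	void *base, *aux;

	/* user page + regular data buffer */
	base = mmap(NULL, (data_pages + 1) * psz, PROT_READ | PROT_WRITE,
		    MAP_SHARED, perf_fd, 0);
	if (base == MAP_FAILED)
		return NULL;

	up = base;
	up->aux_offset = (data_pages + 1) * psz;
	up->aux_size   = aux_pages * psz;

	/* AUX buffer; PROT_READ-only here would clear RING_BUFFER_WRITABLE */
	aux = mmap(NULL, up->aux_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   perf_fd, up->aux_offset);
	return aux == MAP_FAILED ? NULL : aux;
}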
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index fedbdd7d5d1e..3b9a48ae153a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -432,6 +432,14 @@ config UPROBE_EVENT | |||
432 | This option is required if you plan to use perf-probe subcommand | 432 | This option is required if you plan to use perf-probe subcommand |
433 | of perf tools on user space applications. | 433 | of perf tools on user space applications. |
434 | 434 | ||
435 | config BPF_EVENTS | ||
436 | depends on BPF_SYSCALL | ||
437 | depends on KPROBE_EVENT | ||
438 | bool | ||
439 | default y | ||
440 | help | ||
441 | This allows the user to attach BPF programs to kprobe events. | ||
442 | |||
435 | config PROBE_EVENTS | 443 | config PROBE_EVENTS |
436 | def_bool n | 444 | def_bool n |
437 | 445 | ||
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 98f26588255e..9b1044e936a6 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o | |||
53 | endif | 53 | endif |
54 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o | 54 | obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o |
55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o | 55 | obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o |
56 | obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o | ||
56 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o | 57 | obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o |
57 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o | 58 | obj-$(CONFIG_TRACEPOINTS) += power-traces.o |
58 | ifeq ($(CONFIG_PM),y) | 59 | ifeq ($(CONFIG_PM),y) |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c new file mode 100644 index 000000000000..2d56ce501632 --- /dev/null +++ b/kernel/trace/bpf_trace.c | |||
@@ -0,0 +1,222 @@ | |||
1 | /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | */ | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/types.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/bpf.h> | ||
11 | #include <linux/filter.h> | ||
12 | #include <linux/uaccess.h> | ||
13 | #include <linux/ctype.h> | ||
14 | #include "trace.h" | ||
15 | |||
16 | static DEFINE_PER_CPU(int, bpf_prog_active); | ||
17 | |||
18 | /** | ||
19 | * trace_call_bpf - invoke BPF program | ||
20 | * @prog: BPF program | ||
21 | * @ctx: opaque context pointer | ||
22 | * | ||
23 | * kprobe handlers execute BPF programs via this helper. | ||
24 | * Can be used from static tracepoints in the future. | ||
25 | * | ||
26 | * Return: BPF programs always return an integer which is interpreted by | ||
27 | * kprobe handler as: | ||
28 | * 0 - return from kprobe (event is filtered out) | ||
29 | * 1 - store kprobe event into ring buffer | ||
30 | * Other values are reserved and currently alias to 1 | ||
31 | */ | ||
32 | unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) | ||
33 | { | ||
34 | unsigned int ret; | ||
35 | |||
36 | if (in_nmi()) /* not supported yet */ | ||
37 | return 1; | ||
38 | |||
39 | preempt_disable(); | ||
40 | |||
41 | if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { | ||
42 | /* | ||
43 | * since some bpf program is already running on this cpu, | ||
44 | * don't call into another bpf program (same or different) | ||
45 | * and don't send kprobe event into ring-buffer, | ||
46 | * so return zero here | ||
47 | */ | ||
48 | ret = 0; | ||
49 | goto out; | ||
50 | } | ||
51 | |||
52 | rcu_read_lock(); | ||
53 | ret = BPF_PROG_RUN(prog, ctx); | ||
54 | rcu_read_unlock(); | ||
55 | |||
56 | out: | ||
57 | __this_cpu_dec(bpf_prog_active); | ||
58 | preempt_enable(); | ||
59 | |||
60 | return ret; | ||
61 | } | ||
62 | EXPORT_SYMBOL_GPL(trace_call_bpf); | ||
63 | |||
64 | static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
65 | { | ||
66 | void *dst = (void *) (long) r1; | ||
67 | int size = (int) r2; | ||
68 | void *unsafe_ptr = (void *) (long) r3; | ||
69 | |||
70 | return probe_kernel_read(dst, unsafe_ptr, size); | ||
71 | } | ||
72 | |||
73 | static const struct bpf_func_proto bpf_probe_read_proto = { | ||
74 | .func = bpf_probe_read, | ||
75 | .gpl_only = true, | ||
76 | .ret_type = RET_INTEGER, | ||
77 | .arg1_type = ARG_PTR_TO_STACK, | ||
78 | .arg2_type = ARG_CONST_STACK_SIZE, | ||
79 | .arg3_type = ARG_ANYTHING, | ||
80 | }; | ||
81 | |||
82 | static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
83 | { | ||
84 | /* NMI safe access to clock monotonic */ | ||
85 | return ktime_get_mono_fast_ns(); | ||
86 | } | ||
87 | |||
88 | static const struct bpf_func_proto bpf_ktime_get_ns_proto = { | ||
89 | .func = bpf_ktime_get_ns, | ||
90 | .gpl_only = true, | ||
91 | .ret_type = RET_INTEGER, | ||
92 | }; | ||
93 | |||
94 | /* | ||
95 | * limited trace_printk() | ||
96 | * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed | ||
97 | */ | ||
98 | static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) | ||
99 | { | ||
100 | char *fmt = (char *) (long) r1; | ||
101 | int mod[3] = {}; | ||
102 | int fmt_cnt = 0; | ||
103 | int i; | ||
104 | |||
105 | /* | ||
106 | * bpf_check()->check_func_arg()->check_stack_boundary() | ||
107 | * guarantees that fmt points to bpf program stack, | ||
108 | * fmt_size bytes of it were initialized and fmt_size > 0 | ||
109 | */ | ||
110 | if (fmt[--fmt_size] != 0) | ||
111 | return -EINVAL; | ||
112 | |||
113 | /* check format string for allowed specifiers */ | ||
114 | for (i = 0; i < fmt_size; i++) { | ||
115 | if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) | ||
116 | return -EINVAL; | ||
117 | |||
118 | if (fmt[i] != '%') | ||
119 | continue; | ||
120 | |||
121 | if (fmt_cnt >= 3) | ||
122 | return -EINVAL; | ||
123 | |||
124 | /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */ | ||
125 | i++; | ||
126 | if (fmt[i] == 'l') { | ||
127 | mod[fmt_cnt]++; | ||
128 | i++; | ||
129 | } else if (fmt[i] == 'p') { | ||
130 | mod[fmt_cnt]++; | ||
131 | i++; | ||
132 | if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) | ||
133 | return -EINVAL; | ||
134 | fmt_cnt++; | ||
135 | continue; | ||
136 | } | ||
137 | |||
138 | if (fmt[i] == 'l') { | ||
139 | mod[fmt_cnt]++; | ||
140 | i++; | ||
141 | } | ||
142 | |||
143 | if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') | ||
144 | return -EINVAL; | ||
145 | fmt_cnt++; | ||
146 | } | ||
147 | |||
148 | return __trace_printk(1/* fake ip will not be printed */, fmt, | ||
149 | mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, | ||
150 | mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, | ||
151 | mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); | ||
152 | } | ||
153 | |||
154 | static const struct bpf_func_proto bpf_trace_printk_proto = { | ||
155 | .func = bpf_trace_printk, | ||
156 | .gpl_only = true, | ||
157 | .ret_type = RET_INTEGER, | ||
158 | .arg1_type = ARG_PTR_TO_STACK, | ||
159 | .arg2_type = ARG_CONST_STACK_SIZE, | ||
160 | }; | ||
161 | |||
162 | static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) | ||
163 | { | ||
164 | switch (func_id) { | ||
165 | case BPF_FUNC_map_lookup_elem: | ||
166 | return &bpf_map_lookup_elem_proto; | ||
167 | case BPF_FUNC_map_update_elem: | ||
168 | return &bpf_map_update_elem_proto; | ||
169 | case BPF_FUNC_map_delete_elem: | ||
170 | return &bpf_map_delete_elem_proto; | ||
171 | case BPF_FUNC_probe_read: | ||
172 | return &bpf_probe_read_proto; | ||
173 | case BPF_FUNC_ktime_get_ns: | ||
174 | return &bpf_ktime_get_ns_proto; | ||
175 | |||
176 | case BPF_FUNC_trace_printk: | ||
177 | /* | ||
178 | * this program might be calling bpf_trace_printk, | ||
179 | * so allocate per-cpu printk buffers | ||
180 | */ | ||
181 | trace_printk_init_buffers(); | ||
182 | |||
183 | return &bpf_trace_printk_proto; | ||
184 | default: | ||
185 | return NULL; | ||
186 | } | ||
187 | } | ||
188 | |||
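The switch above is the complete whitelist for BPF_PROG_TYPE_KPROBE programs: the three map helpers, bpf_probe_read, bpf_ktime_get_ns and the restricted bpf_trace_printk. A hedged sketch of such a program in the style of the kernel's samples/bpf; the SEC() macro, the helper declarations and the clang/LLVM BPF build are assumed conventions of that sample code, the BPF_FUNC_* ids come from the uapi side of this series, and "kprobe/sys_write" is just an illustrative attach point:

#include <uapi/linux/bpf.h>
#include <linux/ptrace.h>
#include <linux/types.h>

#define SEC(name) __attribute__((section(name), used))

/* helper stubs resolved by the in-kernel verifier/interpreter */
static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
	(void *) BPF_FUNC_trace_printk;
static __u64 (*bpf_ktime_get_ns)(void) =
	(void *) BPF_FUNC_ktime_get_ns;

SEC("kprobe/sys_write")
int trace_sys_write(struct pt_regs *ctx)
{
	char fmt[] = "sys_write at %llu ns\n";	/* only %d %u %x %l[dux] %ll[dux] %p allowed */
	__u64 now = bpf_ktime_get_ns();

	bpf_trace_printk(fmt, sizeof(fmt), now);

	return 1;	/* 1: record the kprobe event, 0: filter it out */
}

/* the helpers above are gpl_only, so the license section must say so */
char _license[] SEC("license") = "GPL";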
189 | /* bpf+kprobe programs can access fields of 'struct pt_regs' */ | ||
190 | static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) | ||
191 | { | ||
192 | /* check bounds */ | ||
193 | if (off < 0 || off >= sizeof(struct pt_regs)) | ||
194 | return false; | ||
195 | |||
196 | /* only read is allowed */ | ||
197 | if (type != BPF_READ) | ||
198 | return false; | ||
199 | |||
200 | /* disallow misaligned access */ | ||
201 | if (off % size != 0) | ||
202 | return false; | ||
203 | |||
204 | return true; | ||
205 | } | ||
206 | |||
207 | static struct bpf_verifier_ops kprobe_prog_ops = { | ||
208 | .get_func_proto = kprobe_prog_func_proto, | ||
209 | .is_valid_access = kprobe_prog_is_valid_access, | ||
210 | }; | ||
211 | |||
212 | static struct bpf_prog_type_list kprobe_tl = { | ||
213 | .ops = &kprobe_prog_ops, | ||
214 | .type = BPF_PROG_TYPE_KPROBE, | ||
215 | }; | ||
216 | |||
217 | static int __init register_kprobe_prog_ops(void) | ||
218 | { | ||
219 | bpf_register_prog_type(&kprobe_tl); | ||
220 | return 0; | ||
221 | } | ||
222 | late_initcall(register_kprobe_prog_ops); | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9ba3f43f580e..d0ce590f06e1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1135,11 +1135,15 @@ static void | |||
1135 | kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) | 1135 | kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) |
1136 | { | 1136 | { |
1137 | struct ftrace_event_call *call = &tk->tp.call; | 1137 | struct ftrace_event_call *call = &tk->tp.call; |
1138 | struct bpf_prog *prog = call->prog; | ||
1138 | struct kprobe_trace_entry_head *entry; | 1139 | struct kprobe_trace_entry_head *entry; |
1139 | struct hlist_head *head; | 1140 | struct hlist_head *head; |
1140 | int size, __size, dsize; | 1141 | int size, __size, dsize; |
1141 | int rctx; | 1142 | int rctx; |
1142 | 1143 | ||
1144 | if (prog && !trace_call_bpf(prog, regs)) | ||
1145 | return; | ||
1146 | |||
1143 | head = this_cpu_ptr(call->perf_events); | 1147 | head = this_cpu_ptr(call->perf_events); |
1144 | if (hlist_empty(head)) | 1148 | if (hlist_empty(head)) |
1145 | return; | 1149 | return; |
@@ -1166,11 +1170,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, | |||
1166 | struct pt_regs *regs) | 1170 | struct pt_regs *regs) |
1167 | { | 1171 | { |
1168 | struct ftrace_event_call *call = &tk->tp.call; | 1172 | struct ftrace_event_call *call = &tk->tp.call; |
1173 | struct bpf_prog *prog = call->prog; | ||
1169 | struct kretprobe_trace_entry_head *entry; | 1174 | struct kretprobe_trace_entry_head *entry; |
1170 | struct hlist_head *head; | 1175 | struct hlist_head *head; |
1171 | int size, __size, dsize; | 1176 | int size, __size, dsize; |
1172 | int rctx; | 1177 | int rctx; |
1173 | 1178 | ||
1179 | if (prog && !trace_call_bpf(prog, regs)) | ||
1180 | return; | ||
1181 | |||
1174 | head = this_cpu_ptr(call->perf_events); | 1182 | head = this_cpu_ptr(call->perf_events); |
1175 | if (hlist_empty(head)) | 1183 | if (hlist_empty(head)) |
1176 | return; | 1184 | return; |
@@ -1287,7 +1295,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) | |||
1287 | kfree(call->print_fmt); | 1295 | kfree(call->print_fmt); |
1288 | return -ENODEV; | 1296 | return -ENODEV; |
1289 | } | 1297 | } |
1290 | call->flags = 0; | 1298 | call->flags = TRACE_EVENT_FL_KPROBE; |
1291 | call->class->reg = kprobe_register; | 1299 | call->class->reg = kprobe_register; |
1292 | call->data = tk; | 1300 | call->data = tk; |
1293 | ret = trace_add_event_call(call); | 1301 | ret = trace_add_event_call(call); |
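With call->prog consulted at the top of kprobe_perf_func()/kretprobe_perf_func(), a program attached to the kprobe's perf event can veto recording by returning 0. Attaching goes through perf: create the kprobe with the existing kprobe_events tracefs interface, open a PERF_TYPE_TRACEPOINT event on it, then hand over the program fd with PERF_EVENT_IOC_SET_BPF, the ioctl this series adds on the perf side (not visible in these hunks). A sketch, assuming tracefs is mounted under /sys/kernel/debug/tracing and with error handling trimmed:

#include <fcntl.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int attach_bpf_to_kprobe(int prog_fd)
{
	static const char probe[] = "p:myprobe sys_write\n";
	struct perf_event_attr attr;
	int kfd, efd, id;
	FILE *f;

	/* 1. create the kprobe */
	kfd = open("/sys/kernel/debug/tracing/kprobe_events",
		   O_WRONLY | O_APPEND);
	if (kfd < 0 || write(kfd, probe, strlen(probe)) < 0)
		return -1;
	close(kfd);

	/* 2. look up the event id of the new kprobe */
	f = fopen("/sys/kernel/debug/tracing/events/kprobes/myprobe/id", "r");
	if (!f || fscanf(f, "%d", &id) != 1)
		return -1;
	fclose(f);

	/* 3. open a perf event on it and attach the BPF program */
	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.config = id;
	attr.sample_period = 1;

	efd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		      -1 /* group */, 0);
	if (efd < 0)
		return -1;

	if (ioctl(efd, PERF_EVENT_IOC_ENABLE, 0) ||
	    ioctl(efd, PERF_EVENT_IOC_SET_BPF, prog_fd))
		return -1;

	return efd;
}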
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 74865465e0b7..d60fe62ec4fa 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -1006,7 +1006,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | |||
1006 | return true; | 1006 | return true; |
1007 | 1007 | ||
1008 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { | 1008 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { |
1009 | if (event->hw.tp_target->mm == mm) | 1009 | if (event->hw.target->mm == mm) |
1010 | return true; | 1010 | return true; |
1011 | } | 1011 | } |
1012 | 1012 | ||
@@ -1016,7 +1016,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | |||
1016 | static inline bool | 1016 | static inline bool |
1017 | uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) | 1017 | uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) |
1018 | { | 1018 | { |
1019 | return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); | 1019 | return __uprobe_perf_filter(&tu->filter, event->hw.target->mm); |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | 1022 | static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) |
@@ -1024,10 +1024,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | |||
1024 | bool done; | 1024 | bool done; |
1025 | 1025 | ||
1026 | write_lock(&tu->filter.rwlock); | 1026 | write_lock(&tu->filter.rwlock); |
1027 | if (event->hw.tp_target) { | 1027 | if (event->hw.target) { |
1028 | list_del(&event->hw.tp_list); | 1028 | list_del(&event->hw.tp_list); |
1029 | done = tu->filter.nr_systemwide || | 1029 | done = tu->filter.nr_systemwide || |
1030 | (event->hw.tp_target->flags & PF_EXITING) || | 1030 | (event->hw.target->flags & PF_EXITING) || |
1031 | uprobe_filter_event(tu, event); | 1031 | uprobe_filter_event(tu, event); |
1032 | } else { | 1032 | } else { |
1033 | tu->filter.nr_systemwide--; | 1033 | tu->filter.nr_systemwide--; |
@@ -1047,7 +1047,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) | |||
1047 | int err; | 1047 | int err; |
1048 | 1048 | ||
1049 | write_lock(&tu->filter.rwlock); | 1049 | write_lock(&tu->filter.rwlock); |
1050 | if (event->hw.tp_target) { | 1050 | if (event->hw.target) { |
1051 | /* | 1051 | /* |
1052 | * event->parent != NULL means copy_process(), we can avoid | 1052 | * event->parent != NULL means copy_process(), we can avoid |
1053 | * uprobe_apply(). current->mm must be probed and we can rely | 1053 | * uprobe_apply(). current->mm must be probed and we can rely |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 3174bf8e3538..9a056f5bc02c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -567,9 +567,37 @@ static void watchdog_nmi_disable(unsigned int cpu) | |||
567 | cpu0_err = 0; | 567 | cpu0_err = 0; |
568 | } | 568 | } |
569 | } | 569 | } |
570 | |||
571 | void watchdog_nmi_enable_all(void) | ||
572 | { | ||
573 | int cpu; | ||
574 | |||
575 | if (!watchdog_user_enabled) | ||
576 | return; | ||
577 | |||
578 | get_online_cpus(); | ||
579 | for_each_online_cpu(cpu) | ||
580 | watchdog_nmi_enable(cpu); | ||
581 | put_online_cpus(); | ||
582 | } | ||
583 | |||
584 | void watchdog_nmi_disable_all(void) | ||
585 | { | ||
586 | int cpu; | ||
587 | |||
588 | if (!watchdog_running) | ||
589 | return; | ||
590 | |||
591 | get_online_cpus(); | ||
592 | for_each_online_cpu(cpu) | ||
593 | watchdog_nmi_disable(cpu); | ||
594 | put_online_cpus(); | ||
595 | } | ||
570 | #else | 596 | #else |
571 | static int watchdog_nmi_enable(unsigned int cpu) { return 0; } | 597 | static int watchdog_nmi_enable(unsigned int cpu) { return 0; } |
572 | static void watchdog_nmi_disable(unsigned int cpu) { return; } | 598 | static void watchdog_nmi_disable(unsigned int cpu) { return; } |
599 | void watchdog_nmi_enable_all(void) {} | ||
600 | void watchdog_nmi_disable_all(void) {} | ||
573 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ | 601 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
574 | 602 | ||
575 | static struct smp_hotplug_thread watchdog_threads = { | 603 | static struct smp_hotplug_thread watchdog_threads = { |