author     Yan, Zheng <zheng.z.yan@intel.com>    2014-11-04 21:55:58 -0500
committer  Ingo Molnar <mingo@kernel.org>        2015-02-18 11:16:02 -0500
commit     ba532500c5651a4be4108acc64ed99a95cb005b3 (patch)
tree       6fe0a328c1b461bea944dd4b1c9aa46fb71b0fd4 /kernel/events
parent     27ac905b8f88d28779b0661809286b5ba2817d37 (diff)
perf: Introduce pmu context switch callback
The callback is invoked when a process is scheduled in or out.
It provides a mechanism for later patches to save/restore the
LBR stack. For the schedule-in case, the callback is invoked at
the same place as the flush-branch-stack callback, so it can
also replace that callback. To avoid unnecessary overhead, the
callback is enabled only when there are events that use the LBR
stack.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1415156173-10035-3-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
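
[Editor's note] As context for the change below, here is a minimal sketch of how an architecture PMU driver might use the new callback. It is not part of this patch: it assumes the companion include/linux/perf_event.h change adds a ->sched_task(struct perf_event_context *, bool) member to struct pmu, and the names my_pmu, my_pmu_add(), my_pmu_del(), my_lbr_save(), my_lbr_restore() and needs_lbr() are illustrative placeholders, not real kernel symbols.

#include <linux/perf_event.h>

/* Placeholder LBR save/restore helpers; a real driver would touch MSRs here. */
static void my_lbr_save(void)    { /* stash the outgoing task's LBR stack */ }
static void my_lbr_restore(void) { /* reload the incoming task's LBR stack */ }

static bool needs_lbr(struct perf_event *event)
{
	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
}

/* Runs on every context switch once perf_sched_cb_inc() has been called. */
static void my_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (sched_in)
		my_lbr_restore();
	else
		my_lbr_save();
}

static int my_pmu_add(struct perf_event *event, int flags)
{
	/* Enable the per-CPU callback only while an LBR user exists. */
	if (needs_lbr(event))
		perf_sched_cb_inc(event->pmu);
	/* ... program the counter ... */
	return 0;
}

static void my_pmu_del(struct perf_event *event, int flags)
{
	if (needs_lbr(event))
		perf_sched_cb_dec(event->pmu);
	/* ... stop the counter ... */
}

static struct pmu my_pmu = {
	.sched_task	= my_pmu_sched_task,
	.add		= my_pmu_add,
	.del		= my_pmu_del,
	/* ... other callbacks omitted ... */
};

Because perf_sched_cb_usages is per-CPU and adjusted from the pmu::add/del paths, which run on the event's CPU, the context switch callback only fires on CPUs that actually have an LBR user.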
Diffstat (limited to 'kernel/events')
-rw-r--r--  kernel/events/core.c  57
1 file changed, 57 insertions(+), 0 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fef45b4bb5f8..6c8b31b7efb6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -154,6 +154,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2577,6 +2578,56 @@ unlock:
 	}
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+	this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+	this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (pmu->sched_task) {
+			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)				\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2596,6 +2647,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2847,6 +2901,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	/* check for system-wide branch_stack events */
 	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
 		perf_branch_stack_sched_in(prev, task);
+
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)