path: root/kernel/events
author	Yan, Zheng <zheng.z.yan@intel.com>	2014-11-04 21:55:58 -0500
committer	Ingo Molnar <mingo@kernel.org>	2015-02-18 11:16:02 -0500
commit	ba532500c5651a4be4108acc64ed99a95cb005b3 (patch)
tree	6fe0a328c1b461bea944dd4b1c9aa46fb71b0fd4 /kernel/events
parent	27ac905b8f88d28779b0661809286b5ba2817d37 (diff)
perf: Introduce pmu context switch callback
The callback is invoked when a process is scheduled in or out. It provides a mechanism for later patches to save/restore the LBR stack. For the schedule-in case, the callback is invoked at the same place as the flush branch stack callback, so it can also replace the flush branch stack callback. To avoid unnecessary overhead, the callback is enabled only when there are events that use the LBR stack.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1415156173-10035-3-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
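As a rough usage sketch (not part of this patch): a PMU driver with per-task hardware state, such as an LBR stack, would set the new ->sched_task callback on its struct pmu and bump the per-cpu refcount with perf_sched_cb_inc()/perf_sched_cb_dec() while such events exist. The example_* names below are hypothetical and only illustrate the interface added here; they are not the later x86 implementation.

/* Hypothetical driver-side sketch; all example_* identifiers are made up. */
#include <linux/perf_event.h>

static void example_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	if (sched_in) {
		/* restore this task's hardware state, e.g. its LBR stack */
	} else {
		/* save this task's hardware state */
	}
}

static struct pmu example_pmu = {
	/* ... the usual pmu callbacks (add, del, start, stop, ...) ... */
	.sched_task	= example_pmu_sched_task,
};

static void example_lbr_event_enable(struct perf_event *event)
{
	/* enable the context switch callback only while LBR users exist */
	perf_sched_cb_inc(&example_pmu);
}

static void example_lbr_event_disable(struct perf_event *event)
{
	perf_sched_cb_dec(&example_pmu);
}

With this, __perf_event_task_sched_in()/__perf_event_task_sched_out() walk the PMU list and call ->sched_task() only when the per-cpu perf_sched_cb_usages count is non-zero, so tasks without such events pay no extra context switch cost.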
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	57
1 file changed, 57 insertions(+), 0 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fef45b4bb5f8..6c8b31b7efb6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -154,6 +154,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2577,6 +2578,56 @@ unlock:
 	}
 }
 
+void perf_sched_cb_dec(struct pmu *pmu)
+{
+	this_cpu_dec(perf_sched_cb_usages);
+}
+
+void perf_sched_cb_inc(struct pmu *pmu)
+{
+	this_cpu_inc(perf_sched_cb_usages);
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (pmu->sched_task) {
+			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+			perf_pmu_disable(pmu);
+
+			pmu->sched_task(cpuctx->task_ctx, sched_in);
+
+			perf_pmu_enable(pmu);
+
+			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+		}
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
 #define for_each_task_context_nr(ctxn)				\
 	for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
 
@@ -2596,6 +2647,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(task, next, false);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 
@@ -2847,6 +2901,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 	/* check for system-wide branch_stack events */
 	if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
 		perf_branch_stack_sched_in(prev, task);
+
+	if (__this_cpu_read(perf_sched_cb_usages))
+		perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)