aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2011-08-01 06:49:14 -0400
committerIngo Molnar <mingo@elte.hu>2011-08-14 05:53:03 -0400
commit144060fee07e9c22e179d00819c83c86fbcbf82c (patch)
treea13f3806fb266158570f1d7f7ece72f68199d7c2 /kernel/events
parent7fdba1ca10462f42ad2246b918fe6368f5ce488e (diff)
perf: Add PM notifiers to fix CPU hotplug races
Francis reports that s2r gets him spurious NMIs; this is because the suspend code leaves the boot CPU up and running. Cure this by adding a suspend notifier. The problem is that hotplug and suspend are completely unserialized, and the PM notifiers run before suspend unplugs all CPUs but the boot CPU. This leaves a window in which the user can initiate another hotplug operation (either removing or adding a CPU), resulting in either one too many or one too few hotplug ops. Thus we cannot use the hotplug code for the suspend case. There is another reason not to use the hotplug code: it totally destroys the perf state. We can do better for suspend and simply remove all counters from the PMU so that we can reinstate them on resume. Reported-by: Francis Moreau <francis.moro@gmail.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/n/tip-1cvevybkgmv4s6v5y37t4847@git.kernel.org Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/core.c97
1 files changed, 95 insertions, 2 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..d4c85425e3a0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,6 +29,7 @@
29#include <linux/hardirq.h> 29#include <linux/hardirq.h>
30#include <linux/rculist.h> 30#include <linux/rculist.h>
31#include <linux/uaccess.h> 31#include <linux/uaccess.h>
32#include <linux/suspend.h>
32#include <linux/syscalls.h> 33#include <linux/syscalls.h>
33#include <linux/anon_inodes.h> 34#include <linux/anon_inodes.h>
34#include <linux/kernel_stat.h> 35#include <linux/kernel_stat.h>
@@ -6809,7 +6810,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
6809 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); 6810 struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
6810 6811
6811 mutex_lock(&swhash->hlist_mutex); 6812 mutex_lock(&swhash->hlist_mutex);
6812 if (swhash->hlist_refcount > 0) { 6813 if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
6813 struct swevent_hlist *hlist; 6814 struct swevent_hlist *hlist;
6814 6815
6815 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); 6816 hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6898,7 +6899,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6898{ 6899{
6899 unsigned int cpu = (long)hcpu; 6900 unsigned int cpu = (long)hcpu;
6900 6901
6901 switch (action & ~CPU_TASKS_FROZEN) { 6902 /*
6903 * Ignore suspend/resume action, the perf_pm_notifier will
6904 * take care of that.
6905 */
6906 if (action & CPU_TASKS_FROZEN)
6907 return NOTIFY_OK;
6908
6909 switch (action) {
6902 6910
6903 case CPU_UP_PREPARE: 6911 case CPU_UP_PREPARE:
6904 case CPU_DOWN_FAILED: 6912 case CPU_DOWN_FAILED:
@@ -6917,6 +6925,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6917 return NOTIFY_OK; 6925 return NOTIFY_OK;
6918} 6926}
6919 6927
6928static void perf_pm_resume_cpu(void *unused)
6929{
6930 struct perf_cpu_context *cpuctx;
6931 struct perf_event_context *ctx;
6932 struct pmu *pmu;
6933 int idx;
6934
6935 idx = srcu_read_lock(&pmus_srcu);
6936 list_for_each_entry_rcu(pmu, &pmus, entry) {
6937 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
6938 ctx = cpuctx->task_ctx;
6939
6940 perf_ctx_lock(cpuctx, ctx);
6941 perf_pmu_disable(cpuctx->ctx.pmu);
6942
6943 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
6944 if (ctx)
6945 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
6946
6947 perf_pmu_enable(cpuctx->ctx.pmu);
6948 perf_ctx_unlock(cpuctx, ctx);
6949 }
6950 srcu_read_unlock(&pmus_srcu, idx);
6951}
6952
6953static void perf_pm_suspend_cpu(void *unused)
6954{
6955 struct perf_cpu_context *cpuctx;
6956 struct perf_event_context *ctx;
6957 struct pmu *pmu;
6958 int idx;
6959
6960 idx = srcu_read_lock(&pmus_srcu);
6961 list_for_each_entry_rcu(pmu, &pmus, entry) {
6962 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
6963 ctx = cpuctx->task_ctx;
6964
6965 perf_ctx_lock(cpuctx, ctx);
6966 perf_pmu_disable(cpuctx->ctx.pmu);
6967
6968 perf_event_sched_in(cpuctx, ctx, current);
6969
6970 perf_pmu_enable(cpuctx->ctx.pmu);
6971 perf_ctx_unlock(cpuctx, ctx);
6972 }
6973 srcu_read_unlock(&pmus_srcu, idx);
6974}
6975
6976static int perf_resume(void)
6977{
6978 get_online_cpus();
6979 smp_call_function(perf_pm_resume_cpu, NULL, 1);
6980 put_online_cpus();
6981
6982 return NOTIFY_OK;
6983}
6984
6985static int perf_suspend(void)
6986{
6987 get_online_cpus();
6988 smp_call_function(perf_pm_suspend_cpu, NULL, 1);
6989 put_online_cpus();
6990
6991 return NOTIFY_OK;
6992}
6993
6994static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
6995{
6996 switch (action) {
6997 case PM_POST_HIBERNATION:
6998 case PM_POST_SUSPEND:
6999 return perf_resume();
7000 case PM_HIBERNATION_PREPARE:
7001 case PM_SUSPEND_PREPARE:
7002 return perf_suspend();
7003 default:
7004 return NOTIFY_DONE;
7005 }
7006}
7007
7008static struct notifier_block perf_pm_notifier = {
7009 .notifier_call = perf_pm,
7010};
7011
6920void __init perf_event_init(void) 7012void __init perf_event_init(void)
6921{ 7013{
6922 int ret; 7014 int ret;
@@ -6931,6 +7023,7 @@ void __init perf_event_init(void)
6931 perf_tp_register(); 7023 perf_tp_register();
6932 perf_cpu_notifier(perf_cpu_notify); 7024 perf_cpu_notifier(perf_cpu_notify);
6933 register_reboot_notifier(&perf_reboot_notifier); 7025 register_reboot_notifier(&perf_reboot_notifier);
7026 register_pm_notifier(&perf_pm_notifier);
6934 7027
6935 ret = init_hw_breakpoint(); 7028 ret = init_hw_breakpoint();
6936 WARN(ret, "hw_breakpoint initialization failed with: %d", ret); 7029 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);