From 4ac3dbec800d93485a5c84e37af676278eea657c Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 27 Oct 2010 11:17:15 -0400 Subject: oprofile: Fix the hang while taking the cpu offline The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. The oprofile is initialised using system timer in absence of hardware counters supports. Oprofile isn't started from userland. In this setup while doing a CPU offline the kernel hangs in infinite for loop inside lock_hrtimer_base() function This happens because as part of oprofile_cpu_notify(, it tries to stop an hrtimer which was never started. These per-cpu hrtimers are started when the oprfile is started. echo 1 > /dev/oprofile/enable This problem also existwhen the cpu is booted with maxcpus parameter set. When bringing the remaining cpus online the timers are started even if oprofile is not yet enabled. This patch fix this issue by adding a state variable so that these hrtimer start/stop is only attempted when oprofile is started For stable kernels v2.6.35.y and v2.6.36.y. Reported-by: Jan Sebastien Tested-by: sricharan Signed-off-by: Santosh Shilimkar Cc: stable@kernel.org Signed-off-by: Robert Richter --- drivers/oprofile/timer_int.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers/oprofile') diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index dc0ae4d14dff..010725117dbb 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -21,6 +21,7 @@ #include "oprof.h" static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); +static int ctr_running; static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) { @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) { struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + if (!ctr_running) + return; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = oprofile_hrtimer_notify; @@ -42,7 +46,10 @@ static void __oprofile_hrtimer_start(void *unused) static int oprofile_hrtimer_start(void) { + get_online_cpus(); + ctr_running = 1; on_each_cpu(__oprofile_hrtimer_start, NULL, 1); + put_online_cpus(); return 0; } @@ -50,6 +57,9 @@ static void __oprofile_hrtimer_stop(int cpu) { struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); + if (!ctr_running) + return; + hrtimer_cancel(hrtimer); } @@ -57,8 +67,11 @@ static void oprofile_hrtimer_stop(void) { int cpu; + get_online_cpus(); for_each_online_cpu(cpu) __oprofile_hrtimer_stop(cpu); + ctr_running = 0; + put_online_cpus(); } static int __cpuinit oprofile_cpu_notify(struct notifier_block *self, -- cgit v1.2.2 From 3d7851b3cdd43a734e5cc4c643fd886ab28ad4d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 15 Oct 2010 09:51:08 -0400 Subject: oprofile: Remove deprecated use of flush_scheduled_work() flush_scheduled_work() is deprecated and scheduled to be removed. sync_stop() currently cancels cpu_buffer works inside buffer_mutex and flushes the system workqueue outside. Instead, split end_cpu_work() into two parts - stopping further work enqueues and flushing works - and do the former inside buffer_mutex and latter outside. For stable kernels v2.6.35.y and v2.6.36.y. Signed-off-by: Tejun Heo Cc: stable@kernel.org Signed-off-by: Robert Richter --- drivers/oprofile/buffer_sync.c | 2 +- drivers/oprofile/cpu_buffer.c | 10 +++++++--- drivers/oprofile/cpu_buffer.h | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/oprofile') diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index b7e755f4178a..a3984f4ef192 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -190,7 +190,7 @@ void sync_stop(void) profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); task_handoff_unregister(&task_free_nb); mutex_unlock(&buffer_mutex); - flush_scheduled_work(); + flush_cpu_work(); /* make sure we don't leak task structs */ process_task_mortuary(); diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index f179ac2ea801..59f55441e075 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -111,14 +111,18 @@ void start_cpu_work(void) void end_cpu_work(void) { - int i; - work_enabled = 0; +} + +void flush_cpu_work(void) +{ + int i; for_each_online_cpu(i) { struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); - cancel_delayed_work(&b->work); + /* these works are per-cpu, no need for flush_sync */ + flush_delayed_work(&b->work); } } diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index 68ea16ab645f..e1d097e250ae 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -25,6 +25,7 @@ void free_cpu_buffers(void); void start_cpu_work(void); void end_cpu_work(void); +void flush_cpu_work(void); /* CPU buffer is composed of such entries (which are * also used for context switch notes) -- cgit v1.2.2