aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorJason Wessel <jason.wessel@windriver.com>2008-05-27 13:23:29 -0400
committerIngo Molnar <mingo@elte.hu>2008-06-02 06:38:27 -0400
commit8c2238eaaf0f774ca0f8d9daad7a616429bbb7f1 (patch)
tree5da9c1af587558a7a6481cdd6efc67956e950a74 /kernel
parent02ff375590ac4140d88afc76505df1ad45c6af59 (diff)
softlockup: fix NMI hangs due to lock race - 2.6.26-rc regression
The touch_nmi_watchdog() routine on x86 ultimately calls touch_softlockup_watchdog(). The problem is that to touch the softlockup watchdog, the cpu_clock code has to be called, which could involve multiple cpu locks and can lead to a hard hang if one of the locks is held by a processor that is not going to return anytime soon (such as could be the case with kgdb or perhaps even with some other kind of exception). This patch causes the public version of touch_softlockup_watchdog() to defer the cpu clock access to a later point. The test case for this problem is to use the following kernel config options: CONFIG_KGDB_TESTS=y CONFIG_KGDB_TESTS_ON_BOOT=y CONFIG_KGDB_TESTS_BOOT_STRING="V1F100I100000" It should be noted that the kgdb test suite and these options were not available until 2.6.26-rc2, so it was necessary to patch the kgdb test suite during the bisection. I would consider this patch a regression fix because the problem first appeared in commit 27ec4407790d075c325e1f4da0a19c56953cce23 when some logic was added to try to periodically sync the clocks. It was possible to work around this particular problem by simply not performing the sync anytime the system was in a critical context. This was ok until commit 3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd, which added config option CONFIG_HAVE_UNSTABLE_SCHED_CLOCK and some multi-cpu locks to sync the clocks. It became clear that accessing this code from an NMI was the source of the lockups. Avoiding the access to the low level clock code from code inside the NMI processing also fixed the problem with the 27ec44... commit. Signed-off-by: Jason Wessel <jason.wessel@windriver.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/softlockup.c15
1 files changed, 10 insertions, 5 deletions
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a3a0b239b7f7..6b682d86bddf 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -64,12 +64,17 @@ static unsigned long get_timestamp(int this_cpu)
64 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ 64 return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
65} 65}
66 66
67void touch_softlockup_watchdog(void) 67static void __touch_softlockup_watchdog(void)
68{ 68{
69 int this_cpu = raw_smp_processor_id(); 69 int this_cpu = raw_smp_processor_id();
70 70
71 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); 71 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
72} 72}
73
74void touch_softlockup_watchdog(void)
75{
76 __raw_get_cpu_var(touch_timestamp) = 0;
77}
73EXPORT_SYMBOL(touch_softlockup_watchdog); 78EXPORT_SYMBOL(touch_softlockup_watchdog);
74 79
75void touch_all_softlockup_watchdogs(void) 80void touch_all_softlockup_watchdogs(void)
@@ -103,7 +108,7 @@ void softlockup_tick(void)
103 } 108 }
104 109
105 if (touch_timestamp == 0) { 110 if (touch_timestamp == 0) {
106 touch_softlockup_watchdog(); 111 __touch_softlockup_watchdog();
107 return; 112 return;
108 } 113 }
109 114
@@ -118,7 +123,7 @@ void softlockup_tick(void)
118 123
119 /* do not print during early bootup: */ 124 /* do not print during early bootup: */
120 if (unlikely(system_state != SYSTEM_RUNNING)) { 125 if (unlikely(system_state != SYSTEM_RUNNING)) {
121 touch_softlockup_watchdog(); 126 __touch_softlockup_watchdog();
122 return; 127 return;
123 } 128 }
124 129
@@ -243,7 +248,7 @@ static int watchdog(void *__bind_cpu)
243 sched_setscheduler(current, SCHED_FIFO, &param); 248 sched_setscheduler(current, SCHED_FIFO, &param);
244 249
245 /* initialize timestamp */ 250 /* initialize timestamp */
246 touch_softlockup_watchdog(); 251 __touch_softlockup_watchdog();
247 252
248 set_current_state(TASK_INTERRUPTIBLE); 253 set_current_state(TASK_INTERRUPTIBLE);
249 /* 254 /*
@@ -252,7 +257,7 @@ static int watchdog(void *__bind_cpu)
252 * debug-printout triggers in softlockup_tick(). 257 * debug-printout triggers in softlockup_tick().
253 */ 258 */
254 while (!kthread_should_stop()) { 259 while (!kthread_should_stop()) {
255 touch_softlockup_watchdog(); 260 __touch_softlockup_watchdog();
256 schedule(); 261 schedule();
257 262
258 if (kthread_should_stop()) 263 if (kthread_should_stop())