diff options
| -rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
| -rw-r--r-- | include/linux/sched.h | 3 | ||||
| -rw-r--r-- | kernel/softlockup.c | 45 | ||||
| -rw-r--r-- | kernel/sysctl.c | 20 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 4 | ||||
| -rw-r--r-- | lib/Kconfig.debug | 26 |
6 files changed, 86 insertions, 15 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 30d44b78171a..47e7d8794fc6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
| @@ -2034,6 +2034,9 @@ and is between 256 and 4096 characters. It is defined in the file | |||
| 2034 | 2034 | ||
| 2035 | snd-ymfpci= [HW,ALSA] | 2035 | snd-ymfpci= [HW,ALSA] |
| 2036 | 2036 | ||
| 2037 | softlockup_panic= | ||
| 2038 | [KNL] Should the soft-lockup detector generate panics? (0 = no, non-zero = yes) | ||
| 2039 | |||
| 2037 | sonypi.*= [HW] Sony Programmable I/O Control Device driver | 2040 | sonypi.*= [HW] Sony Programmable I/O Control Device driver |
| 2038 | See Documentation/sonypi.txt | 2041 | See Documentation/sonypi.txt |
| 2039 | 2042 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1941d8b5cf11..af443a08431f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -295,10 +295,11 @@ extern void softlockup_tick(void); | |||
| 295 | extern void spawn_softlockup_task(void); | 295 | extern void spawn_softlockup_task(void); |
| 296 | extern void touch_softlockup_watchdog(void); | 296 | extern void touch_softlockup_watchdog(void); |
| 297 | extern void touch_all_softlockup_watchdogs(void); | 297 | extern void touch_all_softlockup_watchdogs(void); |
| 298 | extern unsigned long softlockup_thresh; | 298 | extern unsigned int softlockup_panic; |
| 299 | extern unsigned long sysctl_hung_task_check_count; | 299 | extern unsigned long sysctl_hung_task_check_count; |
| 300 | extern unsigned long sysctl_hung_task_timeout_secs; | 300 | extern unsigned long sysctl_hung_task_timeout_secs; |
| 301 | extern unsigned long sysctl_hung_task_warnings; | 301 | extern unsigned long sysctl_hung_task_warnings; |
| 302 | extern int softlockup_thresh; | ||
| 302 | #else | 303 | #else |
| 303 | static inline void softlockup_tick(void) | 304 | static inline void softlockup_tick(void) |
| 304 | { | 305 | { |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index a272d78185eb..7bd8d1aadd5d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
| 14 | #include <linux/freezer.h> | 14 | #include <linux/freezer.h> |
| 15 | #include <linux/kthread.h> | 15 | #include <linux/kthread.h> |
| 16 | #include <linux/lockdep.h> | ||
| 16 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
| 17 | #include <linux/module.h> | 18 | #include <linux/module.h> |
| 18 | 19 | ||
| @@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp); | |||
| 25 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); | 26 | static DEFINE_PER_CPU(struct task_struct *, watchdog_task); |
| 26 | 27 | ||
| 27 | static int __read_mostly did_panic; | 28 | static int __read_mostly did_panic; |
| 28 | unsigned long __read_mostly softlockup_thresh = 60; | 29 | int __read_mostly softlockup_thresh = 60; |
| 30 | |||
| 31 | /* | ||
| 32 | * Should we panic (and reboot, if panic_timeout= is set) when a | ||
| 33 | * soft-lockup occurs: | ||
| 34 | */ | ||
| 35 | unsigned int __read_mostly softlockup_panic = | ||
| 36 | CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; | ||
| 37 | |||
| 38 | static int __init softlockup_panic_setup(char *str) | ||
| 39 | { | ||
| 40 | softlockup_panic = simple_strtoul(str, NULL, 0); | ||
| 41 | |||
| 42 | return 1; | ||
| 43 | } | ||
| 44 | __setup("softlockup_panic=", softlockup_panic_setup); | ||
| 29 | 45 | ||
| 30 | static int | 46 | static int |
| 31 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) | 47 | softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) |
| @@ -84,6 +100,14 @@ void softlockup_tick(void) | |||
| 84 | struct pt_regs *regs = get_irq_regs(); | 100 | struct pt_regs *regs = get_irq_regs(); |
| 85 | unsigned long now; | 101 | unsigned long now; |
| 86 | 102 | ||
| 103 | /* Is detection switched off? */ | ||
| 104 | if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) { | ||
| 105 | /* Be sure we don't false trigger if switched back on */ | ||
| 106 | if (touch_timestamp) | ||
| 107 | per_cpu(touch_timestamp, this_cpu) = 0; | ||
| 108 | return; | ||
| 109 | } | ||
| 110 | |||
| 87 | if (touch_timestamp == 0) { | 111 | if (touch_timestamp == 0) { |
| 88 | __touch_softlockup_watchdog(); | 112 | __touch_softlockup_watchdog(); |
| 89 | return; | 113 | return; |
| @@ -92,11 +116,8 @@ void softlockup_tick(void) | |||
| 92 | print_timestamp = per_cpu(print_timestamp, this_cpu); | 116 | print_timestamp = per_cpu(print_timestamp, this_cpu); |
| 93 | 117 | ||
| 94 | /* report at most once a second */ | 118 | /* report at most once a second */ |
| 95 | if ((print_timestamp >= touch_timestamp && | 119 | if (print_timestamp == touch_timestamp || did_panic) |
| 96 | print_timestamp < (touch_timestamp + 1)) || | ||
| 97 | did_panic || !per_cpu(watchdog_task, this_cpu)) { | ||
| 98 | return; | 120 | return; |
| 99 | } | ||
| 100 | 121 | ||
| 101 | /* do not print during early bootup: */ | 122 | /* do not print during early bootup: */ |
| 102 | if (unlikely(system_state != SYSTEM_RUNNING)) { | 123 | if (unlikely(system_state != SYSTEM_RUNNING)) { |
| @@ -106,8 +127,11 @@ void softlockup_tick(void) | |||
| 106 | 127 | ||
| 107 | now = get_timestamp(this_cpu); | 128 | now = get_timestamp(this_cpu); |
| 108 | 129 | ||
| 109 | /* Wake up the high-prio watchdog task every second: */ | 130 | /* |
| 110 | if (now > (touch_timestamp + 1)) | 131 | * Wake up the high-prio watchdog task twice per |
| 132 | * threshold timespan. | ||
| 133 | */ | ||
| 134 | if (now > touch_timestamp + softlockup_thresh/2) | ||
| 111 | wake_up_process(per_cpu(watchdog_task, this_cpu)); | 135 | wake_up_process(per_cpu(watchdog_task, this_cpu)); |
| 112 | 136 | ||
| 113 | /* Warn about unreasonable delays: */ | 137 | /* Warn about unreasonable delays: */ |
| @@ -121,11 +145,15 @@ void softlockup_tick(void) | |||
| 121 | this_cpu, now - touch_timestamp, | 145 | this_cpu, now - touch_timestamp, |
| 122 | current->comm, task_pid_nr(current)); | 146 | current->comm, task_pid_nr(current)); |
| 123 | print_modules(); | 147 | print_modules(); |
| 148 | print_irqtrace_events(current); | ||
| 124 | if (regs) | 149 | if (regs) |
| 125 | show_regs(regs); | 150 | show_regs(regs); |
| 126 | else | 151 | else |
| 127 | dump_stack(); | 152 | dump_stack(); |
| 128 | spin_unlock(&print_lock); | 153 | spin_unlock(&print_lock); |
| 154 | |||
| 155 | if (softlockup_panic) | ||
| 156 | panic("softlockup: hung tasks"); | ||
| 129 | } | 157 | } |
| 130 | 158 | ||
| 131 | /* | 159 | /* |
| @@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now) | |||
| 178 | 206 | ||
| 179 | t->last_switch_timestamp = now; | 207 | t->last_switch_timestamp = now; |
| 180 | touch_nmi_watchdog(); | 208 | touch_nmi_watchdog(); |
| 209 | |||
| 210 | if (softlockup_panic) | ||
| 211 | panic("softlockup: blocked tasks"); | ||
| 181 | } | 212 | } |
| 182 | 213 | ||
| 183 | /* | 214 | /* |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b859e6b5a767..2a7b9d88706b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -88,12 +88,13 @@ extern int rcutorture_runnable; | |||
| 88 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 88 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
| 89 | 89 | ||
| 90 | /* Constants used for minimum and maximum */ | 90 | /* Constants used for minimum and maximum */ |
| 91 | #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) | 91 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) |
| 92 | static int one = 1; | 92 | static int one = 1; |
| 93 | #endif | 93 | #endif |
| 94 | 94 | ||
| 95 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 95 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
| 96 | static int sixty = 60; | 96 | static int sixty = 60; |
| 97 | static int neg_one = -1; | ||
| 97 | #endif | 98 | #endif |
| 98 | 99 | ||
| 99 | #ifdef CONFIG_MMU | 100 | #ifdef CONFIG_MMU |
| @@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = { | |||
| 739 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 740 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
| 740 | { | 741 | { |
| 741 | .ctl_name = CTL_UNNUMBERED, | 742 | .ctl_name = CTL_UNNUMBERED, |
| 743 | .procname = "softlockup_panic", | ||
| 744 | .data = &softlockup_panic, | ||
| 745 | .maxlen = sizeof(int), | ||
| 746 | .mode = 0644, | ||
| 747 | .proc_handler = &proc_dointvec_minmax, | ||
| 748 | .strategy = &sysctl_intvec, | ||
| 749 | .extra1 = &zero, | ||
| 750 | .extra2 = &one, | ||
| 751 | }, | ||
| 752 | { | ||
| 753 | .ctl_name = CTL_UNNUMBERED, | ||
| 742 | .procname = "softlockup_thresh", | 754 | .procname = "softlockup_thresh", |
| 743 | .data = &softlockup_thresh, | 755 | .data = &softlockup_thresh, |
| 744 | .maxlen = sizeof(unsigned long), | 756 | .maxlen = sizeof(int), |
| 745 | .mode = 0644, | 757 | .mode = 0644, |
| 746 | .proc_handler = &proc_doulongvec_minmax, | 758 | .proc_handler = &proc_dointvec_minmax, |
| 747 | .strategy = &sysctl_intvec, | 759 | .strategy = &sysctl_intvec, |
| 748 | .extra1 = &one, | 760 | .extra1 = &neg_one, |
| 749 | .extra2 = &sixty, | 761 | .extra2 = &sixty, |
| 750 | }, | 762 | }, |
| 751 | { | 763 | { |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index beef7ccdf842..942fc7c85283 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void) | |||
| 140 | if (!ts->tick_stopped) | 140 | if (!ts->tick_stopped) |
| 141 | return; | 141 | return; |
| 142 | 142 | ||
| 143 | touch_softlockup_watchdog(); | ||
| 144 | |||
| 145 | cpu_clear(cpu, nohz_cpu_mask); | 143 | cpu_clear(cpu, nohz_cpu_mask); |
| 146 | now = ktime_get(); | 144 | now = ktime_get(); |
| 147 | ts->idle_waketime = now; | 145 | ts->idle_waketime = now; |
| @@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void) | |||
| 149 | local_irq_save(flags); | 147 | local_irq_save(flags); |
| 150 | tick_do_update_jiffies64(now); | 148 | tick_do_update_jiffies64(now); |
| 151 | local_irq_restore(flags); | 149 | local_irq_restore(flags); |
| 150 | |||
| 151 | touch_softlockup_watchdog(); | ||
| 152 | } | 152 | } |
| 153 | 153 | ||
| 154 | void tick_nohz_stop_idle(int cpu) | 154 | void tick_nohz_stop_idle(int cpu) |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ba106db5a65b..882c51048993 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
| @@ -150,7 +150,7 @@ config DETECT_SOFTLOCKUP | |||
| 150 | help | 150 | help |
| 151 | Say Y here to enable the kernel to detect "soft lockups", | 151 | Say Y here to enable the kernel to detect "soft lockups", |
| 152 | which are bugs that cause the kernel to loop in kernel | 152 | which are bugs that cause the kernel to loop in kernel |
| 153 | mode for more than 10 seconds, without giving other tasks a | 153 | mode for more than 60 seconds, without giving other tasks a |
| 154 | chance to run. | 154 | chance to run. |
| 155 | 155 | ||
| 156 | When a soft-lockup is detected, the kernel will print the | 156 | When a soft-lockup is detected, the kernel will print the |
| @@ -162,6 +162,30 @@ config DETECT_SOFTLOCKUP | |||
| 162 | can be detected via the NMI-watchdog, on platforms that | 162 | can be detected via the NMI-watchdog, on platforms that |
| 163 | support it.) | 163 | support it.) |
| 164 | 164 | ||
| 165 | config BOOTPARAM_SOFTLOCKUP_PANIC | ||
| 166 | bool "Panic (Reboot) On Soft Lockups" | ||
| 167 | depends on DETECT_SOFTLOCKUP | ||
| 168 | help | ||
| 169 | Say Y here to enable the kernel to panic on "soft lockups", | ||
| 170 | which are bugs that cause the kernel to loop in kernel | ||
| 171 | mode for more than 60 seconds, without giving other tasks a | ||
| 172 | chance to run. | ||
| 173 | |||
| 174 | The panic can be used in combination with panic_timeout, | ||
| 175 | to cause the system to reboot automatically after a | ||
| 176 | lockup has been detected. This feature is useful for | ||
| 177 | high-availability systems that have uptime guarantees and | ||
| 178 | where a lockup must be resolved ASAP. | ||
| 179 | |||
| 180 | Say N if unsure. | ||
| 181 | |||
| 182 | config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE | ||
| 183 | int | ||
| 184 | depends on DETECT_SOFTLOCKUP | ||
| 185 | range 0 1 | ||
| 186 | default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC | ||
| 187 | default 1 if BOOTPARAM_SOFTLOCKUP_PANIC | ||
| 188 | |||
| 165 | config SCHED_DEBUG | 189 | config SCHED_DEBUG |
| 166 | bool "Collect scheduler debugging info" | 190 | bool "Collect scheduler debugging info" |
| 167 | depends on DEBUG_KERNEL && PROC_FS | 191 | depends on DEBUG_KERNEL && PROC_FS |
