aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-08-15 03:50:13 -0400
committerThomas Gleixner <tglx@linutronix.de>2017-08-18 06:35:02 -0400
commit7edaeb6841dfb27e362288ab8466ebdc4972e867 (patch)
tree419ce3f71f7ffc17131bb911fb16ff037e3c6bb1
parentef954844c7ace62f773f4f23e28d2d915adc419f (diff)
kernel/watchdog: Prevent false positives with turbo modes
The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumess a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang <Kan.liang@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--include/linux/nmi.h8
-rw-r--r--kernel/watchdog.c1
-rw-r--r--kernel/watchdog_hld.c59
-rw-r--r--lib/Kconfig.debug7
5 files changed, 76 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b7cf9e..9101bfc85539 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
100 select GENERIC_STRNCPY_FROM_USER 100 select GENERIC_STRNCPY_FROM_USER
101 select GENERIC_STRNLEN_USER 101 select GENERIC_STRNLEN_USER
102 select GENERIC_TIME_VSYSCALL 102 select GENERIC_TIME_VSYSCALL
103 select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
103 select HAVE_ACPI_APEI if ACPI 104 select HAVE_ACPI_APEI if ACPI
104 select HAVE_ACPI_APEI_NMI if ACPI 105 select HAVE_ACPI_APEI_NMI if ACPI
105 select HAVE_ALIGNED_STRUCT_PAGE if SLUB 106 select HAVE_ALIGNED_STRUCT_PAGE if SLUB
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 8aa01fd859fb..a36abe2da13e 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
168#define sysctl_softlockup_all_cpu_backtrace 0 168#define sysctl_softlockup_all_cpu_backtrace 0
169#define sysctl_hardlockup_all_cpu_backtrace 0 169#define sysctl_hardlockup_all_cpu_backtrace 0
170#endif 170#endif
171
172#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
173 defined(CONFIG_HARDLOCKUP_DETECTOR)
174void watchdog_update_hrtimer_threshold(u64 period);
175#else
176static inline void watchdog_update_hrtimer_threshold(u64 period) { }
177#endif
178
171extern bool is_hardlockup(void); 179extern bool is_hardlockup(void);
172struct ctl_table; 180struct ctl_table;
173extern int proc_watchdog(struct ctl_table *, int , 181extern int proc_watchdog(struct ctl_table *, int ,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 06d3389bca0d..f5d52024f6b7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -240,6 +240,7 @@ static void set_sample_period(void)
240 * hardlockup detector generates a warning 240 * hardlockup detector generates a warning
241 */ 241 */
242 sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); 242 sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
243 watchdog_update_hrtimer_threshold(sample_period);
243} 244}
244 245
245/* Commands for resetting the watchdog */ 246/* Commands for resetting the watchdog */
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 295a0d84934c..3a09ea1b1d3d 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
37} 37}
38EXPORT_SYMBOL(arch_touch_nmi_watchdog); 38EXPORT_SYMBOL(arch_touch_nmi_watchdog);
39 39
40#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
41static DEFINE_PER_CPU(ktime_t, last_timestamp);
42static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
43static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
44
45void watchdog_update_hrtimer_threshold(u64 period)
46{
47 /*
48 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
49 *
50 * So it runs effectively with 2.5 times the rate of the NMI
51 * watchdog. That means the hrtimer should fire 2-3 times before
52 * the NMI watchdog expires. The NMI watchdog on x86 is based on
53 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
54 * might run way faster than expected and the NMI fires in a
55 * smaller period than the one deduced from the nominal CPU
56 * frequency. Depending on the Turbo-Mode factor this might be fast
57 * enough to get the NMI period smaller than the hrtimer watchdog
58 * period and trigger false positives.
59 *
60 * The sample threshold is used to check in the NMI handler whether
61 * the minimum time between two NMI samples has elapsed. That
62 * prevents false positives.
63 *
64 * Set this to 4/5 of the actual watchdog threshold period so the
65 * hrtimer is guaranteed to fire at least once within the real
66 * watchdog threshold.
67 */
68 watchdog_hrtimer_sample_threshold = period * 2;
69}
70
71static bool watchdog_check_timestamp(void)
72{
73 ktime_t delta, now = ktime_get_mono_fast_ns();
74
75 delta = now - __this_cpu_read(last_timestamp);
76 if (delta < watchdog_hrtimer_sample_threshold) {
77 /*
78 * If ktime is jiffies based, a stalled timer would prevent
79 * jiffies from being incremented and the filter would look
80 * at a stale timestamp and never trigger.
81 */
82 if (__this_cpu_inc_return(nmi_rearmed) < 10)
83 return false;
84 }
85 __this_cpu_write(nmi_rearmed, 0);
86 __this_cpu_write(last_timestamp, now);
87 return true;
88}
89#else
90static inline bool watchdog_check_timestamp(void)
91{
92 return true;
93}
94#endif
95
40static struct perf_event_attr wd_hw_attr = { 96static struct perf_event_attr wd_hw_attr = {
41 .type = PERF_TYPE_HARDWARE, 97 .type = PERF_TYPE_HARDWARE,
42 .config = PERF_COUNT_HW_CPU_CYCLES, 98 .config = PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
61 return; 117 return;
62 } 118 }
63 119
120 if (!watchdog_check_timestamp())
121 return;
122
64 /* check for a hardlockup 123 /* check for a hardlockup
65 * This is done by making sure our timer interrupt 124 * This is done by making sure our timer interrupt
66 * is incrementing. The timer interrupt should have 125 * is incrementing. The timer interrupt should have
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 98fe715522e8..c617b9d1d6cb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -798,6 +798,13 @@ config HARDLOCKUP_DETECTOR_PERF
798 select SOFTLOCKUP_DETECTOR 798 select SOFTLOCKUP_DETECTOR
799 799
800# 800#
801# Enables a timestamp based low pass filter to compensate for perf based
802# hard lockup detection which runs too fast due to turbo modes.
803#
804config HARDLOCKUP_CHECK_TIMESTAMP
805 bool
806
807#
801# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard 808# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
802# lockup detector rather than the perf based detector. 809# lockup detector rather than the perf based detector.
803# 810#