aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/rtc
diff options
context:
space:
mode:
author: Pratyush Anand <panand@redhat.com> 2016-09-15 00:08:16 -0400
committer: Alexandre Belloni <alexandre.belloni@free-electrons.com> 2016-09-19 18:22:00 -0400
commit: 970fc7f4afd52d638d88aeda985ea03ccd33acee (patch)
tree: 6991cd15508695a4966d9f07bf4217c0da6cb932 /drivers/rtc
parent: de75ccdd4118ca41ac473c1ab96365280c631cdd (diff)
rtc: cmos: Initialize hpet timer before irq is registered
We have observed on a few x86 machines with an rtc-cmos device that hpet_rtc_interrupt() is called just after irq registration and before cmos_do_probe() could call hpet_rtc_timer_init().

So, neither hpet_default_delta nor hpet_t1_cmp is initialized by the time the interrupt is raised in the given situation, and this results in an NMI watchdog LOCKUP. It has only been observed sporadically on kdump secondary kernels.

See the call trace:
---<-snip->---
[ 27.913194] Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 0
[ 27.915371] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.10.0-342.el7.x86_64 #1
[ 27.917503] Hardware name: HP ProLiant DL160 Gen8, BIOS J03 02/10/2014
[ 27.919455] ffffffff8186a728 0000000059c82488 ffff880034e05af0 ffffffff81637bd4
[ 27.921870] ffff880034e05b70 ffffffff8163144a 0000000000000010 ffff880034e05b80
[ 27.924257] ffff880034e05b20 0000000059c82488 0000000000000000 0000000000000000
[ 27.926599] Call Trace:
[ 27.927352] <NMI> [<ffffffff81637bd4>] dump_stack+0x19/0x1b
[ 27.929080] [<ffffffff8163144a>] panic+0xd8/0x1e7
[ 27.930588] [<ffffffff8111d3e0>] ? restart_watchdog_hrtimer+0x50/0x50
[ 27.932502] [<ffffffff8111d4a2>] watchdog_overflow_callback+0xc2/0xd0
[ 27.934427] [<ffffffff811612c1>] __perf_event_overflow+0xa1/0x250
[ 27.936232] [<ffffffff81161d94>] perf_event_overflow+0x14/0x20
[ 27.937957] [<ffffffff81032ae8>] intel_pmu_handle_irq+0x1e8/0x470
[ 27.939799] [<ffffffff8164164b>] perf_event_nmi_handler+0x2b/0x50
[ 27.941649] [<ffffffff81640d99>] nmi_handle.isra.0+0x69/0xb0
[ 27.943348] [<ffffffff81640f49>] do_nmi+0x169/0x340
[ 27.944802] [<ffffffff816401d3>] end_repeat_nmi+0x1e/0x2e
[ 27.946424] [<ffffffff81056ee5>] ? hpet_rtc_interrupt+0x85/0x380
[ 27.948197] [<ffffffff81056ee5>] ? hpet_rtc_interrupt+0x85/0x380
[ 27.949992] [<ffffffff81056ee5>] ? hpet_rtc_interrupt+0x85/0x380
[ 27.951816] <<EOE>> <IRQ> [<ffffffff8108f5a3>] ? run_timer_softirq+0x43/0x340
[ 27.954114] [<ffffffff8111e24e>] handle_irq_event_percpu+0x3e/0x1e0
[ 27.955962] [<ffffffff8111e42d>] handle_irq_event+0x3d/0x60
[ 27.957635] [<ffffffff811210c7>] handle_edge_irq+0x77/0x130
[ 27.959332] [<ffffffff8101704f>] handle_irq+0xbf/0x150
[ 27.960949] [<ffffffff8164a86f>] do_IRQ+0x4f/0xf0
[ 27.962434] [<ffffffff8163faed>] common_interrupt+0x6d/0x6d
[ 27.964101] <EOI> [<ffffffff8163f43b>] ? _raw_spin_unlock_irqrestore+0x1b/0x40
[ 27.966308] [<fffff8111ff07>] __setup_irq+0x2a7/0x570
[ 28.067859] [<ffffffff81056e60>] ? hpet_cpuhp_notify+0x140/0x140
[ 28.069709] [<ffffffff8112032c>] request_threaded_irq+0xcc/0x170
[ 28.071585] [<ffffffff814b24a6>] cmos_do_probe+0x1e6/0x450
[ 28.073240] [<ffffffff814b2710>] ? cmos_do_probe+0x450/0x450
[ 28.074911] [<ffffffff814b27cb>] cmos_pnp_probe+0xbb/0xc0
[ 28.076533] [<ffffffff8139b245>] pnp_device_probe+0x65/0xd0
[ 28.078198] [<ffffffff813f8ca7>] driver_probe_device+0x87/0x390
[ 28.079971] [<ffffffff813f9083>] __driver_attach+0x93/0xa0
[ 28.081660] [<ffffffff813f8ff0>] ? __device_attach+0x40/0x40
[ 28.083662] [<ffffffff813f6a13>] bus_for_each_dev+0x73/0xc0
[ 28.085370] [<ffffffff813f86fe>] driver_attach+0x1e/0x20
[ 28.086974] [<ffffffff813f8250>] bus_add_driver+0x200/0x2d0
[ 28.088634] [<ffffffff81ade49a>] ? rtc_sysfs_init+0xe/0xe
[ 28.090349] [<ffffffff813f9704>] driver_register+0x64/0xf0
[ 28.091989] [<ffffffff8139b070>] pnp_register_driver+0x20/0x30
[ 28.093707] [<ffffffff81ade4ab>] cmos_init+0x11/0x71
---<-snip->---

This patch moves hpet_rtc_timer_init() before IRQ registration, so that we can gracefully handle such spurious interrupts. It also masks HPET RTC interrupts, in case IRQ registration fails.

We were able to reproduce the problem within at most 15 trials of kdump secondary kernel boot on an hp-dl160gen8 FCoE host machine without this patch. However, more than 35 trials went fine after applying this patch.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Pratyush Anand <panand@redhat.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Diffstat (limited to 'drivers/rtc')
-rw-r--r-- drivers/rtc/rtc-cmos.c 4
1 files changed, 3 insertions, 1 deletions
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 43745cac0141..fddde655cbd4 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -707,6 +707,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
707 goto cleanup1; 707 goto cleanup1;
708 } 708 }
709 709
710 hpet_rtc_timer_init();
711
710 if (is_valid_irq(rtc_irq)) { 712 if (is_valid_irq(rtc_irq)) {
711 irq_handler_t rtc_cmos_int_handler; 713 irq_handler_t rtc_cmos_int_handler;
712 714
@@ -714,6 +716,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
714 rtc_cmos_int_handler = hpet_rtc_interrupt; 716 rtc_cmos_int_handler = hpet_rtc_interrupt;
715 retval = hpet_register_irq_handler(cmos_interrupt); 717 retval = hpet_register_irq_handler(cmos_interrupt);
716 if (retval) { 718 if (retval) {
719 hpet_mask_rtc_irq_bit(RTC_IRQMASK);
717 dev_warn(dev, "hpet_register_irq_handler " 720 dev_warn(dev, "hpet_register_irq_handler "
718 " failed in rtc_init()."); 721 " failed in rtc_init().");
719 goto cleanup1; 722 goto cleanup1;
@@ -729,7 +732,6 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
729 goto cleanup1; 732 goto cleanup1;
730 } 733 }
731 } 734 }
732 hpet_rtc_timer_init();
733 735
734 /* export at least the first block of NVRAM */ 736 /* export at least the first block of NVRAM */
735 nvram.size = address_space - NVRAM_OFFSET; 737 nvram.size = address_space - NVRAM_OFFSET;