aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2017-01-31 03:37:34 -0500
committer Thomas Gleixner <tglx@linutronix.de> 2017-01-31 15:47:58 -0500
commit 0becc0ae5b42828785b589f686725ff5bc3b9b25 (patch)
tree be6d0e1f37c38ed0a7dd5da2d4b1e93f0fb43101
parent 24c2503255d35c269b67162c397a1a1c1e02f6ce (diff)
x86/mce: Make timer handling more robust
Erik reported that on preproduction hardware a CMCI storm triggers the BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is started by the CMCI logic before the MCE CPU hotplug callback starts the timer with add_timer_on(). So the timer is already queued which triggers the BUG. Using add_timer_on() is pretty pointless in this code because the timer is strictly per CPU, initialized as pinned and all operations which arm the timer happen on the CPU to which the timer belongs. Simplify the whole machinery by using mod_timer() instead of add_timer_on() which avoids the problem because mod_timer() can handle already queued timers. Use __start_timer() everywhere so the earliest armed expiry time is preserved. Reported-by: Erik Veijola <erik.veijola@intel.com> Tested-by: Borislav Petkov <bp@alien8.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@alien8.de> Cc: Tony Luck <tony.luck@intel.com> Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 31
1 files changed, 12 insertions, 19 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef43233e03..537c6647d84c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
1373 1373
1374static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; 1374static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
1375 1375
1376static void __restart_timer(struct timer_list *t, unsigned long interval) 1376static void __start_timer(struct timer_list *t, unsigned long interval)
1377{ 1377{
1378 unsigned long when = jiffies + interval; 1378 unsigned long when = jiffies + interval;
1379 unsigned long flags; 1379 unsigned long flags;
1380 1380
1381 local_irq_save(flags); 1381 local_irq_save(flags);
1382 1382
1383 if (timer_pending(t)) { 1383 if (!timer_pending(t) || time_before(when, t->expires))
1384 if (time_before(when, t->expires)) 1384 mod_timer(t, round_jiffies(when));
1385 mod_timer(t, when);
1386 } else {
1387 t->expires = round_jiffies(when);
1388 add_timer_on(t, smp_processor_id());
1389 }
1390 1385
1391 local_irq_restore(flags); 1386 local_irq_restore(flags);
1392} 1387}
@@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data)
1421 1416
1422done: 1417done:
1423 __this_cpu_write(mce_next_interval, iv); 1418 __this_cpu_write(mce_next_interval, iv);
1424 __restart_timer(t, iv); 1419 __start_timer(t, iv);
1425} 1420}
1426 1421
1427/* 1422/*
@@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval)
1432 struct timer_list *t = this_cpu_ptr(&mce_timer); 1427 struct timer_list *t = this_cpu_ptr(&mce_timer);
1433 unsigned long iv = __this_cpu_read(mce_next_interval); 1428 unsigned long iv = __this_cpu_read(mce_next_interval);
1434 1429
1435 __restart_timer(t, interval); 1430 __start_timer(t, interval);
1436 1431
1437 if (interval < iv) 1432 if (interval < iv)
1438 __this_cpu_write(mce_next_interval, interval); 1433 __this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
1779 } 1774 }
1780} 1775}
1781 1776
1782static void mce_start_timer(unsigned int cpu, struct timer_list *t) 1777static void mce_start_timer(struct timer_list *t)
1783{ 1778{
1784 unsigned long iv = check_interval * HZ; 1779 unsigned long iv = check_interval * HZ;
1785 1780
1786 if (mca_cfg.ignore_ce || !iv) 1781 if (mca_cfg.ignore_ce || !iv)
1787 return; 1782 return;
1788 1783
1789 per_cpu(mce_next_interval, cpu) = iv; 1784 this_cpu_write(mce_next_interval, iv);
1790 1785 __start_timer(t, iv);
1791 t->expires = round_jiffies(jiffies + iv);
1792 add_timer_on(t, cpu);
1793} 1786}
1794 1787
1795static void __mcheck_cpu_setup_timer(void) 1788static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void)
1806 unsigned int cpu = smp_processor_id(); 1799 unsigned int cpu = smp_processor_id();
1807 1800
1808 setup_pinned_timer(t, mce_timer_fn, cpu); 1801 setup_pinned_timer(t, mce_timer_fn, cpu);
1809 mce_start_timer(cpu, t); 1802 mce_start_timer(t);
1810} 1803}
1811 1804
1812/* Handle unconfigured int18 (should never happen) */ 1805/* Handle unconfigured int18 (should never happen) */
@@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu)
2566 2559
2567static int mce_cpu_online(unsigned int cpu) 2560static int mce_cpu_online(unsigned int cpu)
2568{ 2561{
2569 struct timer_list *t = &per_cpu(mce_timer, cpu); 2562 struct timer_list *t = this_cpu_ptr(&mce_timer);
2570 int ret; 2563 int ret;
2571 2564
2572 mce_device_create(cpu); 2565 mce_device_create(cpu);
@@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu)
2577 return ret; 2570 return ret;
2578 } 2571 }
2579 mce_reenable_cpu(); 2572 mce_reenable_cpu();
2580 mce_start_timer(cpu, t); 2573 mce_start_timer(t);
2581 return 0; 2574 return 0;
2582} 2575}
2583 2576
2584static int mce_cpu_pre_down(unsigned int cpu) 2577static int mce_cpu_pre_down(unsigned int cpu)
2585{ 2578{
2586 struct timer_list *t = &per_cpu(mce_timer, cpu); 2579 struct timer_list *t = this_cpu_ptr(&mce_timer);
2587 2580
2588 mce_disable_cpu(); 2581 mce_disable_cpu();
2589 del_timer_sync(t); 2582 del_timer_sync(t);