author	Andi Kleen <andi@firstfloor.org>	2009-02-12 07:39:29 -0500
committer	H. Peter Anvin <hpa@linux.intel.com>	2009-02-17 18:32:44 -0500
commit	52d168e28bc11dd026b620fe1767cadde5a747cd (patch)
tree	8a6ff475d4de3041ee2137e6a3044b568fd58b67
parent	9bd984058088d6ef7af6946591a207e51a2f4890 (diff)
x86, mce: switch machine check polling to per CPU timer
Impact: Higher priority bug fix

The machine check poller runs a single timer and then broadcasts an IPI to all CPUs to check them. This leads to unnecessary synchronization between CPUs. The original CPU running the timer has to wait potentially a long time for all other CPUs to answer. This is also real-time unfriendly and in general inefficient.

This was especially a problem on systems with a lot of events, where the poller runs with a higher frequency after processing some events. More and more CPU time could be wasted this way, to the point of significantly slowing down machines.

The machine check polling is actually fully independent per CPU, so there's no reason not to do it all with per-CPU timers. This patch implements that.

Also switch the poller to use standard timers instead of work queues. It was using work queues to be able to execute a user program on an event, but mce_notify_user() handles this case now with a separate callback. So instead always run the poll code in a standard per-CPU timer, which means that in the common case of not having to execute a trigger there will be less overhead.

This allows the initialization to be cleaned up significantly, because standard timers are already up when machine checks are initialized. No multiple initialization functions.

Thanks to Thomas Gleixner for some help.

Cc: thockin@google.com
v2: Use del_timer_sync() on cpu shutdown and don't try to handle migrated timers.
v3: Add WARN_ON for timer running on unexpected CPU

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--	arch/x86/kernel/cpu/mcheck/mce_64.c	68
1 file changed, 45 insertions(+), 23 deletions(-)
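Illustrative sketch (not part of the patch): the general shape of a self-rearming per-CPU timer using the 2.6.29-era timer API this patch relies on. The names poll_timer, poll_interval and poll_one_cpu are invented for the example; the actual mce_64.c code follows in the diff below.

/*
 * Hedged sketch, assuming a 2.6.29-era kernel: each CPU owns its own
 * timer, the callback re-arms it locally, and no cross-CPU IPI is needed.
 */
#include <linux/timer.h>
#include <linux/percpu.h>
#include <linux/jiffies.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct timer_list, poll_timer);	/* illustrative name */
static int poll_interval;				/* in jiffies */

static void poll_one_cpu(unsigned long data)
{
	/* data carries the owning CPU number, as in the patch */
	struct timer_list *t = &per_cpu(poll_timer, data);

	WARN_ON(smp_processor_id() != data);

	/* ... do the per-CPU work here ... */

	/* re-arm: add_timer() queues on the local CPU, so the timer stays put */
	t->expires = jiffies + poll_interval;
	add_timer(t);
}

static void poll_timer_start(void)
{
	/* called on the CPU that should own the timer, with preemption off */
	struct timer_list *t = &__get_cpu_var(poll_timer);

	setup_timer(t, poll_one_cpu, smp_processor_id());
	t->expires = round_jiffies_relative(jiffies + poll_interval);
	add_timer(t);
}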
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 18b379cf0610..3f0550d16f3c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -353,18 +353,17 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
 
 static int check_interval = 5 * 60; /* 5 minutes */
 static int next_interval; /* in jiffies */
-static void mcheck_timer(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
+static void mcheck_timer(unsigned long);
+static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mcheck_check_cpu(void *info)
+static void mcheck_timer(unsigned long data)
 {
+	struct timer_list *t = &per_cpu(mce_timer, data);
+
+	WARN_ON(smp_processor_id() != data);
+
 	if (mce_available(&current_cpu_data))
 		do_machine_check(NULL, 0);
-}
-
-static void mcheck_timer(struct work_struct *work)
-{
-	on_each_cpu(mcheck_check_cpu, NULL, 1);
 
 	/*
 	 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -377,7 +376,8 @@ static void mcheck_timer(struct work_struct *work)
 				    (int)round_jiffies_relative(check_interval*HZ));
 	}
 
-	schedule_delayed_work(&mcheck_work, next_interval);
+	t->expires = jiffies + next_interval;
+	add_timer(t);
 }
 
 static void mce_do_trigger(struct work_struct *work)
@@ -436,16 +436,11 @@ static struct notifier_block mce_idle_notifier = {
 
 static __init int periodic_mcheck_init(void)
 {
-	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
 	idle_notifier_register(&mce_idle_notifier);
 	return 0;
 }
 __initcall(periodic_mcheck_init);
 
-
 /*
  * Initialize Machine Checks for a CPU.
  */
@@ -515,6 +510,20 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
 	}
 }
 
+static void mce_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+
+	/* data race harmless because everyone sets to the same value */
+	if (!next_interval)
+		next_interval = check_interval * HZ;
+	if (!next_interval)
+		return;
+	setup_timer(t, mcheck_timer, smp_processor_id());
+	t->expires = round_jiffies_relative(jiffies + next_interval);
+	add_timer(t);
+}
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off.
@@ -529,6 +538,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
 
 	mce_init(NULL);
 	mce_cpu_features(c);
+	mce_init_timer();
 }
 
 /*
@@ -758,17 +768,19 @@ static int mce_resume(struct sys_device *dev)
 	return 0;
 }
 
+static void mce_cpu_restart(void *data)
+{
+	del_timer_sync(&__get_cpu_var(mce_timer));
+	if (mce_available(&current_cpu_data))
+		mce_init(NULL);
+	mce_init_timer();
+}
+
 /* Reinit MCEs after user configuration changes */
 static void mce_restart(void)
 {
-	if (next_interval)
-		cancel_delayed_work(&mcheck_work);
-	/* Timer race is harmless here */
-	on_each_cpu(mce_init, NULL, 1);
 	next_interval = check_interval * HZ;
-	if (next_interval)
-		schedule_delayed_work(&mcheck_work,
-				      round_jiffies_relative(next_interval));
+	on_each_cpu(mce_cpu_restart, NULL, 1);
 }
 
 static struct sysdev_class mce_sysclass = {
@@ -899,6 +911,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 			  unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
 	switch (action) {
 	case CPU_ONLINE:
@@ -913,6 +926,15 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 		threshold_cpu_callback(action, cpu);
 		mce_remove_device(cpu);
 		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		del_timer_sync(t);
+		break;
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		t->expires = round_jiffies_relative(jiffies + next_interval);
+		add_timer_on(t, cpu);
+		break;
 	}
 	return NOTIFY_OK;
 }