aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2013-03-06 06:18:35 -0500
committerThomas Gleixner <tglx@linutronix.de>2013-03-13 06:39:39 -0400
commit989dcb645ca715129c5a2b39102c8334a20d9615 (patch)
tree244553d975af4d64fa033c11e02267e9753376f5 /kernel
parent26517f3e99248668315aee9460dcea21628cdd7f (diff)
tick: Handle broadcast wakeup of multiple cpus
Some brilliant hardware implementations wake multiple cores when the
broadcast timer fires. This leads to the following interesting
problem:

CPU0                            CPU1
wakeup from idle                wakeup from idle

leave broadcast mode            leave broadcast mode
 restart per cpu timer           restart per cpu timer
                                go back to idle
handle broadcast
 (empty mask)
                                enter broadcast mode
                                program broadcast device
enter broadcast mode
program broadcast device

So what happens is that due to the forced reprogramming of the cpu
local timer, we need to set an event in the future. Now if we manage to
go back to idle before the timer fires, we switch off the timer and
arm the broadcast device with an already expired time (covered by
forced mode). So in the worst case we repeat the above ping pong
forever.

Unfortunately we have no information about what caused the wakeup, but
we can check current time against the expiry time of the local cpu. If
the local event is already in the past, we know that the broadcast
timer is about to fire and send an IPI. So we mark ourselves as an IPI
target even if we left broadcast mode and avoid the reprogramming of
the local cpu timer.

This still leaves the possibility that a CPU which is not handling the
broadcast interrupt is going to reach idle again before the IPI
arrives. This can't be solved in the core code and will be handled in
follow up patches.

Reported-by: Jason Liu <liu.h.jason@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: LAK <linux-arm-kernel@lists.infradead.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Arjan van de Veen <arjan@infradead.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Link: http://lkml.kernel.org/r/20130306111537.492045206@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/time/tick-broadcast.c59
1 files changed, 58 insertions, 1 deletions
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 005c0ca81a32..2100aad6b5f2 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -393,6 +393,7 @@ int tick_resume_broadcast(void)
393 393
394static cpumask_var_t tick_broadcast_oneshot_mask; 394static cpumask_var_t tick_broadcast_oneshot_mask;
395static cpumask_var_t tick_broadcast_pending_mask; 395static cpumask_var_t tick_broadcast_pending_mask;
396static cpumask_var_t tick_broadcast_force_mask;
396 397
397/* 398/*
398 * Exposed for debugging: see timer_list.c 399 * Exposed for debugging: see timer_list.c
@@ -483,6 +484,10 @@ again:
483 } 484 }
484 } 485 }
485 486
487 /* Take care of enforced broadcast requests */
488 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
489 cpumask_clear(tick_broadcast_force_mask);
490
486 /* 491 /*
487 * Wakeup the cpus which have an expired event. 492 * Wakeup the cpus which have an expired event.
488 */ 493 */
@@ -518,6 +523,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
518 struct clock_event_device *bc, *dev; 523 struct clock_event_device *bc, *dev;
519 struct tick_device *td; 524 struct tick_device *td;
520 unsigned long flags; 525 unsigned long flags;
526 ktime_t now;
521 int cpu; 527 int cpu;
522 528
523 /* 529 /*
@@ -545,7 +551,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
545 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); 551 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
546 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { 552 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
547 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); 553 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
548 if (dev->next_event.tv64 < bc->next_event.tv64) 554 /*
555 * We only reprogram the broadcast timer if we
556 * did not mark ourself in the force mask and
557 * if the cpu local event is earlier than the
558 * broadcast event. If the current CPU is in
559 * the force mask, then we are going to be
560 * woken by the IPI right away.
561 */
562 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
563 dev->next_event.tv64 < bc->next_event.tv64)
549 tick_broadcast_set_event(bc, cpu, dev->next_event, 1); 564 tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
550 } 565 }
551 } else { 566 } else {
@@ -566,6 +581,47 @@ void tick_broadcast_oneshot_control(unsigned long reason)
566 tick_broadcast_pending_mask)) 581 tick_broadcast_pending_mask))
567 goto out; 582 goto out;
568 583
584 /*
585 * If the pending bit is not set, then we are
586 * either the CPU handling the broadcast
587 * interrupt or we got woken by something else.
588 *
589 * We are no longer in the broadcast mask, so
590 * if the cpu local expiry time is already
591 * reached, we would reprogram the cpu local
592 * timer with an already expired event.
593 *
594 * This can lead to a ping-pong when we return
595 * to idle and therefore rearm the broadcast
596 * timer before the cpu local timer was able
597 * to fire. This happens because the forced
598 * reprogramming makes sure that the event
599 * will happen in the future and depending on
600 * the min_delta setting this might be far
601 * enough out that the ping-pong starts.
602 *
603 * If the cpu local next_event has expired
604 * then we know that the broadcast timer
605 * next_event has expired as well and
606 * broadcast is about to be handled. So we
607 * avoid reprogramming and enforce that the
608 * broadcast handler, which did not run yet,
609 * will invoke the cpu local handler.
610 *
611 * We cannot call the handler directly from
612 * here, because we might be in a NOHZ phase
613 * and we did not go through the irq_enter()
614 * nohz fixups.
615 */
616 now = ktime_get();
617 if (dev->next_event.tv64 <= now.tv64) {
618 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
619 goto out;
620 }
621 /*
622 * We got woken by something else. Reprogram
623 * the cpu local timer device.
624 */
569 tick_program_event(dev->next_event, 1); 625 tick_program_event(dev->next_event, 1);
570 } 626 }
571 } 627 }
@@ -707,5 +763,6 @@ void __init tick_broadcast_init(void)
707#ifdef CONFIG_TICK_ONESHOT 763#ifdef CONFIG_TICK_ONESHOT
708 alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); 764 alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
709 alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT); 765 alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
766 alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
710#endif 767#endif
711} 768}