-rw-r--r--arch/arm/kernel/process.c11
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--include/linux/clockchips.h12
-rw-r--r--init/main.c2
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/time/tick-broadcast.c225
-rw-r--r--kernel/time/tick-common.c1
-rw-r--r--kernel/time/tick-internal.h3
-rw-r--r--kernel/time/tick-sched.c4
9 files changed, 214 insertions, 62 deletions
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e40e470..db4ffd09ee23 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -199,7 +199,16 @@ void cpu_idle(void)
199#ifdef CONFIG_PL310_ERRATA_769419 199#ifdef CONFIG_PL310_ERRATA_769419
200 wmb(); 200 wmb();
201#endif 201#endif
202 if (hlt_counter) { 202 /*
203 * In poll mode we reenable interrupts and spin.
204 *
205 * Also if we detected in the wakeup from idle
206 * path that the tick broadcast device expired
207 * for us, we don't want to go deep idle as we
208 * know that the IPI is going to arrive right
209 * away
210 */
211 if (hlt_counter || tick_check_broadcast_expired()) {
203 local_irq_enable(); 212 local_irq_enable();
204 cpu_relax(); 213 cpu_relax();
205 } else if (!need_resched()) { 214 } else if (!need_resched()) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 14ae10031ff0..aa524da03bba 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -336,6 +336,18 @@ void cpu_idle(void)
336 local_touch_nmi(); 336 local_touch_nmi();
337 local_irq_disable(); 337 local_irq_disable();
338 338
339 /*
340 * We detected in the wakeup path that the
341 * tick broadcast device expired for us, but
342 * we raced with the other CPU and came back
343 * here before it was able to fire the IPI.
344 * No point in going idle.
345 */
346 if (tick_check_broadcast_expired()) {
347 local_irq_enable();
348 continue;
349 }
350
339 enter_idle(); 351 enter_idle();
340 352
341 /* Don't trace irqs off for idle */ 353 /* Don't trace irqs off for idle */
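Both idle loops now poll the same helper before committing to a deep idle state. A minimal sketch of the shared pattern, assuming a hypothetical arch_enter_deep_idle() as a stand-in for the architecture's real low-power entry:

/* Sketch: bail out of deep idle when the broadcast IPI is already on its way. */
while (!need_resched()) {
    local_irq_disable();

    if (tick_check_broadcast_expired()) {
        /*
         * The broadcast device already expired for this CPU, so the
         * IPI will arrive momentarily; spinning with interrupts
         * enabled is cheaper than a deep-idle round trip.
         */
        local_irq_enable();
        cpu_relax();
        continue;
    }

    arch_enter_deep_idle();    /* hypothetical: re-enables interrupts on wakeup */
}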
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 66346521cb65..464e229e7d84 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -55,6 +55,11 @@ enum clock_event_nofitiers {
55#define CLOCK_EVT_FEAT_C3STOP 0x000008 55#define CLOCK_EVT_FEAT_C3STOP 0x000008
56#define CLOCK_EVT_FEAT_DUMMY 0x000010 56#define CLOCK_EVT_FEAT_DUMMY 0x000010
57 57
58/*
59 * Core shall set the interrupt affinity dynamically in broadcast mode
60 */
61#define CLOCK_EVT_FEAT_DYNIRQ 0x000020
62
58/** 63/**
59 * struct clock_event_device - clock event device descriptor 64 * struct clock_event_device - clock event device descriptor
60 * @event_handler: Assigned by the framework to be called by the low 65 * @event_handler: Assigned by the framework to be called by the low
@@ -170,6 +175,12 @@ extern void tick_broadcast(const struct cpumask *mask);
170extern int tick_receive_broadcast(void); 175extern int tick_receive_broadcast(void);
171#endif 176#endif
172 177
178#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
179extern int tick_check_broadcast_expired(void);
180#else
181static inline int tick_check_broadcast_expired(void) { return 0; }
182#endif
183
173#ifdef CONFIG_GENERIC_CLOCKEVENTS 184#ifdef CONFIG_GENERIC_CLOCKEVENTS
174extern void clockevents_notify(unsigned long reason, void *arg); 185extern void clockevents_notify(unsigned long reason, void *arg);
175#else 186#else
@@ -182,6 +193,7 @@ static inline void clockevents_suspend(void) {}
182static inline void clockevents_resume(void) {} 193static inline void clockevents_resume(void) {}
183 194
184#define clockevents_notify(reason, arg) do { } while (0) 195#define clockevents_notify(reason, arg) do { } while (0)
196static inline int tick_check_broadcast_expired(void) { return 0; }
185 197
186#endif 198#endif
187 199
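A clock event driver opts into the dynamic affinity behaviour by setting the new feature bit and exposing its interrupt line. A hedged sketch of such a device description; the name, rating and IRQ number are made up for illustration, and the usual set_mode/set_next_event callbacks are omitted:

static struct clock_event_device example_broadcast_dev = {
    .name     = "example-broadcast-timer",              /* hypothetical device */
    .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
    .irq      = 42,                                     /* made-up IRQ number */
    .rating   = 300,
};

/*
 * With CLOCK_EVT_FEAT_DYNIRQ set, the core may call
 * irq_set_affinity(dev->irq, ...) from tick_broadcast_set_affinity() to steer
 * the broadcast interrupt to the CPU whose event expires next, so that CPU
 * wakes itself without an extra IPI.
 */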
diff --git a/init/main.c b/init/main.c
index 63534a141b4e..b3e061428545 100644
--- a/init/main.c
+++ b/init/main.c
@@ -494,7 +494,6 @@ asmlinkage void __init start_kernel(void)
494 * Interrupts are still disabled. Do necessary setups, then 494 * Interrupts are still disabled. Do necessary setups, then
495 * enable them 495 * enable them
496 */ 496 */
497 tick_init();
498 boot_cpu_init(); 497 boot_cpu_init();
499 page_address_init(); 498 page_address_init();
500 printk(KERN_NOTICE "%s", linux_banner); 499 printk(KERN_NOTICE "%s", linux_banner);
@@ -551,6 +550,7 @@ asmlinkage void __init start_kernel(void)
551 /* init some links before init_ISA_irqs() */ 550 /* init some links before init_ISA_irqs() */
552 early_irq_init(); 551 early_irq_init();
553 init_IRQ(); 552 init_IRQ();
553 tick_init();
554 init_timers(); 554 init_timers();
555 hrtimers_init(); 555 hrtimers_init();
556 softirq_init(); 556 softirq_init();
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 258720741d3e..d6830d5ae730 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1022,7 +1022,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1022 * @timer: the timer to be added 1022 * @timer: the timer to be added
1023 * @tim: expiry time 1023 * @tim: expiry time
1024 * @delta_ns: "slack" range for the timer 1024 * @delta_ns: "slack" range for the timer
1025 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) 1025 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1026 * relative (HRTIMER_MODE_REL)
1026 * 1027 *
1027 * Returns: 1028 * Returns:
1028 * 0 on success 1029 * 0 on success
@@ -1039,7 +1040,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
1039 * hrtimer_start - (re)start an hrtimer on the current CPU 1040 * hrtimer_start - (re)start an hrtimer on the current CPU
1040 * @timer: the timer to be added 1041 * @timer: the timer to be added
1041 * @tim: expiry time 1042 * @tim: expiry time
1042 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) 1043 * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or
1044 * relative (HRTIMER_MODE_REL)
1043 * 1045 *
1044 * Returns: 1046 * Returns:
1045 * 0 on success 1047 * 0 on success
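The kerneldoc fix only corrects the constant names (HRTIMER_MODE_ABS/HRTIMER_MODE_REL). A small usage sketch for reference; the callback and the 10 ms period are illustrative only:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer example_timer;    /* hypothetical timer */

static enum hrtimer_restart example_fn(struct hrtimer *t)
{
    /* Re-arm 10 ms from now; return HRTIMER_NORESTART for a one-shot. */
    hrtimer_forward_now(t, ktime_set(0, 10 * NSEC_PER_MSEC));
    return HRTIMER_RESTART;
}

static void example_start(void)
{
    hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    example_timer.function = example_fn;
    /* Relative expiry: HRTIMER_MODE_REL, not the old HRTIMER_REL spelling. */
    hrtimer_start(&example_timer, ktime_set(0, 10 * NSEC_PER_MSEC),
                  HRTIMER_MODE_REL);
}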
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 2fb8cb88df8d..d76d816afc5d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,9 +28,8 @@
28 */ 28 */
29 29
30static struct tick_device tick_broadcast_device; 30static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */ 31static cpumask_var_t tick_broadcast_mask;
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS); 32static cpumask_var_t tmpmask;
33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); 33static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force; 34static int tick_broadcast_force;
36 35
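The DECLARE_BITMAP(..., NR_CPUS) arrays become cpumask_var_t, which only turns into a pointer plus a runtime allocation when CONFIG_CPUMASK_OFFSTACK=y. A minimal sketch of the general pattern outside early boot; unlike the GFP_NOWAIT boot-time allocations later in this file, ordinary callers check the return value:

#include <linux/cpumask.h>
#include <linux/smp.h>

static cpumask_var_t example_mask;    /* hypothetical mask */

static int example_setup(void)
{
    /*
     * Fails only with CONFIG_CPUMASK_OFFSTACK when the allocation fails;
     * otherwise the storage is embedded in the variable itself.
     */
    if (!zalloc_cpumask_var(&example_mask, GFP_KERNEL))
        return -ENOMEM;

    cpumask_set_cpu(smp_processor_id(), example_mask);
    return 0;
}

static void example_teardown(void)
{
    free_cpumask_var(example_mask);
}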
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
50 49
51struct cpumask *tick_get_broadcast_mask(void) 50struct cpumask *tick_get_broadcast_mask(void)
52{ 51{
53 return to_cpumask(tick_broadcast_mask); 52 return tick_broadcast_mask;
54} 53}
55 54
56/* 55/*
@@ -74,7 +73,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
74 73
75 clockevents_exchange_device(tick_broadcast_device.evtdev, dev); 74 clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
76 tick_broadcast_device.evtdev = dev; 75 tick_broadcast_device.evtdev = dev;
77 if (!cpumask_empty(tick_get_broadcast_mask())) 76 if (!cpumask_empty(tick_broadcast_mask))
78 tick_broadcast_start_periodic(dev); 77 tick_broadcast_start_periodic(dev);
79 return 1; 78 return 1;
80} 79}
@@ -123,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
123 if (!tick_device_is_functional(dev)) { 122 if (!tick_device_is_functional(dev)) {
124 dev->event_handler = tick_handle_periodic; 123 dev->event_handler = tick_handle_periodic;
125 tick_device_setup_broadcast_func(dev); 124 tick_device_setup_broadcast_func(dev);
126 cpumask_set_cpu(cpu, tick_get_broadcast_mask()); 125 cpumask_set_cpu(cpu, tick_broadcast_mask);
127 tick_broadcast_start_periodic(tick_broadcast_device.evtdev); 126 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
128 ret = 1; 127 ret = 1;
129 } else { 128 } else {
@@ -134,7 +133,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
134 */ 133 */
135 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { 134 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
136 int cpu = smp_processor_id(); 135 int cpu = smp_processor_id();
137 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 136 cpumask_clear_cpu(cpu, tick_broadcast_mask);
138 tick_broadcast_clear_oneshot(cpu); 137 tick_broadcast_clear_oneshot(cpu);
139 } else { 138 } else {
140 tick_device_setup_broadcast_func(dev); 139 tick_device_setup_broadcast_func(dev);
@@ -198,9 +197,8 @@ static void tick_do_periodic_broadcast(void)
198{ 197{
199 raw_spin_lock(&tick_broadcast_lock); 198 raw_spin_lock(&tick_broadcast_lock);
200 199
201 cpumask_and(to_cpumask(tmpmask), 200 cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
202 cpu_online_mask, tick_get_broadcast_mask()); 201 tick_do_broadcast(tmpmask);
203 tick_do_broadcast(to_cpumask(tmpmask));
204 202
205 raw_spin_unlock(&tick_broadcast_lock); 203 raw_spin_unlock(&tick_broadcast_lock);
206} 204}
@@ -263,13 +261,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
263 if (!tick_device_is_functional(dev)) 261 if (!tick_device_is_functional(dev))
264 goto out; 262 goto out;
265 263
266 bc_stopped = cpumask_empty(tick_get_broadcast_mask()); 264 bc_stopped = cpumask_empty(tick_broadcast_mask);
267 265
268 switch (*reason) { 266 switch (*reason) {
269 case CLOCK_EVT_NOTIFY_BROADCAST_ON: 267 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
270 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: 268 case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
271 if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { 269 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
272 cpumask_set_cpu(cpu, tick_get_broadcast_mask());
273 if (tick_broadcast_device.mode == 270 if (tick_broadcast_device.mode ==
274 TICKDEV_MODE_PERIODIC) 271 TICKDEV_MODE_PERIODIC)
275 clockevents_shutdown(dev); 272 clockevents_shutdown(dev);
@@ -279,8 +276,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
279 break; 276 break;
280 case CLOCK_EVT_NOTIFY_BROADCAST_OFF: 277 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
281 if (!tick_broadcast_force && 278 if (!tick_broadcast_force &&
282 cpumask_test_cpu(cpu, tick_get_broadcast_mask())) { 279 cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
283 cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
284 if (tick_broadcast_device.mode == 280 if (tick_broadcast_device.mode ==
285 TICKDEV_MODE_PERIODIC) 281 TICKDEV_MODE_PERIODIC)
286 tick_setup_periodic(dev, 0); 282 tick_setup_periodic(dev, 0);
@@ -288,7 +284,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
288 break; 284 break;
289 } 285 }
290 286
291 if (cpumask_empty(tick_get_broadcast_mask())) { 287 if (cpumask_empty(tick_broadcast_mask)) {
292 if (!bc_stopped) 288 if (!bc_stopped)
293 clockevents_shutdown(bc); 289 clockevents_shutdown(bc);
294 } else if (bc_stopped) { 290 } else if (bc_stopped) {
@@ -337,10 +333,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
337 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 333 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
338 334
339 bc = tick_broadcast_device.evtdev; 335 bc = tick_broadcast_device.evtdev;
340 cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); 336 cpumask_clear_cpu(cpu, tick_broadcast_mask);
341 337
342 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { 338 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
343 if (bc && cpumask_empty(tick_get_broadcast_mask())) 339 if (bc && cpumask_empty(tick_broadcast_mask))
344 clockevents_shutdown(bc); 340 clockevents_shutdown(bc);
345 } 341 }
346 342
@@ -376,13 +372,13 @@ int tick_resume_broadcast(void)
376 372
377 switch (tick_broadcast_device.mode) { 373 switch (tick_broadcast_device.mode) {
378 case TICKDEV_MODE_PERIODIC: 374 case TICKDEV_MODE_PERIODIC:
379 if (!cpumask_empty(tick_get_broadcast_mask())) 375 if (!cpumask_empty(tick_broadcast_mask))
380 tick_broadcast_start_periodic(bc); 376 tick_broadcast_start_periodic(bc);
381 broadcast = cpumask_test_cpu(smp_processor_id(), 377 broadcast = cpumask_test_cpu(smp_processor_id(),
382 tick_get_broadcast_mask()); 378 tick_broadcast_mask);
383 break; 379 break;
384 case TICKDEV_MODE_ONESHOT: 380 case TICKDEV_MODE_ONESHOT:
385 if (!cpumask_empty(tick_get_broadcast_mask())) 381 if (!cpumask_empty(tick_broadcast_mask))
386 broadcast = tick_resume_broadcast_oneshot(bc); 382 broadcast = tick_resume_broadcast_oneshot(bc);
387 break; 383 break;
388 } 384 }
@@ -395,25 +391,58 @@ int tick_resume_broadcast(void)
395 391
396#ifdef CONFIG_TICK_ONESHOT 392#ifdef CONFIG_TICK_ONESHOT
397 393
398/* FIXME: use cpumask_var_t. */ 394static cpumask_var_t tick_broadcast_oneshot_mask;
399static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS); 395static cpumask_var_t tick_broadcast_pending_mask;
396static cpumask_var_t tick_broadcast_force_mask;
400 397
401/* 398/*
402 * Exposed for debugging: see timer_list.c 399 * Exposed for debugging: see timer_list.c
403 */ 400 */
404struct cpumask *tick_get_broadcast_oneshot_mask(void) 401struct cpumask *tick_get_broadcast_oneshot_mask(void)
405{ 402{
406 return to_cpumask(tick_broadcast_oneshot_mask); 403 return tick_broadcast_oneshot_mask;
407} 404}
408 405
409static int tick_broadcast_set_event(ktime_t expires, int force) 406/*
407 * Called before going idle with interrupts disabled. Checks whether a
408 * broadcast event from the other core is about to happen. We detected
409 * that in tick_broadcast_oneshot_control(). The callsite can use this
410 * to avoid a deep idle transition as we are about to get the
411 * broadcast IPI right away.
412 */
413int tick_check_broadcast_expired(void)
410{ 414{
411 struct clock_event_device *bc = tick_broadcast_device.evtdev; 415 return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
416}
417
418/*
419 * Set broadcast interrupt affinity
420 */
421static void tick_broadcast_set_affinity(struct clock_event_device *bc,
422 const struct cpumask *cpumask)
423{
424 if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
425 return;
426
427 if (cpumask_equal(bc->cpumask, cpumask))
428 return;
429
430 bc->cpumask = cpumask;
431 irq_set_affinity(bc->irq, bc->cpumask);
432}
433
434static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
435 ktime_t expires, int force)
436{
437 int ret;
412 438
413 if (bc->mode != CLOCK_EVT_MODE_ONESHOT) 439 if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
414 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 440 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
415 441
416 return clockevents_program_event(bc, expires, force); 442 ret = clockevents_program_event(bc, expires, force);
443 if (!ret)
444 tick_broadcast_set_affinity(bc, cpumask_of(cpu));
445 return ret;
417} 446}
418 447
419int tick_resume_broadcast_oneshot(struct clock_event_device *bc) 448int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -428,7 +457,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
428 */ 457 */
429void tick_check_oneshot_broadcast(int cpu) 458void tick_check_oneshot_broadcast(int cpu)
430{ 459{
431 if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) { 460 if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
432 struct tick_device *td = &per_cpu(tick_cpu_device, cpu); 461 struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
433 462
434 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT); 463 clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -442,27 +471,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
442{ 471{
443 struct tick_device *td; 472 struct tick_device *td;
444 ktime_t now, next_event; 473 ktime_t now, next_event;
445 int cpu; 474 int cpu, next_cpu = 0;
446 475
447 raw_spin_lock(&tick_broadcast_lock); 476 raw_spin_lock(&tick_broadcast_lock);
448again: 477again:
449 dev->next_event.tv64 = KTIME_MAX; 478 dev->next_event.tv64 = KTIME_MAX;
450 next_event.tv64 = KTIME_MAX; 479 next_event.tv64 = KTIME_MAX;
451 cpumask_clear(to_cpumask(tmpmask)); 480 cpumask_clear(tmpmask);
452 now = ktime_get(); 481 now = ktime_get();
453 /* Find all expired events */ 482 /* Find all expired events */
454 for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) { 483 for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
455 td = &per_cpu(tick_cpu_device, cpu); 484 td = &per_cpu(tick_cpu_device, cpu);
456 if (td->evtdev->next_event.tv64 <= now.tv64) 485 if (td->evtdev->next_event.tv64 <= now.tv64) {
457 cpumask_set_cpu(cpu, to_cpumask(tmpmask)); 486 cpumask_set_cpu(cpu, tmpmask);
458 else if (td->evtdev->next_event.tv64 < next_event.tv64) 487 /*
488 * Mark the remote cpu in the pending mask, so
489 * it can avoid reprogramming the cpu local
490 * timer in tick_broadcast_oneshot_control().
491 */
492 cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
493 } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
459 next_event.tv64 = td->evtdev->next_event.tv64; 494 next_event.tv64 = td->evtdev->next_event.tv64;
495 next_cpu = cpu;
496 }
460 } 497 }
461 498
499 /* Take care of enforced broadcast requests */
500 cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
501 cpumask_clear(tick_broadcast_force_mask);
502
462 /* 503 /*
463 * Wakeup the cpus which have an expired event. 504 * Wakeup the cpus which have an expired event.
464 */ 505 */
465 tick_do_broadcast(to_cpumask(tmpmask)); 506 tick_do_broadcast(tmpmask);
466 507
467 /* 508 /*
468 * Two reasons for reprogram: 509 * Two reasons for reprogram:
@@ -479,7 +520,7 @@ again:
479 * Rearm the broadcast device. If event expired, 520 * Rearm the broadcast device. If event expired,
480 * repeat the above 521 * repeat the above
481 */ 522 */
482 if (tick_broadcast_set_event(next_event, 0)) 523 if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
483 goto again; 524 goto again;
484 } 525 }
485 raw_spin_unlock(&tick_broadcast_lock); 526 raw_spin_unlock(&tick_broadcast_lock);
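Because tick_broadcast_set_event() now retargets the interrupt to next_cpu, the CPU with the earliest pending event usually takes the broadcast interrupt itself and is serviced locally, without an IPI. For orientation, tick_do_broadcast() (not shown in this diff) behaves roughly like the sketch below; treat it as a simplified reconstruction, not the verbatim source:

static void tick_do_broadcast_sketch(struct cpumask *mask)
{
    int cpu = smp_processor_id();
    struct tick_device *td;

    /*
     * If the CPU handling the broadcast interrupt has an expired event
     * itself, run its handler directly - no IPI needed.
     */
    if (cpumask_test_cpu(cpu, mask)) {
        cpumask_clear_cpu(cpu, mask);
        td = &per_cpu(tick_cpu_device, cpu);
        td->evtdev->event_handler(td->evtdev);
    }

    /* Everybody else is kicked via the broadcast method (an IPI on ARM/x86). */
    if (!cpumask_empty(mask)) {
        td = &per_cpu(tick_cpu_device, cpumask_first(mask));
        td->evtdev->broadcast(mask);
    }
}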
@@ -494,6 +535,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
494 struct clock_event_device *bc, *dev; 535 struct clock_event_device *bc, *dev;
495 struct tick_device *td; 536 struct tick_device *td;
496 unsigned long flags; 537 unsigned long flags;
538 ktime_t now;
497 int cpu; 539 int cpu;
498 540
499 /* 541 /*
@@ -518,21 +560,84 @@ void tick_broadcast_oneshot_control(unsigned long reason)
518 560
519 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 561 raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
520 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { 562 if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
521 if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { 563 WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
522 cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); 564 if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
523 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); 565 clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
524 if (dev->next_event.tv64 < bc->next_event.tv64) 566 /*
525 tick_broadcast_set_event(dev->next_event, 1); 567 * We only reprogram the broadcast timer if we
568 * did not mark ourself in the force mask and
569 * if the cpu local event is earlier than the
570 * broadcast event. If the current CPU is in
571 * the force mask, then we are going to be
572 * woken by the IPI right away.
573 */
574 if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
575 dev->next_event.tv64 < bc->next_event.tv64)
576 tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
526 } 577 }
527 } else { 578 } else {
528 if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { 579 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
529 cpumask_clear_cpu(cpu,
530 tick_get_broadcast_oneshot_mask());
531 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 580 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
532 if (dev->next_event.tv64 != KTIME_MAX) 581 if (dev->next_event.tv64 == KTIME_MAX)
533 tick_program_event(dev->next_event, 1); 582 goto out;
583 /*
584 * The cpu which was handling the broadcast
585 * timer marked this cpu in the broadcast
586 * pending mask and fired the broadcast
587 * IPI. So we are going to handle the expired
588 * event anyway via the broadcast IPI
589 * handler. No need to reprogram the timer
590 * with an already expired event.
591 */
592 if (cpumask_test_and_clear_cpu(cpu,
593 tick_broadcast_pending_mask))
594 goto out;
595
596 /*
597 * If the pending bit is not set, then we are
598 * either the CPU handling the broadcast
599 * interrupt or we got woken by something else.
600 *
 601 * We are no longer in the broadcast mask, so
602 * if the cpu local expiry time is already
603 * reached, we would reprogram the cpu local
604 * timer with an already expired event.
605 *
606 * This can lead to a ping-pong when we return
 607 * to idle and therefore rearm the broadcast
608 * timer before the cpu local timer was able
609 * to fire. This happens because the forced
610 * reprogramming makes sure that the event
611 * will happen in the future and depending on
612 * the min_delta setting this might be far
613 * enough out that the ping-pong starts.
614 *
615 * If the cpu local next_event has expired
616 * then we know that the broadcast timer
617 * next_event has expired as well and
618 * broadcast is about to be handled. So we
619 * avoid reprogramming and enforce that the
620 * broadcast handler, which did not run yet,
621 * will invoke the cpu local handler.
622 *
623 * We cannot call the handler directly from
624 * here, because we might be in a NOHZ phase
625 * and we did not go through the irq_enter()
626 * nohz fixups.
627 */
628 now = ktime_get();
629 if (dev->next_event.tv64 <= now.tv64) {
630 cpumask_set_cpu(cpu, tick_broadcast_force_mask);
631 goto out;
632 }
633 /*
634 * We got woken by something else. Reprogram
635 * the cpu local timer device.
636 */
637 tick_program_event(dev->next_event, 1);
534 } 638 }
535 } 639 }
640out:
536 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 641 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
537} 642}
538 643
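tick_broadcast_oneshot_control() is driven from the idle entry/exit path. A hedged sketch of how a deep-idle state whose local timer stops (CLOCK_EVT_FEAT_C3STOP) hands off to the broadcast machinery; enter_deep_c_state() is a made-up placeholder:

#include <linux/clockchips.h>
#include <linux/smp.h>

static void example_deep_idle(void)
{
    int cpu = smp_processor_id();

    /*
     * Hand the wakeup over to the broadcast device before the local
     * clock event device stops in the deep C-state.
     */
    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

    enter_deep_c_state();    /* hypothetical low-power entry */

    /*
     * Back from idle: reclaim the local device. If our event already
     * expired, the EXIT path above arranges for the broadcast handler
     * to run it instead of reprogramming a stale expiry.
     */
    clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
}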
@@ -543,7 +648,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
543 */ 648 */
544static void tick_broadcast_clear_oneshot(int cpu) 649static void tick_broadcast_clear_oneshot(int cpu)
545{ 650{
546 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 651 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
547} 652}
548 653
549static void tick_broadcast_init_next_event(struct cpumask *mask, 654static void tick_broadcast_init_next_event(struct cpumask *mask,
@@ -581,17 +686,16 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
581 * oneshot_mask bits for those and program the 686 * oneshot_mask bits for those and program the
582 * broadcast device to fire. 687 * broadcast device to fire.
583 */ 688 */
584 cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask()); 689 cpumask_copy(tmpmask, tick_broadcast_mask);
585 cpumask_clear_cpu(cpu, to_cpumask(tmpmask)); 690 cpumask_clear_cpu(cpu, tmpmask);
586 cpumask_or(tick_get_broadcast_oneshot_mask(), 691 cpumask_or(tick_broadcast_oneshot_mask,
587 tick_get_broadcast_oneshot_mask(), 692 tick_broadcast_oneshot_mask, tmpmask);
588 to_cpumask(tmpmask));
589 693
590 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { 694 if (was_periodic && !cpumask_empty(tmpmask)) {
591 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); 695 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
592 tick_broadcast_init_next_event(to_cpumask(tmpmask), 696 tick_broadcast_init_next_event(tmpmask,
593 tick_next_period); 697 tick_next_period);
594 tick_broadcast_set_event(tick_next_period, 1); 698 tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
595 } else 699 } else
596 bc->next_event.tv64 = KTIME_MAX; 700 bc->next_event.tv64 = KTIME_MAX;
597 } else { 701 } else {
@@ -639,7 +743,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
639 * Clear the broadcast mask flag for the dead cpu, but do not 743 * Clear the broadcast mask flag for the dead cpu, but do not
640 * stop the broadcast device! 744 * stop the broadcast device!
641 */ 745 */
642 cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask()); 746 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
643 747
644 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 748 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
645} 749}
@@ -663,3 +767,14 @@ bool tick_broadcast_oneshot_available(void)
663} 767}
664 768
665#endif 769#endif
770
771void __init tick_broadcast_init(void)
772{
773 alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
774 alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
775#ifdef CONFIG_TICK_ONESHOT
776 alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
777 alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
778 alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
779#endif
780}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b1600a6973f4..74413e396acc 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -416,4 +416,5 @@ static struct notifier_block tick_notifier = {
416void __init tick_init(void) 416void __init tick_init(void)
417{ 417{
418 clockevents_register_notifier(&tick_notifier); 418 clockevents_register_notifier(&tick_notifier);
419 tick_broadcast_init();
419} 420}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f5c9207967cf..f0299eae4602 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -96,7 +96,7 @@ extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
96extern void tick_shutdown_broadcast(unsigned int *cpup); 96extern void tick_shutdown_broadcast(unsigned int *cpup);
97extern void tick_suspend_broadcast(void); 97extern void tick_suspend_broadcast(void);
98extern int tick_resume_broadcast(void); 98extern int tick_resume_broadcast(void);
99 99extern void tick_broadcast_init(void);
100extern void 100extern void
101tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); 101tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
102 102
@@ -121,6 +121,7 @@ static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
121static inline void tick_shutdown_broadcast(unsigned int *cpup) { } 121static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
122static inline void tick_suspend_broadcast(void) { } 122static inline void tick_suspend_broadcast(void) { }
123static inline int tick_resume_broadcast(void) { return 0; } 123static inline int tick_resume_broadcast(void) { return 0; }
124static inline void tick_broadcast_init(void) { }
124 125
125/* 126/*
126 * Set the periodic handler in non broadcast mode 127 * Set the periodic handler in non broadcast mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a19a39952c1b..225f8bf19095 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -482,8 +482,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
482 482
483 if (ratelimit < 10 && 483 if (ratelimit < 10 &&
484 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { 484 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
485 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", 485 pr_warn("NOHZ: local_softirq_pending %02x\n",
486 (unsigned int) local_softirq_pending()); 486 (unsigned int) local_softirq_pending());
487 ratelimit++; 487 ratelimit++;
488 } 488 }
489 return false; 489 return false;
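The printk(KERN_ERR ...) call becomes pr_warn(), which lowers the severity to warning and picks up any pr_fmt() prefix defined in the file; the two forms below are otherwise equivalent (pending stands in for the local_softirq_pending() value):

/* Before: explicit level, no pr_fmt() prefix applied. */
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", pending);

/* After: shorthand for printk(KERN_WARNING pr_fmt(...), ...). */
pr_warn("NOHZ: local_softirq_pending %02x\n", pending);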