 arch/arm/kernel/process.c    |  11
 arch/x86/kernel/process.c    |  12
 include/linux/clockchips.h   |  12
 init/main.c                  |   2
 kernel/hrtimer.c             |   6
 kernel/time/tick-broadcast.c | 225
 kernel/time/tick-common.c    |   1
 kernel/time/tick-internal.h  |   3
 kernel/time/tick-sched.c     |   4
 9 files changed, 214 insertions(+), 62 deletions(-)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e40e470..db4ffd09ee23 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -199,7 +199,16 @@ void cpu_idle(void)
 #ifdef CONFIG_PL310_ERRATA_769419
 			wmb();
 #endif
-			if (hlt_counter) {
+			/*
+			 * In poll mode we reenable interrupts and spin.
+			 *
+			 * Also if we detected in the wakeup from idle
+			 * path that the tick broadcast device expired
+			 * for us, we don't want to go deep idle as we
+			 * know that the IPI is going to arrive right
+			 * away
+			 */
+			if (hlt_counter || tick_check_broadcast_expired()) {
 				local_irq_enable();
 				cpu_relax();
 			} else if (!need_resched()) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 14ae10031ff0..aa524da03bba 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -336,6 +336,18 @@ void cpu_idle(void)
 			local_touch_nmi();
 			local_irq_disable();
 
+			/*
+			 * We detected in the wakeup path that the
+			 * tick broadcast device expired for us, but
+			 * we raced with the other CPU and came back
+			 * here before it was able to fire the IPI.
+			 * No point in going idle.
+			 */
+			if (tick_check_broadcast_expired()) {
+				local_irq_enable();
+				continue;
+			}
+
 			enter_idle();
 
 			/* Don't trace irqs off for idle */
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 66346521cb65..464e229e7d84 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -55,6 +55,11 @@ enum clock_event_nofitiers {
 #define CLOCK_EVT_FEAT_C3STOP		0x000008
 #define CLOCK_EVT_FEAT_DUMMY		0x000010
 
+/*
+ * Core shall set the interrupt affinity dynamically in broadcast mode
+ */
+#define CLOCK_EVT_FEAT_DYNIRQ		0x000020
+
 /**
  * struct clock_event_device - clock event device descriptor
  * @event_handler:	Assigned by the framework to be called by the low
@@ -170,6 +175,12 @@ extern void tick_broadcast(const struct cpumask *mask);
 extern int tick_receive_broadcast(void);
 #endif
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_check_broadcast_expired(void);
+#else
+static inline int tick_check_broadcast_expired(void) { return 0; }
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
@@ -182,6 +193,7 @@ static inline void clockevents_suspend(void) {}
 static inline void clockevents_resume(void) {}
 
 #define clockevents_notify(reason, arg) do { } while (0)
+static inline int tick_check_broadcast_expired(void) { return 0; }
 
 #endif
 
diff --git a/init/main.c b/init/main.c
index 63534a141b4e..b3e061428545 100644
--- a/init/main.c
+++ b/init/main.c
@@ -494,7 +494,6 @@ asmlinkage void __init start_kernel(void)
 	 * Interrupts are still disabled. Do necessary setups, then
 	 * enable them
 	 */
-	tick_init();
 	boot_cpu_init();
 	page_address_init();
 	printk(KERN_NOTICE "%s", linux_banner);
@@ -551,6 +550,7 @@ asmlinkage void __init start_kernel(void)
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
+	tick_init();
 	init_timers();
 	hrtimers_init();
 	softirq_init();
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 258720741d3e..d6830d5ae730 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1022,7 +1022,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @timer:	the timer to be added
  * @tim:	expiry time
  * @delta_ns:	"slack" range for the timer
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1039,7 +1040,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
  * hrtimer_start - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
  * @tim:	expiry time
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:	expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *		relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 2fb8cb88df8d..d76d816afc5d 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,9 +28,8 @@
  */
 
 static struct tick_device tick_broadcast_device;
-/* FIXME: Use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
-static DECLARE_BITMAP(tmpmask, NR_CPUS);
+static cpumask_var_t tick_broadcast_mask;
+static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 static int tick_broadcast_force;
 
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
 
 struct cpumask *tick_get_broadcast_mask(void)
 {
-	return to_cpumask(tick_broadcast_mask);
+	return tick_broadcast_mask;
 }
 
 /*
@@ -74,7 +73,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 
 	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
 	tick_broadcast_device.evtdev = dev;
-	if (!cpumask_empty(tick_get_broadcast_mask()))
+	if (!cpumask_empty(tick_broadcast_mask))
 		tick_broadcast_start_periodic(dev);
 	return 1;
 }
@@ -123,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
 		tick_device_setup_broadcast_func(dev);
-		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+		cpumask_set_cpu(cpu, tick_broadcast_mask);
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
 	} else {
@@ -134,7 +133,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 		 */
 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
 			int cpu = smp_processor_id();
-			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+			cpumask_clear_cpu(cpu, tick_broadcast_mask);
 			tick_broadcast_clear_oneshot(cpu);
 		} else {
 			tick_device_setup_broadcast_func(dev);
@@ -198,9 +197,8 @@ static void tick_do_periodic_broadcast(void)
 {
 	raw_spin_lock(&tick_broadcast_lock);
 
-	cpumask_and(to_cpumask(tmpmask),
-		    cpu_online_mask, tick_get_broadcast_mask());
-	tick_do_broadcast(to_cpumask(tmpmask));
+	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
+	tick_do_broadcast(tmpmask);
 
 	raw_spin_unlock(&tick_broadcast_lock);
 }
@@ -263,13 +261,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 	if (!tick_device_is_functional(dev))
 		goto out;
 
-	bc_stopped = cpumask_empty(tick_get_broadcast_mask());
+	bc_stopped = cpumask_empty(tick_broadcast_mask);
 
 	switch (*reason) {
 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-			cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
@@ -279,8 +276,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
 		if (!tick_broadcast_force &&
-		    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+		    cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
@@ -288,7 +284,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
 		break;
 	}
 
-	if (cpumask_empty(tick_get_broadcast_mask())) {
+	if (cpumask_empty(tick_broadcast_mask)) {
 		if (!bc_stopped)
 			clockevents_shutdown(bc);
 	} else if (bc_stopped) {
@@ -337,10 +333,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
-	cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_mask);
 
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
-		if (bc && cpumask_empty(tick_get_broadcast_mask()))
+		if (bc && cpumask_empty(tick_broadcast_mask))
 			clockevents_shutdown(bc);
 	}
 
@@ -376,13 +372,13 @@ int tick_resume_broadcast(void)
 
 	switch (tick_broadcast_device.mode) {
 	case TICKDEV_MODE_PERIODIC:
-		if (!cpumask_empty(tick_get_broadcast_mask()))
+		if (!cpumask_empty(tick_broadcast_mask))
 			tick_broadcast_start_periodic(bc);
 		broadcast = cpumask_test_cpu(smp_processor_id(),
-					     tick_get_broadcast_mask());
+					     tick_broadcast_mask);
 		break;
 	case TICKDEV_MODE_ONESHOT:
-		if (!cpumask_empty(tick_get_broadcast_mask()))
+		if (!cpumask_empty(tick_broadcast_mask))
 			broadcast = tick_resume_broadcast_oneshot(bc);
 		break;
 	}
@@ -395,25 +391,58 @@ int tick_resume_broadcast(void)
 
 #ifdef CONFIG_TICK_ONESHOT
 
-/* FIXME: use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
+static cpumask_var_t tick_broadcast_oneshot_mask;
+static cpumask_var_t tick_broadcast_pending_mask;
+static cpumask_var_t tick_broadcast_force_mask;
 
 /*
  * Exposed for debugging: see timer_list.c
  */
 struct cpumask *tick_get_broadcast_oneshot_mask(void)
 {
-	return to_cpumask(tick_broadcast_oneshot_mask);
+	return tick_broadcast_oneshot_mask;
 }
 
-static int tick_broadcast_set_event(ktime_t expires, int force)
+/*
+ * Called before going idle with interrupts disabled. Checks whether a
+ * broadcast event from the other core is about to happen. We detected
+ * that in tick_broadcast_oneshot_control(). The callsite can use this
+ * to avoid a deep idle transition as we are about to get the
+ * broadcast IPI right away.
+ */
+int tick_check_broadcast_expired(void)
 {
-	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+}
+
+/*
+ * Set broadcast interrupt affinity
+ */
+static void tick_broadcast_set_affinity(struct clock_event_device *bc,
+					const struct cpumask *cpumask)
+{
+	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
+		return;
+
+	if (cpumask_equal(bc->cpumask, cpumask))
+		return;
+
+	bc->cpumask = cpumask;
+	irq_set_affinity(bc->irq, bc->cpumask);
+}
+
+static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+				    ktime_t expires, int force)
+{
+	int ret;
 
 	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
 
-	return clockevents_program_event(bc, expires, force);
+	ret = clockevents_program_event(bc, expires, force);
+	if (!ret)
+		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
+	return ret;
 }
 
 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -428,7 +457,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  */
 void tick_check_oneshot_broadcast(int cpu)
 {
-	if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
+	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 
 		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -442,27 +471,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 {
 	struct tick_device *td;
 	ktime_t now, next_event;
-	int cpu;
+	int cpu, next_cpu = 0;
 
 	raw_spin_lock(&tick_broadcast_lock);
 again:
 	dev->next_event.tv64 = KTIME_MAX;
 	next_event.tv64 = KTIME_MAX;
-	cpumask_clear(to_cpumask(tmpmask));
+	cpumask_clear(tmpmask);
 	now = ktime_get();
 	/* Find all expired events */
-	for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
+	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
 		td = &per_cpu(tick_cpu_device, cpu);
-		if (td->evtdev->next_event.tv64 <= now.tv64)
-			cpumask_set_cpu(cpu, to_cpumask(tmpmask));
-		else if (td->evtdev->next_event.tv64 < next_event.tv64)
+		if (td->evtdev->next_event.tv64 <= now.tv64) {
+			cpumask_set_cpu(cpu, tmpmask);
+			/*
+			 * Mark the remote cpu in the pending mask, so
+			 * it can avoid reprogramming the cpu local
+			 * timer in tick_broadcast_oneshot_control().
+			 */
+			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
+		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
 			next_event.tv64 = td->evtdev->next_event.tv64;
+			next_cpu = cpu;
+		}
 	}
 
+	/* Take care of enforced broadcast requests */
+	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
+	cpumask_clear(tick_broadcast_force_mask);
+
 	/*
 	 * Wakeup the cpus which have an expired event.
 	 */
-	tick_do_broadcast(to_cpumask(tmpmask));
+	tick_do_broadcast(tmpmask);
 
 	/*
 	 * Two reasons for reprogram:
@@ -479,7 +520,7 @@ again:
 		 * Rearm the broadcast device. If event expired,
 		 * repeat the above
 		 */
-		if (tick_broadcast_set_event(next_event, 0))
+		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
 			goto again;
 	}
 	raw_spin_unlock(&tick_broadcast_lock);
@@ -494,6 +535,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
 	unsigned long flags;
+	ktime_t now;
 	int cpu;
 
 	/*
@@ -518,21 +560,84 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-		if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-			cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
+		WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
-			if (dev->next_event.tv64 < bc->next_event.tv64)
-				tick_broadcast_set_event(dev->next_event, 1);
+			/*
+			 * We only reprogram the broadcast timer if we
+			 * did not mark ourself in the force mask and
+			 * if the cpu local event is earlier than the
+			 * broadcast event. If the current CPU is in
+			 * the force mask, then we are going to be
+			 * woken by the IPI right away.
+			 */
+			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
+			    dev->next_event.tv64 < bc->next_event.tv64)
+				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
 		}
 	} else {
-		if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-			cpumask_clear_cpu(cpu,
-					  tick_get_broadcast_oneshot_mask());
+		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
-			if (dev->next_event.tv64 != KTIME_MAX)
-				tick_program_event(dev->next_event, 1);
+			if (dev->next_event.tv64 == KTIME_MAX)
+				goto out;
+			/*
+			 * The cpu which was handling the broadcast
+			 * timer marked this cpu in the broadcast
+			 * pending mask and fired the broadcast
+			 * IPI. So we are going to handle the expired
+			 * event anyway via the broadcast IPI
+			 * handler. No need to reprogram the timer
+			 * with an already expired event.
+			 */
+			if (cpumask_test_and_clear_cpu(cpu,
+				       tick_broadcast_pending_mask))
+				goto out;
+
+			/*
+			 * If the pending bit is not set, then we are
+			 * either the CPU handling the broadcast
+			 * interrupt or we got woken by something else.
+			 *
+			 * We are not longer in the broadcast mask, so
+			 * if the cpu local expiry time is already
+			 * reached, we would reprogram the cpu local
+			 * timer with an already expired event.
+			 *
+			 * This can lead to a ping-pong when we return
+			 * to idle and therefor rearm the broadcast
+			 * timer before the cpu local timer was able
+			 * to fire. This happens because the forced
+			 * reprogramming makes sure that the event
+			 * will happen in the future and depending on
+			 * the min_delta setting this might be far
+			 * enough out that the ping-pong starts.
+			 *
+			 * If the cpu local next_event has expired
+			 * then we know that the broadcast timer
+			 * next_event has expired as well and
+			 * broadcast is about to be handled. So we
+			 * avoid reprogramming and enforce that the
+			 * broadcast handler, which did not run yet,
+			 * will invoke the cpu local handler.
+			 *
+			 * We cannot call the handler directly from
+			 * here, because we might be in a NOHZ phase
+			 * and we did not go through the irq_enter()
+			 * nohz fixups.
+			 */
+			now = ktime_get();
+			if (dev->next_event.tv64 <= now.tv64) {
+				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
+				goto out;
+			}
+			/*
+			 * We got woken by something else. Reprogram
+			 * the cpu local timer device.
+			 */
+			tick_program_event(dev->next_event, 1);
 		}
 	}
+out:
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
@@ -543,7 +648,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
  */
 static void tick_broadcast_clear_oneshot(int cpu)
 {
-	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 }
 
 static void tick_broadcast_init_next_event(struct cpumask *mask,
@@ -581,17 +686,16 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 		 * oneshot_mask bits for those and program the
 		 * broadcast device to fire.
 		 */
-		cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
-		cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
-		cpumask_or(tick_get_broadcast_oneshot_mask(),
-			   tick_get_broadcast_oneshot_mask(),
-			   to_cpumask(tmpmask));
+		cpumask_copy(tmpmask, tick_broadcast_mask);
+		cpumask_clear_cpu(cpu, tmpmask);
+		cpumask_or(tick_broadcast_oneshot_mask,
+			   tick_broadcast_oneshot_mask, tmpmask);
 
-		if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
+		if (was_periodic && !cpumask_empty(tmpmask)) {
 			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-			tick_broadcast_init_next_event(to_cpumask(tmpmask),
-						       tick_next_period);
-			tick_broadcast_set_event(tick_next_period, 1);
+			tick_broadcast_init_next_event(tmpmask,
+						       tick_next_period);
+			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
 		} else
 			bc->next_event.tv64 = KTIME_MAX;
 	} else {
@@ -639,7 +743,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 	 * Clear the broadcast mask flag for the dead cpu, but do not
 	 * stop the broadcast device!
 	 */
-	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
@@ -663,3 +767,14 @@ bool tick_broadcast_oneshot_available(void)
 }
 
 #endif
+
+void __init tick_broadcast_init(void)
+{
+	alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
+#ifdef CONFIG_TICK_ONESHOT
+	alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
+#endif
+}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b1600a6973f4..74413e396acc 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -416,4 +416,5 @@ static struct notifier_block tick_notifier = {
 void __init tick_init(void)
 {
 	clockevents_register_notifier(&tick_notifier);
+	tick_broadcast_init();
 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index f5c9207967cf..f0299eae4602 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -96,7 +96,7 @@ extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
 extern void tick_shutdown_broadcast(unsigned int *cpup);
 extern void tick_suspend_broadcast(void);
 extern int tick_resume_broadcast(void);
-
+extern void tick_broadcast_init(void);
 extern void
 tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
 
@@ -121,6 +121,7 @@ static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
 static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline int tick_resume_broadcast(void) { return 0; }
+static inline void tick_broadcast_init(void) { }
 
 /*
  * Set the periodic handler in non broadcast mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a19a39952c1b..225f8bf19095 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -482,8 +482,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 
 	if (ratelimit < 10 &&
 	    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-		printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-		       (unsigned int) local_softirq_pending());
+		pr_warn("NOHZ: local_softirq_pending %02x\n",
+			(unsigned int) local_softirq_pending());
 		ratelimit++;
 	}
 	return false;