-rw-r--r--	drivers/cpuidle/coupled.c	107
1 file changed, 82 insertions(+), 25 deletions(-)
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index db92bcbe6946..5d3962730063 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -106,6 +106,7 @@ struct cpuidle_coupled {
 	cpumask_t coupled_cpus;
 	int requested_state[NR_CPUS];
 	atomic_t ready_waiting_counts;
+	atomic_t abort_barrier;
 	int online_count;
 	int refcnt;
 	int prevent;
@@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock);
 static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
 
 /*
- * The cpuidle_coupled_poked_mask mask is used to avoid calling
+ * The cpuidle_coupled_poke_pending mask is used to avoid calling
  * __smp_call_function_single with the per cpu call_single_data struct already
  * in use.  This prevents a deadlock where two cpus are waiting for each others
  * call_single_data struct to be available
  */
-static cpumask_t cpuidle_coupled_poked_mask;
+static cpumask_t cpuidle_coupled_poke_pending;
+
+/*
+ * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked
+ * once to minimize entering the ready loop with a poke pending, which would
+ * require aborting and retrying.
+ */
+static cpumask_t cpuidle_coupled_poked;
 
 /**
  * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
@@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
 	return state;
 }
 
-static void cpuidle_coupled_poked(void *info)
+static void cpuidle_coupled_handle_poke(void *info)
 {
 	int cpu = (unsigned long)info;
-	cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask);
+	cpumask_set_cpu(cpu, &cpuidle_coupled_poked);
+	cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending);
 }
 
 /**
@@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu)
 {
 	struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
 
-	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask))
+	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
 		__smp_call_function_single(cpu, csd, 0);
 }
 
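The rename above makes the mask's role explicit: cpuidle_coupled_poke_pending is a test-and-set guard that keeps the per-cpu call_single_data from being handed to __smp_call_function_single() while a previous poke is still in flight, and the handler clears it only after also recording the poke in cpuidle_coupled_poked. Below is a minimal userspace sketch of that guard using C11 atomics; the names and the synchronous handle_poke() call are illustrative, not kernel API.

/* Userspace sketch of the poke_pending guard (illustrative, not kernel code). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static atomic_bool poke_pending[NCPUS];	/* models cpuidle_coupled_poke_pending */
static atomic_bool poked[NCPUS];	/* models cpuidle_coupled_poked */

/* Models cpuidle_coupled_handle_poke(): record the poke, then clear pending. */
static void handle_poke(int cpu)
{
	atomic_store(&poked[cpu], true);
	atomic_store(&poke_pending[cpu], false);
}

/* Models cpuidle_coupled_poke(): dispatch only if no poke is already in flight. */
static bool poke(int cpu)
{
	if (!atomic_exchange(&poke_pending[cpu], true)) {
		/* the kernel would call __smp_call_function_single() here */
		return true;
	}
	return false;
}

int main(void)
{
	printf("poke sent: %d\n", poke(1));	/* 1: dispatched */
	printf("poke sent: %d\n", poke(1));	/* 0: still pending, csd not reused */
	handle_poke(1);				/* target cpu handles the IPI */
	printf("poke sent: %d\n", poke(1));	/* 1: pending was cleared */
	return 0;
}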
@@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu,
  * @coupled: the struct coupled that contains the current cpu
  * @next_state: the index in drv->states of the requested state for this cpu
  *
- * Updates the requested idle state for the specified cpuidle device,
- * poking all coupled cpus out of idle if necessary to let them see the new
- * state.
+ * Updates the requested idle state for the specified cpuidle device.
+ * Returns the number of waiting cpus.
  */
-static void cpuidle_coupled_set_waiting(int cpu,
+static int cpuidle_coupled_set_waiting(int cpu,
 		struct cpuidle_coupled *coupled, int next_state)
 {
-	int w;
-
 	coupled->requested_state[cpu] = next_state;
 
 	/*
-	 * If this is the last cpu to enter the waiting state, poke
-	 * all the other cpus out of their waiting state so they can
-	 * enter a deeper state.  This can race with one of the cpus
-	 * exiting the waiting state due to an interrupt and
-	 * decrementing waiting_count, see comment below.
-	 *
 	 * The atomic_inc_return provides a write barrier to order the write
 	 * to requested_state with the later write that increments ready_count.
 	 */
-	w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
-	if (w == coupled->online_count)
-		cpuidle_coupled_poke_others(cpu, coupled);
+	return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
 }
 
 /**
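With the hunk above, cpuidle_coupled_set_waiting() only registers the request and returns the new waiting count; the decision to poke the other cpus moves to the caller, in the cpuidle_enter_state_coupled() hunk further down. A userspace sketch of the packed counter follows, assuming the waiting count occupies the low bits of ready_waiting_counts as the "& WAITING_MASK" suggests; WAITING_BITS, the helper name, and the loop in main() are illustrative.

/* Sketch of the packed ready/waiting counter (assumption: waiting count in the low bits). */
#include <stdatomic.h>
#include <stdio.h>

#define WAITING_BITS 16
#define WAITING_MASK ((1u << WAITING_BITS) - 1)

static atomic_uint ready_waiting_counts;

/* Models the new cpuidle_coupled_set_waiting(): returns the waiting count. */
static unsigned int set_waiting(void)
{
	/* atomic increment orders the requested-state write before the later ready bump */
	return (atomic_fetch_add(&ready_waiting_counts, 1) + 1) & WAITING_MASK;
}

int main(void)
{
	int online_count = 4;

	for (int cpu = 0; cpu < online_count; cpu++) {
		unsigned int w = set_waiting();
		if (w == online_count)
			printf("cpu %d is last to wait: poke the others\n", cpu);
		else
			printf("cpu %d waits (%u of %d)\n", cpu, w, online_count);
	}
	return 0;
}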
@@ -418,13 +416,24 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
 static int cpuidle_coupled_clear_pokes(int cpu)
 {
 	local_irq_enable();
-	while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask))
+	while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
 		cpu_relax();
 	local_irq_disable();
 
 	return need_resched() ? -EINTR : 0;
 }
 
+static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled)
+{
+	cpumask_t cpus;
+	int ret;
+
+	cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
+	ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus);
+
+	return ret;
+}
+
 /**
  * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
  * @dev: struct cpuidle_device for the current cpu
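The new cpuidle_coupled_any_pokes_pending() helper answers one question: does any online cpu in this coupled set still have a poke pending? It relies on cpumask_and() returning nonzero when the resulting intersection is non-empty. A userspace model with plain bitmasks, purely for illustration (the type and helper names are not kernel API):

/* Userspace model of the "any pokes pending?" check (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long cpumask;	/* one bit per cpu is enough for a sketch */

/* Models cpumask_and(): dst = a & b, returns whether dst is non-empty. */
static bool mask_and(cpumask *dst, cpumask a, cpumask b)
{
	*dst = a & b;
	return *dst != 0;
}

static bool any_pokes_pending(cpumask online, cpumask coupled, cpumask pending)
{
	cpumask cpus;

	mask_and(&cpus, online, coupled);	/* online cpus in this coupled set */
	return mask_and(&cpus, pending, cpus);	/* ...that still have a poke pending */
}

int main(void)
{
	/* cpus 0-3 online, cpus 2-3 coupled, cpu 3 has a pending poke */
	printf("%d\n", any_pokes_pending(0xf, 0xc, 0x8));	/* 1 */
	/* same set, but the pending poke is on offline cpu 4 */
	printf("%d\n", any_pokes_pending(0xf, 0xc, 0x10));	/* 0 */
	return 0;
}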
@@ -449,6 +458,7 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
 {
 	int entered_state = -1;
 	struct cpuidle_coupled *coupled = dev->coupled;
+	int w;
 
 	if (!coupled)
 		return -EINVAL;
@@ -466,14 +476,33 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
 	/* Read barrier ensures online_count is read after prevent is cleared */
 	smp_rmb();
 
-	cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+reset:
+	cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
+
+	w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+	/*
+	 * If this is the last cpu to enter the waiting state, poke
+	 * all the other cpus out of their waiting state so they can
+	 * enter a deeper state.  This can race with one of the cpus
+	 * exiting the waiting state due to an interrupt and
+	 * decrementing waiting_count, see comment below.
+	 */
+	if (w == coupled->online_count) {
+		cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
+		cpuidle_coupled_poke_others(dev->cpu, coupled);
+	}
 
 retry:
 	/*
 	 * Wait for all coupled cpus to be idle, using the deepest state
-	 * allowed for a single cpu.
+	 * allowed for a single cpu.  If this was not the poking cpu, wait
+	 * for at least one poke before leaving to avoid a race where
+	 * two cpus could arrive at the waiting loop at the same time,
+	 * but the first of the two to arrive could skip the loop without
+	 * processing the pokes from the last to arrive.
 	 */
-	while (!cpuidle_coupled_cpus_waiting(coupled)) {
+	while (!cpuidle_coupled_cpus_waiting(coupled) ||
+	       !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
 		if (cpuidle_coupled_clear_pokes(dev->cpu)) {
 			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
 			goto out;
@@ -495,6 +524,12 @@ retry:
 	}
 
 	/*
+	 * Make sure final poke status for this cpu is visible before setting
+	 * cpu as ready.
+	 */
+	smp_wmb();
+
+	/*
 	 * All coupled cpus are probably idle.  There is a small chance that
 	 * one of the other cpus just became active.  Increment the ready count,
 	 * and spin until all coupled cpus have incremented the counter. Once a
@@ -513,6 +548,28 @@ retry:
 		cpu_relax();
 	}
 
+	/*
+	 * Make sure read of all cpus ready is done before reading pending pokes
+	 */
+	smp_rmb();
+
+	/*
+	 * There is a small chance that a cpu left and reentered idle after this
+	 * cpu saw that all cpus were waiting.  The cpu that reentered idle will
+	 * have sent this cpu a poke, which will still be pending after the
+	 * ready loop.  The pending interrupt may be lost by the interrupt
+	 * controller when entering the deep idle state.  It's not possible to
+	 * clear a pending interrupt without turning interrupts on and handling
+	 * it, and it's too late to turn on interrupts here, so reset the
+	 * coupled idle state of all cpus and retry.
+	 */
+	if (cpuidle_coupled_any_pokes_pending(coupled)) {
+		cpuidle_coupled_set_done(dev->cpu, coupled);
+		/* Wait for all cpus to see the pending pokes */
+		cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier);
+		goto reset;
+	}
+
 	/* all cpus have acked the coupled state */
 	next_state = cpuidle_coupled_get_state(dev, coupled);
 
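The block above is the heart of the fix: if any poke is still pending once every cpu looks ready, the deep state cannot be entered safely (the pending IPI could be lost by the interrupt controller), so each cpu backs out, synchronizes on the new abort_barrier, and retries from the reset label. A simplified pthread model of that abort-and-retry flow is sketched below; the double barrier wait and the per-cpu flags stand in for the kernel's set_done/parallel_barrier sequence and are assumptions of the model, not the kernel's exact mechanism.

/* Simplified pthread model of the abort-and-retry flow (illustrative only). */
#define _POSIX_C_SOURCE 200809L
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static pthread_barrier_t ready_barrier;	/* stands in for the ready_count loop */
static pthread_barrier_t abort_barrier;	/* stands in for coupled->abort_barrier */
static atomic_bool poke_pending[NCPUS];	/* stands in for cpuidle_coupled_poke_pending */

static bool any_pokes_pending(void)
{
	for (int i = 0; i < NCPUS; i++)
		if (atomic_load(&poke_pending[i]))
			return true;
	return false;
}

static void *cpu_idle(void *arg)
{
	int cpu = (int)(long)arg;
	int attempts = 0;

reset:
	attempts++;
	pthread_barrier_wait(&ready_barrier);		/* all cpus look ready */

	if (any_pokes_pending()) {
		/* every cpu commits to the abort before the stale poke is consumed */
		pthread_barrier_wait(&abort_barrier);
		if (cpu == 2)				/* the retried waiting loop handles it */
			atomic_store(&poke_pending[2], false);
		pthread_barrier_wait(&abort_barrier);
		goto reset;
	}

	printf("cpu %d entered deep idle after %d attempt(s)\n", cpu, attempts);
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];

	atomic_store(&poke_pending[2], true);		/* a stale poke left over */
	pthread_barrier_init(&ready_barrier, NULL, NCPUS);
	pthread_barrier_init(&abort_barrier, NULL, NCPUS);

	for (long i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, cpu_idle, (void *)i);
	for (int i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}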
@@ -598,7 +655,7 @@ have_coupled:
 	coupled->refcnt++;
 
 	csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
-	csd->func = cpuidle_coupled_poked;
+	csd->func = cpuidle_coupled_handle_poke;
 	csd->info = (void *)(unsigned long)dev->cpu;
 
 	return 0;