author     Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2016-12-12 14:45:22 -0500
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>  2016-12-12 14:45:22 -0500
commit     404ea9f1a792eebdc208d53fd38bdd1619531430 (patch)
tree       4c660034bb4d5a80b8ed6049a665b6edcfd9863c
parent     0e7414b7aa8b294fddefbad020798f7c8ebe1622 (diff)
parent     feb6cd6a0f9f7d214351624d79e408cb2af91631 (diff)
Merge powerclamp driver updates (that depend on cpuidle material) for v4.10.
-rw-r--r--  drivers/thermal/intel_powerclamp.c  359
1 file changed, 184 insertions(+), 175 deletions(-)
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index afada655f861..83e697186410 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -43,7 +43,6 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/thermal.h>
 #include <linux/slab.h>
@@ -86,11 +85,26 @@ static unsigned int control_cpu; /* The cpu assigned to collect stat and update
  */
 static bool clamping;
 
+static const struct sched_param sparam = {
+	.sched_priority = MAX_USER_RT_PRIO / 2,
+};
+struct powerclamp_worker_data {
+	struct kthread_worker *worker;
+	struct kthread_work balancing_work;
+	struct kthread_delayed_work idle_injection_work;
+	unsigned int cpu;
+	unsigned int count;
+	unsigned int guard;
+	unsigned int window_size_now;
+	unsigned int target_ratio;
+	unsigned int duration_jiffies;
+	bool clamping;
+};
 
-static struct task_struct * __percpu *powerclamp_thread;
+static struct powerclamp_worker_data * __percpu worker_data;
 static struct thermal_cooling_device *cooling_dev;
 static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
-					   * clamping thread
+					   * clamping kthread worker
 					   */
 
 static unsigned int duration;
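
The percpu task_struct pointer becomes a percpu struct powerclamp_worker_data, following the usual alloc_percpu()/per_cpu_ptr() idiom. A minimal, self-contained sketch of that idiom outside this driver (the demo_stats type and all demo_* names are hypothetical):

#include <linux/module.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU bookkeeping, shaped like powerclamp_worker_data. */
struct demo_stats {
	unsigned int count;
};

static struct demo_stats __percpu *demo_stats;

static int __init demo_init(void)
{
	/* One zero-initialized instance per possible CPU. */
	demo_stats = alloc_percpu(struct demo_stats);
	if (!demo_stats)
		return -ENOMEM;

	/* per_cpu_ptr() addresses one specific CPU's instance. */
	per_cpu_ptr(demo_stats, 0)->count = 0;
	return 0;
}

static void __exit demo_exit(void)
{
	free_percpu(demo_stats);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");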
@@ -262,11 +276,6 @@ static u64 pkg_state_counter(void)
 	return count;
 }
 
-static void noop_timer(unsigned long foo)
-{
-	/* empty... just the fact that we get the interrupt wakes us up */
-}
-
 static unsigned int get_compensation(int ratio)
 {
 	unsigned int comp = 0;
@@ -368,103 +377,79 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
 	return set_target_ratio + guard <= current_ratio;
 }
 
-static int clamp_thread(void *arg)
+static void clamp_balancing_func(struct kthread_work *work)
 {
-	int cpunr = (unsigned long)arg;
-	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
-	static const struct sched_param param = {
-		.sched_priority = MAX_USER_RT_PRIO/2,
-	};
-	unsigned int count = 0;
-	unsigned int target_ratio;
+	struct powerclamp_worker_data *w_data;
+	int sleeptime;
+	unsigned long target_jiffies;
+	unsigned int compensated_ratio;
+	int interval; /* jiffies to sleep for each attempt */
 
-	set_bit(cpunr, cpu_clamping_mask);
-	set_freezable();
-	init_timer_on_stack(&wakeup_timer);
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	while (true == clamping && !kthread_should_stop() &&
-		cpu_online(cpunr)) {
-		int sleeptime;
-		unsigned long target_jiffies;
-		unsigned int guard;
-		unsigned int compensated_ratio;
-		int interval; /* jiffies to sleep for each attempt */
-		unsigned int duration_jiffies = msecs_to_jiffies(duration);
-		unsigned int window_size_now;
-
-		try_to_freeze();
-		/*
-		 * make sure user selected ratio does not take effect until
-		 * the next round. adjust target_ratio if user has changed
-		 * target such that we can converge quickly.
-		 */
-		target_ratio = set_target_ratio;
-		guard = 1 + target_ratio/20;
-		window_size_now = window_size;
-		count++;
-
-		/*
-		 * systems may have different ability to enter package level
-		 * c-states, thus we need to compensate the injected idle ratio
-		 * to achieve the actual target reported by the HW.
-		 */
-		compensated_ratio = target_ratio +
-			get_compensation(target_ratio);
-		if (compensated_ratio <= 0)
-			compensated_ratio = 1;
-		interval = duration_jiffies * 100 / compensated_ratio;
-
-		/* align idle time */
-		target_jiffies = roundup(jiffies, interval);
-		sleeptime = target_jiffies - jiffies;
-		if (sleeptime <= 0)
-			sleeptime = 1;
-		schedule_timeout_interruptible(sleeptime);
-		/*
-		 * only elected controlling cpu can collect stats and update
-		 * control parameters.
-		 */
-		if (cpunr == control_cpu && !(count%window_size_now)) {
-			should_skip =
-				powerclamp_adjust_controls(target_ratio,
-							guard, window_size_now);
-			smp_mb();
-		}
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      balancing_work);
 
-		if (should_skip)
-			continue;
-
-		target_jiffies = jiffies + duration_jiffies;
-		mod_timer(&wakeup_timer, target_jiffies);
-		if (unlikely(local_softirq_pending()))
-			continue;
-		/*
-		 * stop tick sched during idle time, interrupts are still
-		 * allowed. thus jiffies are updated properly.
-		 */
-		preempt_disable();
-		/* mwait until target jiffies is reached */
-		while (time_before(jiffies, target_jiffies)) {
-			unsigned long ecx = 1;
-			unsigned long eax = target_mwait;
-
-			/*
-			 * REVISIT: may call enter_idle() to notify drivers who
-			 * can save power during cpu idle. same for exit_idle()
-			 */
-			local_touch_nmi();
-			stop_critical_timings();
-			mwait_idle_with_hints(eax, ecx);
-			start_critical_timings();
-			atomic_inc(&idle_wakeup_counter);
-		}
-		preempt_enable();
+	/*
+	 * make sure user selected ratio does not take effect until
+	 * the next round. adjust target_ratio if user has changed
+	 * target such that we can converge quickly.
+	 */
+	w_data->target_ratio = READ_ONCE(set_target_ratio);
+	w_data->guard = 1 + w_data->target_ratio / 20;
+	w_data->window_size_now = window_size;
+	w_data->duration_jiffies = msecs_to_jiffies(duration);
+	w_data->count++;
+
+	/*
+	 * systems may have different ability to enter package level
+	 * c-states, thus we need to compensate the injected idle ratio
+	 * to achieve the actual target reported by the HW.
+	 */
+	compensated_ratio = w_data->target_ratio +
+		get_compensation(w_data->target_ratio);
+	if (compensated_ratio <= 0)
+		compensated_ratio = 1;
+	interval = w_data->duration_jiffies * 100 / compensated_ratio;
+
+	/* align idle time */
+	target_jiffies = roundup(jiffies, interval);
+	sleeptime = target_jiffies - jiffies;
+	if (sleeptime <= 0)
+		sleeptime = 1;
+
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_delayed_work(w_data->worker,
+					   &w_data->idle_injection_work,
+					   sleeptime);
+}
+
+static void clamp_idle_injection_func(struct kthread_work *work)
+{
+	struct powerclamp_worker_data *w_data;
+
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      idle_injection_work.work);
+
+	/*
+	 * only elected controlling cpu can collect stats and update
+	 * control parameters.
+	 */
+	if (w_data->cpu == control_cpu &&
+	    !(w_data->count % w_data->window_size_now)) {
+		should_skip =
+			powerclamp_adjust_controls(w_data->target_ratio,
+						   w_data->guard,
+						   w_data->window_size_now);
+		smp_mb();
 	}
-	del_timer_sync(&wakeup_timer);
-	clear_bit(cpunr, cpu_clamping_mask);
 
-	return 0;
+	if (should_skip)
+		goto balance;
+
+	play_idle(jiffies_to_msecs(w_data->duration_jiffies));
+
+balance:
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_work(w_data->worker, &w_data->balancing_work);
 }
 
 /*
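
The old single-threaded loop is thus split into two kthread works that requeue each other: the balancing work computes the idle window and schedules the delayed idle-injection work, which calls play_idle() (provided by the cpuidle material this merge depends on) and then bounces control back. A stripped-down sketch of this self-requeuing pattern under the kthread worker API, with hypothetical demo_* names:

#include <linux/compiler.h>
#include <linux/kthread.h>

/* Hypothetical pair of works mirroring the balancing/injection split. */
struct demo_ctx {
	struct kthread_worker *worker;
	struct kthread_work compute_work;
	struct kthread_delayed_work timed_work;
	bool active;
};

static void demo_compute_func(struct kthread_work *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx,
					    compute_work);

	/* ... compute how long to wait, then arm the delayed work ... */
	if (READ_ONCE(ctx->active))
		kthread_queue_delayed_work(ctx->worker, &ctx->timed_work, 1);
}

static void demo_timed_func(struct kthread_work *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx,
					    timed_work.work);

	/* ... perform the timed action, then bounce back ... */
	if (READ_ONCE(ctx->active))
		kthread_queue_work(ctx->worker, &ctx->compute_work);
}

Stopping such a pair only requires clearing active and cancelling both works, which is exactly what stop_power_clamp_worker() does in the next hunk.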
@@ -508,10 +493,60 @@ static void poll_pkg_cstate(struct work_struct *dummy)
 		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
 }
 
+static void start_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+	struct kthread_worker *worker;
+
+	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu);
+	if (IS_ERR(worker))
+		return;
+
+	w_data->worker = worker;
+	w_data->count = 0;
+	w_data->cpu = cpu;
+	w_data->clamping = true;
+	set_bit(cpu, cpu_clamping_mask);
+	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
+	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
+	kthread_init_delayed_work(&w_data->idle_injection_work,
+				  clamp_idle_injection_func);
+	kthread_queue_work(w_data->worker, &w_data->balancing_work);
+}
+
+static void stop_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+
+	if (!w_data->worker)
+		return;
+
+	w_data->clamping = false;
+	/*
+	 * Make sure that all works that get queued after this point see
+	 * the clamping disabled. The counterpart is not needed because
+	 * there is an implicit memory barrier when the queued work
+	 * is processed.
+	 */
+	smp_wmb();
+	kthread_cancel_work_sync(&w_data->balancing_work);
+	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
+	/*
+	 * The balancing work still might be queued here because
+	 * the handling of the "clamping" variable, cancel, and queue
+	 * operations are not synchronized via a lock. But it is not
+	 * a big deal. The balancing work is fast and destroying the
+	 * kthread worker will wait for it.
+	 */
+	clear_bit(w_data->cpu, cpu_clamping_mask);
+	kthread_destroy_worker(w_data->worker);
+
+	w_data->worker = NULL;
+}
+
 static int start_power_clamp(void)
 {
 	unsigned long cpu;
-	struct task_struct *thread;
 
 	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
 	/* prevent cpu hotplug */
@@ -525,22 +560,9 @@ static int start_power_clamp(void)
 	clamping = true;
 	schedule_delayed_work(&poll_pkg_cstate_work, 0);
 
-	/* start one thread per online cpu */
+	/* start one kthread worker per online cpu */
 	for_each_online_cpu(cpu) {
-		struct task_struct **p =
-			per_cpu_ptr(powerclamp_thread, cpu);
-
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%ld", cpu);
-		/* bind to cpu here */
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*p = thread;
-		}
-
+		start_power_clamp_worker(cpu);
 	}
 	put_online_cpus();
 
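
start_power_clamp_worker() relies on kthread_create_worker_on_cpu(), which hands back a worker whose thread is already created, woken, and bound to the given CPU; that is why the kthread_create_on_node() + kthread_bind() + wake_up_process() sequence above could be dropped. A minimal lifecycle sketch under the same API (all demo_* names are hypothetical):

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/smp.h>

static struct kthread_worker *demo_worker;
static struct kthread_work demo_work;

static void demo_func(struct kthread_work *work)
{
	pr_info("demo work on CPU %d\n", smp_processor_id());
}

static int demo_start(unsigned int cpu)
{
	/* Creates, wakes, and binds the worker thread in one call. */
	demo_worker = kthread_create_worker_on_cpu(cpu, 0, "demo/%u", cpu);
	if (IS_ERR(demo_worker))
		return PTR_ERR(demo_worker);

	kthread_init_work(&demo_work, demo_func);
	kthread_queue_work(demo_worker, &demo_work);
	return 0;
}

static void demo_stop(void)
{
	/* Flushes pending work, then stops and frees the worker. */
	kthread_destroy_worker(demo_worker);
	demo_worker = NULL;
}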
@@ -550,71 +572,49 @@ static int start_power_clamp(void)
 static void end_power_clamp(void)
 {
 	int i;
-	struct task_struct *thread;
 
-	clamping = false;
 	/*
-	 * make clamping visible to other cpus and give per cpu clamping threads
-	 * sometime to exit, or gets killed later.
+	 * Block requeuing in all the kthread workers. They will flush and
+	 * stop faster.
 	 */
-	smp_mb();
-	msleep(20);
+	clamping = false;
 	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
 		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping thread for cpu %d alive, kill\n", i);
-			thread = *per_cpu_ptr(powerclamp_thread, i);
-			kthread_stop(thread);
+			pr_debug("clamping worker for cpu %d alive, destroy\n",
+				 i);
+			stop_power_clamp_worker(i);
 		}
 	}
 }
 
-static int powerclamp_cpu_callback(struct notifier_block *nfb,
-				   unsigned long action, void *hcpu)
+static int powerclamp_cpu_online(unsigned int cpu)
 {
-	unsigned long cpu = (unsigned long)hcpu;
-	struct task_struct *thread;
-	struct task_struct **percpu_thread =
-		per_cpu_ptr(powerclamp_thread, cpu);
-
-	if (false == clamping)
-		goto exit_ok;
-
-	switch (action) {
-	case CPU_ONLINE:
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%lu", cpu);
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*percpu_thread = thread;
-		}
-		/* prefer BSP as controlling CPU */
-		if (cpu == 0) {
-			control_cpu = 0;
-			smp_mb();
-		}
-		break;
-	case CPU_DEAD:
-		if (test_bit(cpu, cpu_clamping_mask)) {
-			pr_err("cpu %lu dead but powerclamping thread is not\n",
-				cpu);
-			kthread_stop(*percpu_thread);
-		}
-		if (cpu == control_cpu) {
-			control_cpu = smp_processor_id();
-			smp_mb();
-		}
+	if (clamping == false)
+		return 0;
+	start_power_clamp_worker(cpu);
+	/* prefer BSP as controlling CPU */
+	if (cpu == 0) {
+		control_cpu = 0;
+		smp_mb();
 	}
-
-exit_ok:
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block powerclamp_cpu_notifier = {
-	.notifier_call = powerclamp_cpu_callback,
-};
+static int powerclamp_cpu_predown(unsigned int cpu)
+{
+	if (clamping == false)
+		return 0;
+
+	stop_power_clamp_worker(cpu);
+	if (cpu != control_cpu)
+		return 0;
+
+	control_cpu = cpumask_first(cpu_online_mask);
+	if (control_cpu == cpu)
+		control_cpu = cpumask_next(cpu, cpu_online_mask);
+	smp_mb();
+	return 0;
+}
 
 static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
 				    unsigned long *state)
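
powerclamp_cpu_online() and powerclamp_cpu_predown() are plain hotplug-state callbacks; the registration itself moves into powerclamp_init() below via cpuhp_setup_state_nocalls(), which returns the dynamically allocated state id that cpuhp_remove_state_nocalls() later needs. A generic sketch of that pattern (demo_* names are hypothetical):

#include <linux/cpuhotplug.h>

static enum cpuhp_state demo_hp_state;

static int demo_cpu_online(unsigned int cpu)
{
	/* bring up per-cpu machinery for this CPU */
	return 0;
}

static int demo_cpu_predown(unsigned int cpu)
{
	/* tear down per-cpu machinery before the CPU goes away */
	return 0;
}

static int demo_register(void)
{
	int ret;

	/* _nocalls: do not invoke the callbacks for already-online CPUs. */
	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "demo:online",
					demo_cpu_online, demo_cpu_predown);
	if (ret < 0)
		return ret;

	demo_hp_state = ret;	/* dynamic id, required for removal */
	return 0;
}

static void demo_unregister(void)
{
	cpuhp_remove_state_nocalls(demo_hp_state);
}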
@@ -742,6 +742,8 @@ file_error:
 	debugfs_remove_recursive(debug_dir);
 }
 
+static enum cpuhp_state hp_state;
+
 static int __init powerclamp_init(void)
 {
 	int retval;
@@ -759,10 +761,17 @@ static int __init powerclamp_init(void)
 
 	/* set default limit, maybe adjusted during runtime based on feedback */
 	window_size = 2;
-	register_hotcpu_notifier(&powerclamp_cpu_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "thermal/intel_powerclamp:online",
+					   powerclamp_cpu_online,
+					   powerclamp_cpu_predown);
+	if (retval < 0)
+		goto exit_free;
+
+	hp_state = retval;
 
-	powerclamp_thread = alloc_percpu(struct task_struct *);
-	if (!powerclamp_thread) {
+	worker_data = alloc_percpu(struct powerclamp_worker_data);
+	if (!worker_data) {
 		retval = -ENOMEM;
 		goto exit_unregister;
 	}
@@ -782,9 +791,9 @@ static int __init powerclamp_init(void)
 	return 0;
 
 exit_free_thread:
-	free_percpu(powerclamp_thread);
+	free_percpu(worker_data);
 exit_unregister:
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_state);
 exit_free:
 	kfree(cpu_clamping_mask);
 	return retval;
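
The error path keeps the kernel's usual reverse-order goto unwinding: each later failure jumps to a label that undoes only what was set up before it. A compact sketch of the idiom, reusing the hypothetical demo_register()/demo_unregister() and demo_stats from the sketches above:

#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/slab.h>

static struct demo_stats __percpu *demo_data;
static unsigned long *demo_mask;

static int __init demo_mod_init(void)
{
	int ret;

	demo_mask = kcalloc(BITS_TO_LONGS(num_possible_cpus()),
			    sizeof(long), GFP_KERNEL);
	if (!demo_mask)
		return -ENOMEM;

	ret = demo_register();		/* cpuhp state, sketched above */
	if (ret < 0)
		goto exit_free;

	demo_data = alloc_percpu(struct demo_stats);
	if (!demo_data) {
		ret = -ENOMEM;
		goto exit_unregister;
	}
	return 0;

exit_unregister:
	demo_unregister();		/* undo in reverse order */
exit_free:
	kfree(demo_mask);
	return ret;
}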
@@ -793,9 +802,9 @@ module_init(powerclamp_init);
 
 static void __exit powerclamp_exit(void)
 {
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 	end_power_clamp();
-	free_percpu(powerclamp_thread);
+	cpuhp_remove_state_nocalls(hp_state);
+	free_percpu(worker_data);
 	thermal_cooling_device_unregister(cooling_dev);
 	kfree(cpu_clamping_mask);
 