author	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2016-12-12 14:45:22 -0500
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2016-12-12 14:45:22 -0500
commit	404ea9f1a792eebdc208d53fd38bdd1619531430 (patch)
tree	4c660034bb4d5a80b8ed6049a665b6edcfd9863c
parent	0e7414b7aa8b294fddefbad020798f7c8ebe1622 (diff)
parent	feb6cd6a0f9f7d214351624d79e408cb2af91631 (diff)
Merge powerclamp driver updates (that depend on cpuidle material) for v4.10.
-rw-r--r--	drivers/thermal/intel_powerclamp.c	359
1 file changed, 184 insertions(+), 175 deletions(-)
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index afada655f861..83e697186410 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -43,7 +43,6 @@
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/cpu.h>
 #include <linux/thermal.h>
 #include <linux/slab.h>
@@ -86,11 +85,26 @@ static unsigned int control_cpu; /* The cpu assigned to collect stat and update
  */
 static bool clamping;
 
+static const struct sched_param sparam = {
+	.sched_priority = MAX_USER_RT_PRIO / 2,
+};
+struct powerclamp_worker_data {
+	struct kthread_worker *worker;
+	struct kthread_work balancing_work;
+	struct kthread_delayed_work idle_injection_work;
+	unsigned int cpu;
+	unsigned int count;
+	unsigned int guard;
+	unsigned int window_size_now;
+	unsigned int target_ratio;
+	unsigned int duration_jiffies;
+	bool clamping;
+};
 
-static struct task_struct * __percpu *powerclamp_thread;
+static struct powerclamp_worker_data * __percpu worker_data;
 static struct thermal_cooling_device *cooling_dev;
 static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
-					  * clamping thread
+					  * clamping kthread worker
 					  */
 
 static unsigned int duration;
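
The hunk above swaps the per-CPU task pointer for a per-CPU bundle of kthread-worker state. For readers new to that API, here is a minimal, self-contained sketch of the basic kthread worker pattern from <linux/kthread.h> that this series converts the driver to; the demo_* names are hypothetical, not part of the driver.

/*
 * Illustrative only: a dedicated kthread serializes all work items
 * queued to one kthread_worker. Hypothetical demo_* identifiers.
 */
#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>

static struct kthread_worker *demo_worker;
static struct kthread_work demo_work;

static void demo_work_fn(struct kthread_work *work)
{
	pr_info("demo: running in the worker's dedicated kthread\n");
}

static int __init demo_init(void)
{
	/* spawns one kthread that processes everything queued here */
	demo_worker = kthread_create_worker(0, "demo_worker");
	if (IS_ERR(demo_worker))
		return PTR_ERR(demo_worker);

	kthread_init_work(&demo_work, demo_work_fn);
	kthread_queue_work(demo_worker, &demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
	/* waits for queued work to finish, then stops the kthread */
	kthread_destroy_worker(demo_worker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");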
@@ -262,11 +276,6 @@ static u64 pkg_state_counter(void)
 	return count;
 }
 
-static void noop_timer(unsigned long foo)
-{
-	/* empty... just the fact that we get the interrupt wakes us up */
-}
-
 static unsigned int get_compensation(int ratio)
 {
 	unsigned int comp = 0;
@@ -368,103 +377,79 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
 	return set_target_ratio + guard <= current_ratio;
 }
 
-static int clamp_thread(void *arg)
+static void clamp_balancing_func(struct kthread_work *work)
 {
-	int cpunr = (unsigned long)arg;
-	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
-	static const struct sched_param param = {
-		.sched_priority = MAX_USER_RT_PRIO/2,
-	};
-	unsigned int count = 0;
-	unsigned int target_ratio;
+	struct powerclamp_worker_data *w_data;
+	int sleeptime;
+	unsigned long target_jiffies;
+	unsigned int compensated_ratio;
+	int interval; /* jiffies to sleep for each attempt */
 
-	set_bit(cpunr, cpu_clamping_mask);
-	set_freezable();
-	init_timer_on_stack(&wakeup_timer);
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	while (true == clamping && !kthread_should_stop() &&
-		cpu_online(cpunr)) {
-		int sleeptime;
-		unsigned long target_jiffies;
-		unsigned int guard;
-		unsigned int compensated_ratio;
-		int interval; /* jiffies to sleep for each attempt */
-		unsigned int duration_jiffies = msecs_to_jiffies(duration);
-		unsigned int window_size_now;
-
-		try_to_freeze();
-		/*
-		 * make sure user selected ratio does not take effect until
-		 * the next round. adjust target_ratio if user has changed
-		 * target such that we can converge quickly.
-		 */
-		target_ratio = set_target_ratio;
-		guard = 1 + target_ratio/20;
-		window_size_now = window_size;
-		count++;
-
-		/*
-		 * systems may have different ability to enter package level
-		 * c-states, thus we need to compensate the injected idle ratio
-		 * to achieve the actual target reported by the HW.
-		 */
-		compensated_ratio = target_ratio +
-			get_compensation(target_ratio);
-		if (compensated_ratio <= 0)
-			compensated_ratio = 1;
-		interval = duration_jiffies * 100 / compensated_ratio;
-
-		/* align idle time */
-		target_jiffies = roundup(jiffies, interval);
-		sleeptime = target_jiffies - jiffies;
-		if (sleeptime <= 0)
-			sleeptime = 1;
-		schedule_timeout_interruptible(sleeptime);
-		/*
-		 * only elected controlling cpu can collect stats and update
-		 * control parameters.
-		 */
-		if (cpunr == control_cpu && !(count%window_size_now)) {
-			should_skip =
-				powerclamp_adjust_controls(target_ratio,
-							guard, window_size_now);
-			smp_mb();
-		}
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      balancing_work);
 
-		if (should_skip)
-			continue;
-
-		target_jiffies = jiffies + duration_jiffies;
-		mod_timer(&wakeup_timer, target_jiffies);
-		if (unlikely(local_softirq_pending()))
-			continue;
-		/*
-		 * stop tick sched during idle time, interrupts are still
-		 * allowed. thus jiffies are updated properly.
-		 */
-		preempt_disable();
-		/* mwait until target jiffies is reached */
-		while (time_before(jiffies, target_jiffies)) {
-			unsigned long ecx = 1;
-			unsigned long eax = target_mwait;
-
-			/*
-			 * REVISIT: may call enter_idle() to notify drivers who
-			 * can save power during cpu idle. same for exit_idle()
-			 */
-			local_touch_nmi();
-			stop_critical_timings();
-			mwait_idle_with_hints(eax, ecx);
-			start_critical_timings();
-			atomic_inc(&idle_wakeup_counter);
-		}
-		preempt_enable();
+	/*
+	 * make sure user selected ratio does not take effect until
+	 * the next round. adjust target_ratio if user has changed
+	 * target such that we can converge quickly.
+	 */
+	w_data->target_ratio = READ_ONCE(set_target_ratio);
+	w_data->guard = 1 + w_data->target_ratio / 20;
+	w_data->window_size_now = window_size;
+	w_data->duration_jiffies = msecs_to_jiffies(duration);
+	w_data->count++;
+
+	/*
+	 * systems may have different ability to enter package level
+	 * c-states, thus we need to compensate the injected idle ratio
+	 * to achieve the actual target reported by the HW.
+	 */
+	compensated_ratio = w_data->target_ratio +
+		get_compensation(w_data->target_ratio);
+	if (compensated_ratio <= 0)
+		compensated_ratio = 1;
+	interval = w_data->duration_jiffies * 100 / compensated_ratio;
+
+	/* align idle time */
+	target_jiffies = roundup(jiffies, interval);
+	sleeptime = target_jiffies - jiffies;
+	if (sleeptime <= 0)
+		sleeptime = 1;
+
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_delayed_work(w_data->worker,
+					   &w_data->idle_injection_work,
+					   sleeptime);
+}
+
+static void clamp_idle_injection_func(struct kthread_work *work)
+{
+	struct powerclamp_worker_data *w_data;
+
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      idle_injection_work.work);
+
+	/*
+	 * only elected controlling cpu can collect stats and update
+	 * control parameters.
+	 */
+	if (w_data->cpu == control_cpu &&
+	    !(w_data->count % w_data->window_size_now)) {
+		should_skip =
+			powerclamp_adjust_controls(w_data->target_ratio,
+						   w_data->guard,
+						   w_data->window_size_now);
+		smp_mb();
 	}
-	del_timer_sync(&wakeup_timer);
-	clear_bit(cpunr, cpu_clamping_mask);
 
-	return 0;
+	if (should_skip)
+		goto balance;
+
+	play_idle(jiffies_to_msecs(w_data->duration_jiffies));
+
+balance:
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_work(w_data->worker, &w_data->balancing_work);
 }
 
 /*
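
The single clamping loop is now split into two works that hand control back and forth: the balancing work computes the next cycle's parameters and queues the delayed idle-injection work, and the injection work injects idle time via play_idle() (the cpuidle material this merge depends on) and requeues the balancing work. A minimal sketch of that self-requeuing ping-pong, under hypothetical names:

/*
 * Illustrative sketch of a two-work "ping-pong" on one kthread worker;
 * the identifiers are made up. Each handler re-queues its partner, so
 * the cycle runs until the requeue condition goes false.
 */
#include <linux/kthread.h>

struct pingpong {
	struct kthread_worker *worker;
	struct kthread_work plan_work;           /* compute next cycle */
	struct kthread_delayed_work inject_work; /* delayed idle slot  */
	bool active;
};

static void plan_fn(struct kthread_work *work)
{
	struct pingpong *pp = container_of(work, struct pingpong, plan_work);

	/* ... compute the delay/duration for the next injection ... */
	if (READ_ONCE(pp->active))
		kthread_queue_delayed_work(pp->worker, &pp->inject_work, 1);
}

static void inject_fn(struct kthread_work *work)
{
	struct pingpong *pp = container_of(work, struct pingpong,
					   inject_work.work);

	/* ... inject idle time, e.g. via play_idle() ... */
	if (READ_ONCE(pp->active))
		kthread_queue_work(pp->worker, &pp->plan_work);
}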
@@ -508,10 +493,60 @@ static void poll_pkg_cstate(struct work_struct *dummy)
 	schedule_delayed_work(&poll_pkg_cstate_work, HZ);
 }
 
+static void start_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+	struct kthread_worker *worker;
+
+	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu);
+	if (IS_ERR(worker))
+		return;
+
+	w_data->worker = worker;
+	w_data->count = 0;
+	w_data->cpu = cpu;
+	w_data->clamping = true;
+	set_bit(cpu, cpu_clamping_mask);
+	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
+	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
+	kthread_init_delayed_work(&w_data->idle_injection_work,
+				  clamp_idle_injection_func);
+	kthread_queue_work(w_data->worker, &w_data->balancing_work);
+}
+
+static void stop_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+
+	if (!w_data->worker)
+		return;
+
+	w_data->clamping = false;
+	/*
+	 * Make sure that all works that get queued after this point see
+	 * the clamping disabled. The counterpart is not needed because
+	 * there is an implicit memory barrier when the queued work
+	 * is processed.
+	 */
+	smp_wmb();
+	kthread_cancel_work_sync(&w_data->balancing_work);
+	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
+	/*
+	 * The balancing work still might be queued here because
+	 * the handling of the "clamping" variable, cancel, and queue
+	 * operations are not synchronized via a lock. But it is not
+	 * a big deal. The balancing work is fast and destroy kthread
+	 * will wait for it.
+	 */
+	clear_bit(w_data->cpu, cpu_clamping_mask);
+	kthread_destroy_worker(w_data->worker);
+
+	w_data->worker = NULL;
+}
+
 static int start_power_clamp(void)
 {
 	unsigned long cpu;
-	struct task_struct *thread;
 
 	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
 	/* prevent cpu hotplug */
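
Note that kthread_create_worker_on_cpu() spawns the worker's kthread already bound to the given CPU, which is why the old kthread_bind()/wake_up_process() dance disappears. A hypothetical sketch of that per-CPU lifecycle, assuming a simple per-cpu container (the demo_* names are not in the driver):

/*
 * Sketch: one pinned kthread worker per online CPU, tracked in
 * per-cpu data. Hypothetical demo_* identifiers.
 */
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/percpu.h>

struct demo_pcpu {
	struct kthread_worker *worker;
};

static struct demo_pcpu __percpu *demo_data;

static int demo_start_all(void)
{
	unsigned long cpu;

	demo_data = alloc_percpu(struct demo_pcpu);
	if (!demo_data)
		return -ENOMEM;

	get_online_cpus();	/* hold off hotplug while starting */
	for_each_online_cpu(cpu) {
		struct demo_pcpu *p = per_cpu_ptr(demo_data, cpu);
		struct kthread_worker *w;

		/* the worker's kthread comes up already bound to @cpu */
		w = kthread_create_worker_on_cpu(cpu, 0, "demo/%ld", cpu);
		if (!IS_ERR(w))
			p->worker = w;
	}
	put_online_cpus();
	return 0;
}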
@@ -525,22 +560,9 @@ static int start_power_clamp(void)
 	clamping = true;
 	schedule_delayed_work(&poll_pkg_cstate_work, 0);
 
-	/* start one thread per online cpu */
+	/* start one kthread worker per online cpu */
 	for_each_online_cpu(cpu) {
-		struct task_struct **p =
-			per_cpu_ptr(powerclamp_thread, cpu);
-
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%ld", cpu);
-		/* bind to cpu here */
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*p = thread;
-		}
-
+		start_power_clamp_worker(cpu);
 	}
 	put_online_cpus();
 
@@ -550,71 +572,49 @@ static int start_power_clamp(void)
 static void end_power_clamp(void)
 {
 	int i;
-	struct task_struct *thread;
 
-	clamping = false;
 	/*
-	 * make clamping visible to other cpus and give per cpu clamping threads
-	 * sometime to exit, or gets killed later.
+	 * Block requeuing in all the kthread workers. They will flush and
+	 * stop faster.
 	 */
-	smp_mb();
-	msleep(20);
+	clamping = false;
 	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
 		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping thread for cpu %d alive, kill\n", i);
-			thread = *per_cpu_ptr(powerclamp_thread, i);
-			kthread_stop(thread);
+			pr_debug("clamping worker for cpu %d alive, destroy\n",
+				 i);
+			stop_power_clamp_worker(i);
 		}
 	}
 }
 
-static int powerclamp_cpu_callback(struct notifier_block *nfb,
-				   unsigned long action, void *hcpu)
+static int powerclamp_cpu_online(unsigned int cpu)
 {
-	unsigned long cpu = (unsigned long)hcpu;
-	struct task_struct *thread;
-	struct task_struct **percpu_thread =
-		per_cpu_ptr(powerclamp_thread, cpu);
-
-	if (false == clamping)
-		goto exit_ok;
-
-	switch (action) {
-	case CPU_ONLINE:
-		thread = kthread_create_on_node(clamp_thread,
-						(void *) cpu,
-						cpu_to_node(cpu),
-						"kidle_inject/%lu", cpu);
-		if (likely(!IS_ERR(thread))) {
-			kthread_bind(thread, cpu);
-			wake_up_process(thread);
-			*percpu_thread = thread;
-		}
-		/* prefer BSP as controlling CPU */
-		if (cpu == 0) {
-			control_cpu = 0;
-			smp_mb();
-		}
-		break;
-	case CPU_DEAD:
-		if (test_bit(cpu, cpu_clamping_mask)) {
-			pr_err("cpu %lu dead but powerclamping thread is not\n",
-				cpu);
-			kthread_stop(*percpu_thread);
-		}
-		if (cpu == control_cpu) {
-			control_cpu = smp_processor_id();
-			smp_mb();
-		}
+	if (clamping == false)
+		return 0;
+	start_power_clamp_worker(cpu);
+	/* prefer BSP as controlling CPU */
+	if (cpu == 0) {
+		control_cpu = 0;
+		smp_mb();
 	}
-
-exit_ok:
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block powerclamp_cpu_notifier = {
-	.notifier_call = powerclamp_cpu_callback,
-};
+static int powerclamp_cpu_predown(unsigned int cpu)
+{
+	if (clamping == false)
+		return 0;
+
+	stop_power_clamp_worker(cpu);
+	if (cpu != control_cpu)
+		return 0;
+
+	control_cpu = cpumask_first(cpu_online_mask);
+	if (control_cpu == cpu)
+		control_cpu = cpumask_next(cpu, cpu_online_mask);
+	smp_mb();
+	return 0;
+}
 
 static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
 				    unsigned long *state)
@@ -742,6 +742,8 @@ file_error:
 	debugfs_remove_recursive(debug_dir);
 }
 
+static enum cpuhp_state hp_state;
+
 static int __init powerclamp_init(void)
 {
 	int retval;
@@ -759,10 +761,17 @@ static int __init powerclamp_init(void)
 
 	/* set default limit, maybe adjusted during runtime based on feedback */
 	window_size = 2;
-	register_hotcpu_notifier(&powerclamp_cpu_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "thermal/intel_powerclamp:online",
+					   powerclamp_cpu_online,
+					   powerclamp_cpu_predown);
+	if (retval < 0)
+		goto exit_free;
+
+	hp_state = retval;
 
-	powerclamp_thread = alloc_percpu(struct task_struct *);
-	if (!powerclamp_thread) {
+	worker_data = alloc_percpu(struct powerclamp_worker_data);
+	if (!worker_data) {
 		retval = -ENOMEM;
 		goto exit_unregister;
 	}
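
The hotplug notifier is replaced by the cpuhp state machine: with CPUHP_AP_ONLINE_DYN, cpuhp_setup_state_nocalls() allocates a dynamic state and returns its number (>= 0), which must be kept for later removal, hence the hp_state variable above. A hedged sketch of that registration contract, with hypothetical demo_* names:

/*
 * Sketch of the cpuhp callback contract: the online callback runs on
 * @cpu as it comes up, the teardown callback before it goes down.
 */
#include <linux/cpuhotplug.h>

static enum cpuhp_state demo_hp_state;

static int demo_online(unsigned int cpu)
{
	/* bring up per-CPU resources for @cpu; 0 on success */
	return 0;
}

static int demo_predown(unsigned int cpu)
{
	/* tear down per-CPU resources before @cpu goes offline */
	return 0;
}

static int demo_register(void)
{
	int ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					    "demo/subsys:online",
					    demo_online, demo_predown);
	if (ret < 0)
		return ret;
	demo_hp_state = ret;	/* dynamic states return the state id */
	return 0;
}

static void demo_unregister(void)
{
	cpuhp_remove_state_nocalls(demo_hp_state);
}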
@@ -782,9 +791,9 @@ static int __init powerclamp_init(void)
 	return 0;
 
 exit_free_thread:
-	free_percpu(powerclamp_thread);
+	free_percpu(worker_data);
 exit_unregister:
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_state);
 exit_free:
 	kfree(cpu_clamping_mask);
 	return retval;
@@ -793,9 +802,9 @@ module_init(powerclamp_init);
 
 static void __exit powerclamp_exit(void)
 {
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 	end_power_clamp();
-	free_percpu(powerclamp_thread);
+	cpuhp_remove_state_nocalls(hp_state);
+	free_percpu(worker_data);
 	thermal_cooling_device_unregister(cooling_dev);
 	kfree(cpu_clamping_mask);
 