author	Petr Mladek <pmladek@suse.com>	2016-11-28 16:44:50 -0500
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2016-11-29 17:34:10 -0500
commit	8d962ac7f396bc83fb381469521c27aed7b70f84 (patch)
tree	43ff2ccc703459d9c1a1b1c2c9b27f012d1eab9b
parent	14f3f7d8cbceedab17f16cf301414fa3384117fe (diff)
thermal/intel_powerclamp: Convert the kthread to kthread worker API
Kthreads are currently implemented as an infinite loop. Each has its own variant of the checks for terminating, freezing, and awakening. In many cases it is unclear which state the kthread is in, and sometimes it is handled the wrong way.

The plan is to convert kthreads to the kthread_worker or workqueue API. It allows the functionality to be split into separate operations and gives it a better structure. It also defines a clean state in which no locks are taken, no IRQs are blocked, and the kthread might sleep or even be safely migrated.

The kthread worker API is useful when we want a dedicated single thread for the work. It helps to make sure that the thread is available when needed. It also allows better control, e.g. defining a scheduling priority.

This patch converts the intel_powerclamp kthreads into kthread workers because they need good control over the assigned CPUs.

IMHO, the most natural way is to split one cycle into two works. The first one does some balancing and lets the CPU work normally for some time. The second work checks what the CPU has done in the meantime and puts it into a C-state to reach the required idle time ratio. The delay between the two works is achieved by the delayed kthread work.

The two works have to share some data that used to be local variables of the single kthread function. This is achieved by the new per-CPU struct powerclamp_worker_data. It might look like a complication; on the other hand, the long original kthread function was not nice either.

The patch tries to avoid extra init and cleanup works. All the actions can be done outside the thread, so they are moved to the functions that create or destroy the worker. In particular, I checked that the timers are assigned to the right CPU.

The two works queue each other, which makes it a bit tricky to break the cycle when we want to stop the worker. We use the global and per-worker "clamping" variables to make sure that the re-queuing eventually stops. We also cancel the works to make it faster. Note that the canceling is not reliable because the handling of the two variables and the queuing are not synchronized via a lock. But it is not a big deal because it is just an optimization; the job is stopped faster than before in most cases.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--	drivers/thermal/intel_powerclamp.c	| 292
1 file changed, 170 insertions(+), 122 deletions(-)
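For readers who have not used the kthread worker API, here is a minimal, self-contained sketch of the pattern the changelog describes: two works that keep re-queuing each other on a CPU-bound worker, plus a per-worker flag and cancel/destroy on the stop path. It is not part of the patch; the demo_* names and DEMO_DELAY are invented for illustration, while the kthread_* calls are the same ones the patch relies on.

/*
 * Minimal sketch of the two-work re-queuing pattern (not part of the patch).
 * demo_data, demo_* and DEMO_DELAY are made-up names for illustration only.
 */
#include <linux/kthread.h>
#include <linux/jiffies.h>
#include <linux/err.h>

struct demo_data {
	struct kthread_worker *worker;
	struct kthread_work balancing_work;		/* "run normally" phase */
	struct kthread_delayed_work injection_work;	/* delayed "inject idle" phase */
	bool active;					/* per-worker stop flag */
};

#define DEMO_DELAY	(HZ / 10)

static void demo_balancing_func(struct kthread_work *work)
{
	struct demo_data *d = container_of(work, struct demo_data,
					   balancing_work);

	/* ... decide how long the CPU may run normally ... */
	if (READ_ONCE(d->active))
		kthread_queue_delayed_work(d->worker, &d->injection_work,
					   DEMO_DELAY);
}

static void demo_injection_func(struct kthread_work *work)
{
	struct demo_data *d = container_of(work, struct demo_data,
					   injection_work.work);

	/* ... force the idle period here, then hand control back ... */
	if (READ_ONCE(d->active))
		kthread_queue_work(d->worker, &d->balancing_work);
}

static int demo_start(struct demo_data *d, int cpu)
{
	/* One dedicated, CPU-bound worker thread; priority could be raised
	 * with sched_setscheduler() on worker->task, as the patch does. */
	d->worker = kthread_create_worker_on_cpu(cpu, KTW_FREEZABLE,
						 "demo_inject/%d", cpu);
	if (IS_ERR(d->worker))
		return PTR_ERR(d->worker);

	d->active = true;
	kthread_init_work(&d->balancing_work, demo_balancing_func);
	kthread_init_delayed_work(&d->injection_work, demo_injection_func);
	kthread_queue_work(d->worker, &d->balancing_work);
	return 0;
}

static void demo_stop(struct demo_data *d)
{
	/* Disable re-queuing first, then cancel whatever is still pending. */
	d->active = false;
	smp_wmb();
	kthread_cancel_work_sync(&d->balancing_work);
	kthread_cancel_delayed_work_sync(&d->injection_work);
	kthread_destroy_worker(d->worker);
	d->worker = NULL;
}

As in the driver, the stop path is only best-effort about canceling: the flag, the cancel calls, and the re-queuing are not serialized by a lock, so one more work may still be queued afterwards, but it will see the cleared flag, refrain from re-queuing, and be flushed by kthread_destroy_worker().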
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 63657d193db5..a94f7c849a4e 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -86,11 +86,27 @@ static unsigned int control_cpu; /* The cpu assigned to collect stat and update
 			  */
 static bool clamping;
 
+static const struct sched_param sparam = {
+	.sched_priority = MAX_USER_RT_PRIO / 2,
+};
+struct powerclamp_worker_data {
+	struct kthread_worker *worker;
+	struct kthread_work balancing_work;
+	struct kthread_delayed_work idle_injection_work;
+	struct timer_list wakeup_timer;
+	unsigned int cpu;
+	unsigned int count;
+	unsigned int guard;
+	unsigned int window_size_now;
+	unsigned int target_ratio;
+	unsigned int duration_jiffies;
+	bool clamping;
+};
 
-static struct task_struct * __percpu *powerclamp_thread;
+static struct powerclamp_worker_data * __percpu worker_data;
 static struct thermal_cooling_device *cooling_dev;
 static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu
-					   * clamping thread
+					   * clamping kthread worker
 					   */
 
 static unsigned int duration;
@@ -368,103 +384,104 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio,
 	return set_target_ratio + guard <= current_ratio;
 }
 
-static int clamp_thread(void *arg)
-{
-	int cpunr = (unsigned long)arg;
-	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
-	static const struct sched_param param = {
-		.sched_priority = MAX_USER_RT_PRIO/2,
-	};
-	unsigned int count = 0;
-	unsigned int target_ratio;
-
-	set_bit(cpunr, cpu_clamping_mask);
-	set_freezable();
-	init_timer_on_stack(&wakeup_timer);
-	sched_setscheduler(current, SCHED_FIFO, &param);
-
-	while (true == clamping && !kthread_should_stop() &&
-		cpu_online(cpunr)) {
-		int sleeptime;
-		unsigned long target_jiffies;
-		unsigned int guard;
-		unsigned int compensated_ratio;
-		int interval; /* jiffies to sleep for each attempt */
-		unsigned int duration_jiffies = msecs_to_jiffies(duration);
-		unsigned int window_size_now;
-
-		try_to_freeze();
-		/*
-		 * make sure user selected ratio does not take effect until
-		 * the next round. adjust target_ratio if user has changed
-		 * target such that we can converge quickly.
-		 */
-		target_ratio = set_target_ratio;
-		guard = 1 + target_ratio/20;
-		window_size_now = window_size;
-		count++;
-
-		/*
-		 * systems may have different ability to enter package level
-		 * c-states, thus we need to compensate the injected idle ratio
-		 * to achieve the actual target reported by the HW.
-		 */
-		compensated_ratio = target_ratio +
-			get_compensation(target_ratio);
-		if (compensated_ratio <= 0)
-			compensated_ratio = 1;
-		interval = duration_jiffies * 100 / compensated_ratio;
-
-		/* align idle time */
-		target_jiffies = roundup(jiffies, interval);
-		sleeptime = target_jiffies - jiffies;
-		if (sleeptime <= 0)
-			sleeptime = 1;
-		schedule_timeout_interruptible(sleeptime);
-		/*
-		 * only elected controlling cpu can collect stats and update
-		 * control parameters.
-		 */
-		if (cpunr == control_cpu && !(count%window_size_now)) {
-			should_skip =
-				powerclamp_adjust_controls(target_ratio,
-						guard, window_size_now);
-			smp_mb();
-		}
-
-		if (should_skip)
-			continue;
-
-		target_jiffies = jiffies + duration_jiffies;
-		mod_timer(&wakeup_timer, target_jiffies);
-		if (unlikely(local_softirq_pending()))
-			continue;
-		/*
-		 * stop tick sched during idle time, interrupts are still
-		 * allowed. thus jiffies are updated properly.
-		 */
-		preempt_disable();
-		/* mwait until target jiffies is reached */
-		while (time_before(jiffies, target_jiffies)) {
-			unsigned long ecx = 1;
-			unsigned long eax = target_mwait;
-
-			/*
-			 * REVISIT: may call enter_idle() to notify drivers who
-			 * can save power during cpu idle. same for exit_idle()
-			 */
-			local_touch_nmi();
-			stop_critical_timings();
-			mwait_idle_with_hints(eax, ecx);
-			start_critical_timings();
-			atomic_inc(&idle_wakeup_counter);
-		}
-		preempt_enable();
-	}
-	del_timer_sync(&wakeup_timer);
-	clear_bit(cpunr, cpu_clamping_mask);
-
-	return 0;
+static void clamp_balancing_func(struct kthread_work *work)
+{
+	struct powerclamp_worker_data *w_data;
+	int sleeptime;
+	unsigned long target_jiffies;
+	unsigned int compensated_ratio;
+	int interval; /* jiffies to sleep for each attempt */
+
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      balancing_work);
+
+	/*
+	 * make sure user selected ratio does not take effect until
+	 * the next round. adjust target_ratio if user has changed
+	 * target such that we can converge quickly.
+	 */
+	w_data->target_ratio = READ_ONCE(set_target_ratio);
+	w_data->guard = 1 + w_data->target_ratio / 20;
+	w_data->window_size_now = window_size;
+	w_data->duration_jiffies = msecs_to_jiffies(duration);
+	w_data->count++;
+
+	/*
+	 * systems may have different ability to enter package level
+	 * c-states, thus we need to compensate the injected idle ratio
+	 * to achieve the actual target reported by the HW.
+	 */
+	compensated_ratio = w_data->target_ratio +
+		get_compensation(w_data->target_ratio);
+	if (compensated_ratio <= 0)
+		compensated_ratio = 1;
+	interval = w_data->duration_jiffies * 100 / compensated_ratio;
+
+	/* align idle time */
+	target_jiffies = roundup(jiffies, interval);
+	sleeptime = target_jiffies - jiffies;
+	if (sleeptime <= 0)
+		sleeptime = 1;
+
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_delayed_work(w_data->worker,
+					   &w_data->idle_injection_work,
+					   sleeptime);
+}
+
+static void clamp_idle_injection_func(struct kthread_work *work)
+{
+	struct powerclamp_worker_data *w_data;
+	unsigned long target_jiffies;
+
+	w_data = container_of(work, struct powerclamp_worker_data,
+			      idle_injection_work.work);
+
+	/*
+	 * only elected controlling cpu can collect stats and update
+	 * control parameters.
+	 */
+	if (w_data->cpu == control_cpu &&
+	    !(w_data->count % w_data->window_size_now)) {
+		should_skip =
+			powerclamp_adjust_controls(w_data->target_ratio,
+						   w_data->guard,
+						   w_data->window_size_now);
+		smp_mb();
+	}
+
+	if (should_skip)
+		goto balance;
+
+	target_jiffies = jiffies + w_data->duration_jiffies;
+	mod_timer(&w_data->wakeup_timer, target_jiffies);
+	if (unlikely(local_softirq_pending()))
+		goto balance;
+	/*
+	 * stop tick sched during idle time, interrupts are still
+	 * allowed. thus jiffies are updated properly.
+	 */
+	preempt_disable();
+	/* mwait until target jiffies is reached */
+	while (time_before(jiffies, target_jiffies)) {
+		unsigned long ecx = 1;
+		unsigned long eax = target_mwait;
+
+		/*
+		 * REVISIT: may call enter_idle() to notify drivers who
+		 * can save power during cpu idle. same for exit_idle()
+		 */
+		local_touch_nmi();
+		stop_critical_timings();
+		mwait_idle_with_hints(eax, ecx);
+		start_critical_timings();
+		atomic_inc(&idle_wakeup_counter);
+	}
+	preempt_enable();
+
+balance:
+	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
+		kthread_queue_work(w_data->worker, &w_data->balancing_work);
 }
 
 /*
@@ -508,22 +525,58 @@ static void poll_pkg_cstate(struct work_struct *dummy)
 	schedule_delayed_work(&poll_pkg_cstate_work, HZ);
 }
 
-static void start_power_clamp_thread(unsigned long cpu)
+static void start_power_clamp_worker(unsigned long cpu)
 {
-	struct task_struct **p = per_cpu_ptr(powerclamp_thread, cpu);
-	struct task_struct *thread;
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+	struct kthread_worker *worker;
 
-	thread = kthread_create_on_node(clamp_thread,
-					(void *) cpu,
-					cpu_to_node(cpu),
-					"kidle_inject/%ld", cpu);
-	if (IS_ERR(thread))
+	worker = kthread_create_worker_on_cpu(cpu, KTW_FREEZABLE,
+					      "kidle_inject/%ld", cpu);
+	if (IS_ERR(worker))
 		return;
 
-	/* bind to cpu here */
-	kthread_bind(thread, cpu);
-	wake_up_process(thread);
-	*p = thread;
+	w_data->worker = worker;
+	w_data->count = 0;
+	w_data->cpu = cpu;
+	w_data->clamping = true;
+	set_bit(cpu, cpu_clamping_mask);
+	setup_timer(&w_data->wakeup_timer, noop_timer, 0);
+	sched_setscheduler(worker->task, SCHED_FIFO, &sparam);
+	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
+	kthread_init_delayed_work(&w_data->idle_injection_work,
+				  clamp_idle_injection_func);
+	kthread_queue_work(w_data->worker, &w_data->balancing_work);
+}
+
+static void stop_power_clamp_worker(unsigned long cpu)
+{
+	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
+
+	if (!w_data->worker)
+		return;
+
+	w_data->clamping = false;
+	/*
+	 * Make sure that all works that get queued after this point see
+	 * the clamping disabled. The counter part is not needed because
+	 * there is an implicit memory barrier when the queued work
+	 * is proceed.
+	 */
+	smp_wmb();
+	kthread_cancel_work_sync(&w_data->balancing_work);
+	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
+	/*
+	 * The balancing work still might be queued here because
+	 * the handling of the "clapming" variable, cancel, and queue
+	 * operations are not synchronized via a lock. But it is not
+	 * a big deal. The balancing work is fast and destroy kthread
+	 * will wait for it.
+	 */
+	del_timer_sync(&w_data->wakeup_timer);
+	clear_bit(w_data->cpu, cpu_clamping_mask);
+	kthread_destroy_worker(w_data->worker);
+
+	w_data->worker = NULL;
 }
 
 static int start_power_clamp(void)
@@ -542,9 +595,9 @@ static int start_power_clamp(void)
 	clamping = true;
 	schedule_delayed_work(&poll_pkg_cstate_work, 0);
 
-	/* start one thread per online cpu */
+	/* start one kthread worker per online cpu */
 	for_each_online_cpu(cpu) {
-		start_power_clamp_thread(cpu);
+		start_power_clamp_worker(cpu);
 	}
 	put_online_cpus();
 
@@ -554,20 +607,17 @@ static int start_power_clamp(void)
 static void end_power_clamp(void)
 {
 	int i;
-	struct task_struct *thread;
 
-	clamping = false;
 	/*
-	 * make clamping visible to other cpus and give per cpu clamping threads
-	 * sometime to exit, or gets killed later.
+	 * Block requeuing in all the kthread workers. They will flush and
+	 * stop faster.
 	 */
-	smp_mb();
-	msleep(20);
+	clamping = false;
 	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
 		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
-			pr_debug("clamping thread for cpu %d alive, kill\n", i);
-			thread = *per_cpu_ptr(powerclamp_thread, i);
-			kthread_stop(thread);
+			pr_debug("clamping worker for cpu %d alive, destroy\n",
+				 i);
+			stop_power_clamp_worker(i);
 		}
 	}
 }
@@ -576,15 +626,13 @@ static int powerclamp_cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	unsigned long cpu = (unsigned long)hcpu;
-	struct task_struct **percpu_thread =
-		per_cpu_ptr(powerclamp_thread, cpu);
 
 	if (false == clamping)
 		goto exit_ok;
 
 	switch (action) {
 	case CPU_ONLINE:
-		start_power_clamp_thread(cpu);
+		start_power_clamp_worker(cpu);
 		/* prefer BSP as controlling CPU */
 		if (cpu == 0) {
 			control_cpu = 0;
@@ -595,7 +643,7 @@ static int powerclamp_cpu_callback(struct notifier_block *nfb,
 		if (test_bit(cpu, cpu_clamping_mask)) {
 			pr_err("cpu %lu dead but powerclamping thread is not\n",
 				cpu);
-			kthread_stop(*percpu_thread);
+			stop_power_clamp_worker(cpu);
 		}
 		if (cpu == control_cpu) {
 			control_cpu = smp_processor_id();
@@ -759,8 +807,8 @@ static int __init powerclamp_init(void)
 	window_size = 2;
 	register_hotcpu_notifier(&powerclamp_cpu_notifier);
 
-	powerclamp_thread = alloc_percpu(struct task_struct *);
-	if (!powerclamp_thread) {
+	worker_data = alloc_percpu(struct powerclamp_worker_data);
+	if (!worker_data) {
 		retval = -ENOMEM;
 		goto exit_unregister;
 	}
@@ -780,7 +828,7 @@ static int __init powerclamp_init(void)
 	return 0;
 
 exit_free_thread:
-	free_percpu(powerclamp_thread);
+	free_percpu(worker_data);
 exit_unregister:
 	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 exit_free:
@@ -793,7 +841,7 @@ static void __exit powerclamp_exit(void)
 {
 	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 	end_power_clamp();
-	free_percpu(powerclamp_thread);
+	free_percpu(worker_data);
 	thermal_cooling_device_unregister(cooling_dev);
 	kfree(cpu_clamping_mask);
 