aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2018-04-05 13:07:57 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2018-04-07 12:48:24 -0400
commit23a8d888107ce4ce444eab2dcebf4cfb3578770b (patch)
tree2147dbac5c1e70ada0deb4d9e9197a6ccdbb299e
parent45f1ff59e27ca59d33cc1a317e669d90022ccf7d (diff)
time: tick-sched: Split tick_nohz_stop_sched_tick()
In order to address the issue with short idle duration predictions by the idle governor after the scheduler tick has been stopped, split tick_nohz_stop_sched_tick() into two separate routines, one computing the time to the next timer event and the other simply stopping the tick when the time to the next timer event is known.

Prepare these two routines to be called separately, as one of them will be called by the idle governor in the cpuidle_select() code path after subsequent changes.

Update the former callers of tick_nohz_stop_sched_tick() to use the new routines, tick_nohz_next_event() and tick_nohz_stop_tick(), instead of it and move the updates of the sleep_length field in struct tick_sched into __tick_nohz_idle_stop_tick() as it doesn't need to be updated anywhere else.

There should be no intentional visible changes in functionality resulting from this change.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
-rw-r--r--kernel/time/tick-sched.c124
-rw-r--r--kernel/time/tick-sched.h4
2 files changed, 82 insertions, 46 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 69fe113cfc7f..f56d2c695712 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -641,13 +641,10 @@ static inline bool local_timer_softirq_pending(void)
641 return local_softirq_pending() & TIMER_SOFTIRQ; 641 return local_softirq_pending() & TIMER_SOFTIRQ;
642} 642}
643 643
644static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 644static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
645 ktime_t now, int cpu)
646{ 645{
647 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
648 u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; 646 u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
649 unsigned long seq, basejiff; 647 unsigned long seq, basejiff;
650 ktime_t tick;
651 648
652 /* Read jiffies and the time when jiffies were updated last */ 649 /* Read jiffies and the time when jiffies were updated last */
653 do { 650 do {
@@ -656,6 +653,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
656 basejiff = jiffies; 653 basejiff = jiffies;
657 } while (read_seqretry(&jiffies_lock, seq)); 654 } while (read_seqretry(&jiffies_lock, seq));
658 ts->last_jiffies = basejiff; 655 ts->last_jiffies = basejiff;
656 ts->timer_expires_base = basemono;
659 657
660 /* 658 /*
661 * Keep the periodic tick, when RCU, architecture or irq_work 659 * Keep the periodic tick, when RCU, architecture or irq_work
@@ -700,47 +698,63 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
700 * next period, so no point in stopping it either, bail. 698 * next period, so no point in stopping it either, bail.
701 */ 699 */
702 if (!ts->tick_stopped) { 700 if (!ts->tick_stopped) {
703 tick = 0; 701 ts->timer_expires = 0;
704 goto out; 702 goto out;
705 } 703 }
706 } 704 }
707 705
708 /* 706 /*
707 * If this CPU is the one which had the do_timer() duty last, we limit
708 * the sleep time to the timekeeping max_deferment value.
709 * Otherwise we can sleep as long as we want.
710 */
711 delta = timekeeping_max_deferment();
712 if (cpu != tick_do_timer_cpu &&
713 (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
714 delta = KTIME_MAX;
715
716 /* Calculate the next expiry time */
717 if (delta < (KTIME_MAX - basemono))
718 expires = basemono + delta;
719 else
720 expires = KTIME_MAX;
721
722 ts->timer_expires = min_t(u64, expires, next_tick);
723
724out:
725 return ts->timer_expires;
726}
727
728static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
729{
730 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
731 u64 basemono = ts->timer_expires_base;
732 u64 expires = ts->timer_expires;
733 ktime_t tick = expires;
734
735 /* Make sure we won't be trying to stop it twice in a row. */
736 ts->timer_expires_base = 0;
737
738 /*
709 * If this CPU is the one which updates jiffies, then give up 739 * If this CPU is the one which updates jiffies, then give up
710 * the assignment and let it be taken by the CPU which runs 740 * the assignment and let it be taken by the CPU which runs
711 * the tick timer next, which might be this CPU as well. If we 741 * the tick timer next, which might be this CPU as well. If we
712 * don't drop this here the jiffies might be stale and 742 * don't drop this here the jiffies might be stale and
713 * do_timer() never invoked. Keep track of the fact that it 743 * do_timer() never invoked. Keep track of the fact that it
714 * was the one which had the do_timer() duty last. If this CPU 744 * was the one which had the do_timer() duty last.
715 * is the one which had the do_timer() duty last, we limit the
716 * sleep time to the timekeeping max_deferment value.
717 * Otherwise we can sleep as long as we want.
718 */ 745 */
719 delta = timekeeping_max_deferment();
720 if (cpu == tick_do_timer_cpu) { 746 if (cpu == tick_do_timer_cpu) {
721 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 747 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
722 ts->do_timer_last = 1; 748 ts->do_timer_last = 1;
723 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { 749 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
724 delta = KTIME_MAX;
725 ts->do_timer_last = 0; 750 ts->do_timer_last = 0;
726 } else if (!ts->do_timer_last) {
727 delta = KTIME_MAX;
728 } 751 }
729 752
730 /* Calculate the next expiry time */
731 if (delta < (KTIME_MAX - basemono))
732 expires = basemono + delta;
733 else
734 expires = KTIME_MAX;
735
736 expires = min_t(u64, expires, next_tick);
737 tick = expires;
738
739 /* Skip reprogram of event if its not changed */ 753 /* Skip reprogram of event if its not changed */
740 if (ts->tick_stopped && (expires == ts->next_tick)) { 754 if (ts->tick_stopped && (expires == ts->next_tick)) {
741 /* Sanity check: make sure clockevent is actually programmed */ 755 /* Sanity check: make sure clockevent is actually programmed */
742 if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) 756 if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
743 goto out; 757 return;
744 758
745 WARN_ON_ONCE(1); 759 WARN_ON_ONCE(1);
746 printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", 760 printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -774,7 +788,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
774 if (unlikely(expires == KTIME_MAX)) { 788 if (unlikely(expires == KTIME_MAX)) {
775 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 789 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
776 hrtimer_cancel(&ts->sched_timer); 790 hrtimer_cancel(&ts->sched_timer);
777 goto out; 791 return;
778 } 792 }
779 793
780 hrtimer_set_expires(&ts->sched_timer, tick); 794 hrtimer_set_expires(&ts->sched_timer, tick);
@@ -783,15 +797,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
783 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 797 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
784 else 798 else
785 tick_program_event(tick, 1); 799 tick_program_event(tick, 1);
786out:
787 /*
788 * Update the estimated sleep length until the next timer
789 * (not only the tick).
790 */
791 ts->sleep_length = ktime_sub(dev->next_event, now);
792 return tick;
793} 800}
794 801
802static void tick_nohz_retain_tick(struct tick_sched *ts)
803{
804 ts->timer_expires_base = 0;
805}
806
807#ifdef CONFIG_NO_HZ_FULL
808static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
809{
810 if (tick_nohz_next_event(ts, cpu))
811 tick_nohz_stop_tick(ts, cpu);
812 else
813 tick_nohz_retain_tick(ts);
814}
815#endif /* CONFIG_NO_HZ_FULL */
816
795static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) 817static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
796{ 818{
797 /* Update jiffies first */ 819 /* Update jiffies first */
@@ -827,7 +849,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
827 return; 849 return;
828 850
829 if (can_stop_full_tick(cpu, ts)) 851 if (can_stop_full_tick(cpu, ts))
830 tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); 852 tick_nohz_stop_sched_tick(ts, cpu);
831 else if (ts->tick_stopped) 853 else if (ts->tick_stopped)
832 tick_nohz_restart_sched_tick(ts, ktime_get()); 854 tick_nohz_restart_sched_tick(ts, ktime_get());
833#endif 855#endif
@@ -853,10 +875,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
853 return false; 875 return false;
854 } 876 }
855 877
856 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { 878 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
857 ts->sleep_length = NSEC_PER_SEC / HZ;
858 return false; 879 return false;
859 }
860 880
861 if (need_resched()) 881 if (need_resched())
862 return false; 882 return false;
@@ -893,29 +913,37 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
893 913
894static void __tick_nohz_idle_stop_tick(struct tick_sched *ts) 914static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
895{ 915{
916 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
896 ktime_t expires; 917 ktime_t expires;
897 int cpu = smp_processor_id(); 918 int cpu = smp_processor_id();
898 919
899 if (can_stop_idle_tick(cpu, ts)) { 920 WARN_ON_ONCE(ts->timer_expires_base);
921
922 if (!can_stop_idle_tick(cpu, ts))
923 goto out;
924
925 expires = tick_nohz_next_event(ts, cpu);
926
927 ts->idle_calls++;
928
929 if (expires > 0LL) {
900 int was_stopped = ts->tick_stopped; 930 int was_stopped = ts->tick_stopped;
901 931
902 ts->idle_calls++; 932 tick_nohz_stop_tick(ts, cpu);
903 933
904 /* 934 ts->idle_sleeps++;
905 * The idle entry time should be a sufficient approximation of 935 ts->idle_expires = expires;
906 * the current time at this point.
907 */
908 expires = tick_nohz_stop_sched_tick(ts, ts->idle_entrytime, cpu);
909 if (expires > 0LL) {
910 ts->idle_sleeps++;
911 ts->idle_expires = expires;
912 }
913 936
914 if (!was_stopped && ts->tick_stopped) { 937 if (!was_stopped && ts->tick_stopped) {
915 ts->idle_jiffies = ts->last_jiffies; 938 ts->idle_jiffies = ts->last_jiffies;
916 nohz_balance_enter_idle(cpu); 939 nohz_balance_enter_idle(cpu);
917 } 940 }
941 } else {
942 tick_nohz_retain_tick(ts);
918 } 943 }
944
945out:
946 ts->sleep_length = ktime_sub(dev->next_event, ts->idle_entrytime);
919} 947}
920 948
921/** 949/**
@@ -942,6 +970,9 @@ void tick_nohz_idle_enter(void)
942 local_irq_disable(); 970 local_irq_disable();
943 971
944 ts = this_cpu_ptr(&tick_cpu_sched); 972 ts = this_cpu_ptr(&tick_cpu_sched);
973
974 WARN_ON_ONCE(ts->timer_expires_base);
975
945 ts->inidle = 1; 976 ts->inidle = 1;
946 tick_nohz_start_idle(ts); 977 tick_nohz_start_idle(ts);
947 978
@@ -1067,6 +1098,7 @@ void tick_nohz_idle_exit(void)
1067 local_irq_disable(); 1098 local_irq_disable();
1068 1099
1069 WARN_ON_ONCE(!ts->inidle); 1100 WARN_ON_ONCE(!ts->inidle);
1101 WARN_ON_ONCE(ts->timer_expires_base);
1070 1102
1071 ts->inidle = 0; 1103 ts->inidle = 0;
1072 1104
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 954b43dbf21c..53e45a39bdbc 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -39,6 +39,8 @@ enum tick_nohz_mode {
39 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped 39 * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
40 * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding 40 * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
41 * @sleep_length: Duration of the current idle sleep 41 * @sleep_length: Duration of the current idle sleep
42 * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
43 * @timer_expires_base: Base time clock monotonic for @timer_expires
42 * @do_timer_lst: CPU was the last one doing do_timer before going idle 44 * @do_timer_lst: CPU was the last one doing do_timer before going idle
43 */ 45 */
44struct tick_sched { 46struct tick_sched {
@@ -60,6 +62,8 @@ struct tick_sched {
60 ktime_t iowait_sleeptime; 62 ktime_t iowait_sleeptime;
61 ktime_t sleep_length; 63 ktime_t sleep_length;
62 unsigned long last_jiffies; 64 unsigned long last_jiffies;
65 u64 timer_expires;
66 u64 timer_expires_base;
63 u64 next_timer; 67 u64 next_timer;
64 ktime_t idle_expires; 68 ktime_t idle_expires;
65 int do_timer_last; 69 int do_timer_last;