diff options
-rw-r--r-- | kernel/time/tick-sched.c | 124 | ||||
-rw-r--r-- | kernel/time/tick-sched.h | 4 |
2 files changed, 82 insertions, 46 deletions
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 69fe113cfc7f..f56d2c695712 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -641,13 +641,10 @@ static inline bool local_timer_softirq_pending(void) | |||
641 | return local_softirq_pending() & TIMER_SOFTIRQ; | 641 | return local_softirq_pending() & TIMER_SOFTIRQ; |
642 | } | 642 | } |
643 | 643 | ||
644 | static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | 644 | static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) |
645 | ktime_t now, int cpu) | ||
646 | { | 645 | { |
647 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | ||
648 | u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; | 646 | u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; |
649 | unsigned long seq, basejiff; | 647 | unsigned long seq, basejiff; |
650 | ktime_t tick; | ||
651 | 648 | ||
652 | /* Read jiffies and the time when jiffies were updated last */ | 649 | /* Read jiffies and the time when jiffies were updated last */ |
653 | do { | 650 | do { |
@@ -656,6 +653,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
656 | basejiff = jiffies; | 653 | basejiff = jiffies; |
657 | } while (read_seqretry(&jiffies_lock, seq)); | 654 | } while (read_seqretry(&jiffies_lock, seq)); |
658 | ts->last_jiffies = basejiff; | 655 | ts->last_jiffies = basejiff; |
656 | ts->timer_expires_base = basemono; | ||
659 | 657 | ||
660 | /* | 658 | /* |
661 | * Keep the periodic tick, when RCU, architecture or irq_work | 659 | * Keep the periodic tick, when RCU, architecture or irq_work |
@@ -700,47 +698,63 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
700 | * next period, so no point in stopping it either, bail. | 698 | * next period, so no point in stopping it either, bail. |
701 | */ | 699 | */ |
702 | if (!ts->tick_stopped) { | 700 | if (!ts->tick_stopped) { |
703 | tick = 0; | 701 | ts->timer_expires = 0; |
704 | goto out; | 702 | goto out; |
705 | } | 703 | } |
706 | } | 704 | } |
707 | 705 | ||
708 | /* | 706 | /* |
707 | * If this CPU is the one which had the do_timer() duty last, we limit | ||
708 | * the sleep time to the timekeeping max_deferment value. | ||
709 | * Otherwise we can sleep as long as we want. | ||
710 | */ | ||
711 | delta = timekeeping_max_deferment(); | ||
712 | if (cpu != tick_do_timer_cpu && | ||
713 | (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last)) | ||
714 | delta = KTIME_MAX; | ||
715 | |||
716 | /* Calculate the next expiry time */ | ||
717 | if (delta < (KTIME_MAX - basemono)) | ||
718 | expires = basemono + delta; | ||
719 | else | ||
720 | expires = KTIME_MAX; | ||
721 | |||
722 | ts->timer_expires = min_t(u64, expires, next_tick); | ||
723 | |||
724 | out: | ||
725 | return ts->timer_expires; | ||
726 | } | ||
727 | |||
728 | static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) | ||
729 | { | ||
730 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | ||
731 | u64 basemono = ts->timer_expires_base; | ||
732 | u64 expires = ts->timer_expires; | ||
733 | ktime_t tick = expires; | ||
734 | |||
735 | /* Make sure we won't be trying to stop it twice in a row. */ | ||
736 | ts->timer_expires_base = 0; | ||
737 | |||
738 | /* | ||
709 | * If this CPU is the one which updates jiffies, then give up | 739 | * If this CPU is the one which updates jiffies, then give up |
710 | * the assignment and let it be taken by the CPU which runs | 740 | * the assignment and let it be taken by the CPU which runs |
711 | * the tick timer next, which might be this CPU as well. If we | 741 | * the tick timer next, which might be this CPU as well. If we |
712 | * don't drop this here the jiffies might be stale and | 742 | * don't drop this here the jiffies might be stale and |
713 | * do_timer() never invoked. Keep track of the fact that it | 743 | * do_timer() never invoked. Keep track of the fact that it |
714 | * was the one which had the do_timer() duty last. If this CPU | 744 | * was the one which had the do_timer() duty last. |
715 | * is the one which had the do_timer() duty last, we limit the | ||
716 | * sleep time to the timekeeping max_deferment value. | ||
717 | * Otherwise we can sleep as long as we want. | ||
718 | */ | 745 | */ |
719 | delta = timekeeping_max_deferment(); | ||
720 | if (cpu == tick_do_timer_cpu) { | 746 | if (cpu == tick_do_timer_cpu) { |
721 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 747 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
722 | ts->do_timer_last = 1; | 748 | ts->do_timer_last = 1; |
723 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | 749 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { |
724 | delta = KTIME_MAX; | ||
725 | ts->do_timer_last = 0; | 750 | ts->do_timer_last = 0; |
726 | } else if (!ts->do_timer_last) { | ||
727 | delta = KTIME_MAX; | ||
728 | } | 751 | } |
729 | 752 | ||
730 | /* Calculate the next expiry time */ | ||
731 | if (delta < (KTIME_MAX - basemono)) | ||
732 | expires = basemono + delta; | ||
733 | else | ||
734 | expires = KTIME_MAX; | ||
735 | |||
736 | expires = min_t(u64, expires, next_tick); | ||
737 | tick = expires; | ||
738 | |||
739 | /* Skip reprogram of event if it's not changed */ | 753 | /* Skip reprogram of event if it's not changed */ |
740 | if (ts->tick_stopped && (expires == ts->next_tick)) { | 754 | if (ts->tick_stopped && (expires == ts->next_tick)) { |
741 | /* Sanity check: make sure clockevent is actually programmed */ | 755 | /* Sanity check: make sure clockevent is actually programmed */ |
742 | if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) | 756 | if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) |
743 | goto out; | 757 | return; |
744 | 758 | ||
745 | WARN_ON_ONCE(1); | 759 | WARN_ON_ONCE(1); |
746 | printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", | 760 | printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", |
@@ -774,7 +788,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
774 | if (unlikely(expires == KTIME_MAX)) { | 788 | if (unlikely(expires == KTIME_MAX)) { |
775 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 789 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
776 | hrtimer_cancel(&ts->sched_timer); | 790 | hrtimer_cancel(&ts->sched_timer); |
777 | goto out; | 791 | return; |
778 | } | 792 | } |
779 | 793 | ||
780 | hrtimer_set_expires(&ts->sched_timer, tick); | 794 | hrtimer_set_expires(&ts->sched_timer, tick); |
@@ -783,15 +797,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
783 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); | 797 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); |
784 | else | 798 | else |
785 | tick_program_event(tick, 1); | 799 | tick_program_event(tick, 1); |
786 | out: | ||
787 | /* | ||
788 | * Update the estimated sleep length until the next timer | ||
789 | * (not only the tick). | ||
790 | */ | ||
791 | ts->sleep_length = ktime_sub(dev->next_event, now); | ||
792 | return tick; | ||
793 | } | 800 | } |
794 | 801 | ||
802 | static void tick_nohz_retain_tick(struct tick_sched *ts) | ||
803 | { | ||
804 | ts->timer_expires_base = 0; | ||
805 | } | ||
806 | |||
807 | #ifdef CONFIG_NO_HZ_FULL | ||
808 | static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu) | ||
809 | { | ||
810 | if (tick_nohz_next_event(ts, cpu)) | ||
811 | tick_nohz_stop_tick(ts, cpu); | ||
812 | else | ||
813 | tick_nohz_retain_tick(ts); | ||
814 | } | ||
815 | #endif /* CONFIG_NO_HZ_FULL */ | ||
816 | |||
795 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) | 817 | static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) |
796 | { | 818 | { |
797 | /* Update jiffies first */ | 819 | /* Update jiffies first */ |
@@ -827,7 +849,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) | |||
827 | return; | 849 | return; |
828 | 850 | ||
829 | if (can_stop_full_tick(cpu, ts)) | 851 | if (can_stop_full_tick(cpu, ts)) |
830 | tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); | 852 | tick_nohz_stop_sched_tick(ts, cpu); |
831 | else if (ts->tick_stopped) | 853 | else if (ts->tick_stopped) |
832 | tick_nohz_restart_sched_tick(ts, ktime_get()); | 854 | tick_nohz_restart_sched_tick(ts, ktime_get()); |
833 | #endif | 855 | #endif |
@@ -853,10 +875,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
853 | return false; | 875 | return false; |
854 | } | 876 | } |
855 | 877 | ||
856 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { | 878 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
857 | ts->sleep_length = NSEC_PER_SEC / HZ; | ||
858 | return false; | 879 | return false; |
859 | } | ||
860 | 880 | ||
861 | if (need_resched()) | 881 | if (need_resched()) |
862 | return false; | 882 | return false; |
@@ -893,29 +913,37 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
893 | 913 | ||
894 | static void __tick_nohz_idle_stop_tick(struct tick_sched *ts) | 914 | static void __tick_nohz_idle_stop_tick(struct tick_sched *ts) |
895 | { | 915 | { |
916 | struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); | ||
896 | ktime_t expires; | 917 | ktime_t expires; |
897 | int cpu = smp_processor_id(); | 918 | int cpu = smp_processor_id(); |
898 | 919 | ||
899 | if (can_stop_idle_tick(cpu, ts)) { | 920 | WARN_ON_ONCE(ts->timer_expires_base); |
921 | |||
922 | if (!can_stop_idle_tick(cpu, ts)) | ||
923 | goto out; | ||
924 | |||
925 | expires = tick_nohz_next_event(ts, cpu); | ||
926 | |||
927 | ts->idle_calls++; | ||
928 | |||
929 | if (expires > 0LL) { | ||
900 | int was_stopped = ts->tick_stopped; | 930 | int was_stopped = ts->tick_stopped; |
901 | 931 | ||
902 | ts->idle_calls++; | 932 | tick_nohz_stop_tick(ts, cpu); |
903 | 933 | ||
904 | /* | 934 | ts->idle_sleeps++; |
905 | * The idle entry time should be a sufficient approximation of | 935 | ts->idle_expires = expires; |
906 | * the current time at this point. | ||
907 | */ | ||
908 | expires = tick_nohz_stop_sched_tick(ts, ts->idle_entrytime, cpu); | ||
909 | if (expires > 0LL) { | ||
910 | ts->idle_sleeps++; | ||
911 | ts->idle_expires = expires; | ||
912 | } | ||
913 | 936 | ||
914 | if (!was_stopped && ts->tick_stopped) { | 937 | if (!was_stopped && ts->tick_stopped) { |
915 | ts->idle_jiffies = ts->last_jiffies; | 938 | ts->idle_jiffies = ts->last_jiffies; |
916 | nohz_balance_enter_idle(cpu); | 939 | nohz_balance_enter_idle(cpu); |
917 | } | 940 | } |
941 | } else { | ||
942 | tick_nohz_retain_tick(ts); | ||
918 | } | 943 | } |
944 | |||
945 | out: | ||
946 | ts->sleep_length = ktime_sub(dev->next_event, ts->idle_entrytime); | ||
919 | } | 947 | } |
920 | 948 | ||
921 | /** | 949 | /** |
@@ -942,6 +970,9 @@ void tick_nohz_idle_enter(void) | |||
942 | local_irq_disable(); | 970 | local_irq_disable(); |
943 | 971 | ||
944 | ts = this_cpu_ptr(&tick_cpu_sched); | 972 | ts = this_cpu_ptr(&tick_cpu_sched); |
973 | |||
974 | WARN_ON_ONCE(ts->timer_expires_base); | ||
975 | |||
945 | ts->inidle = 1; | 976 | ts->inidle = 1; |
946 | tick_nohz_start_idle(ts); | 977 | tick_nohz_start_idle(ts); |
947 | 978 | ||
@@ -1067,6 +1098,7 @@ void tick_nohz_idle_exit(void) | |||
1067 | local_irq_disable(); | 1098 | local_irq_disable(); |
1068 | 1099 | ||
1069 | WARN_ON_ONCE(!ts->inidle); | 1100 | WARN_ON_ONCE(!ts->inidle); |
1101 | WARN_ON_ONCE(ts->timer_expires_base); | ||
1070 | 1102 | ||
1071 | ts->inidle = 0; | 1103 | ts->inidle = 0; |
1072 | 1104 | ||
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 954b43dbf21c..53e45a39bdbc 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h | |||
@@ -39,6 +39,8 @@ enum tick_nohz_mode { | |||
39 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped | 39 | * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped |
40 | * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding | 40 | * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding |
41 | * @sleep_length: Duration of the current idle sleep | 41 | * @sleep_length: Duration of the current idle sleep |
42 | * @timer_expires: Anticipated timer expiration time (in case sched tick is stopped) | ||
43 | * @timer_expires_base: Base time clock monotonic for @timer_expires | ||
42 | * @do_timer_last: CPU was the last one doing do_timer before going idle | 44 | * @do_timer_last: CPU was the last one doing do_timer before going idle |
43 | */ | 45 | */ |
44 | struct tick_sched { | 46 | struct tick_sched { |
@@ -60,6 +62,8 @@ struct tick_sched { | |||
60 | ktime_t iowait_sleeptime; | 62 | ktime_t iowait_sleeptime; |
61 | ktime_t sleep_length; | 63 | ktime_t sleep_length; |
62 | unsigned long last_jiffies; | 64 | unsigned long last_jiffies; |
65 | u64 timer_expires; | ||
66 | u64 timer_expires_base; | ||
63 | u64 next_timer; | 67 | u64 next_timer; |
64 | ktime_t idle_expires; | 68 | ktime_t idle_expires; |
65 | int do_timer_last; | 69 | int do_timer_last; |