aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJulius Werner <jwerner@chromium.org>2012-11-27 08:17:58 -0500
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2012-11-27 08:17:58 -0500
commita474a515497ef3566cfc17a2cab3d54d6d50ff1c (patch)
tree377b53565f728dfee5604ae0c8c6879d38a2a3ec
parenta093b93ee0e08cd73a07848752bc09ecea68cb13 (diff)
cpuidle: Measure idle state durations with monotonic clock
Many cpuidle drivers measure their time spent in an idle state by reading the wallclock time before and after idling and calculating the difference. This leads to erroneous results when the wallclock time gets updated by another processor in the meantime, adding that clock adjustment to the idle state's time counter. If the clock adjustment was negative, the result is even worse due to an erroneous cast from int to unsigned long long of the last_residency variable. The negative 32 bit integer will zero-extend and result in a forward time jump of roughly four billion milliseconds or 1.3 hours on the idle state residency counter. This patch changes all affected cpuidle drivers to either use the monotonic clock for their measurements or make use of the generic time measurement wrapper in cpuidle.c, which was already working correctly. Some superfluous CLIs/STIs in the ACPI code are removed (interrupts should always already be disabled before entering the idle function, and not get reenabled until the generic wrapper has performed its second measurement). It also removes the erroneous cast, making sure that negative residency values are applied correctly even though they should not appear anymore. Signed-off-by: Julius Werner <jwerner@chromium.org> Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> Tested-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Len Brown <len.brown@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c4
-rw-r--r--drivers/acpi/processor_idle.c57
-rw-r--r--drivers/cpuidle/cpuidle.c3
-rw-r--r--drivers/idle/intel_idle.c14
4 files changed, 7 insertions, 71 deletions
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 45d00e5fe14..4d806b41960 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -36,7 +36,7 @@ static struct cpuidle_state *cpuidle_state_table;
36static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) 36static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
37{ 37{
38 38
39 *kt_before = ktime_get_real(); 39 *kt_before = ktime_get();
40 *in_purr = mfspr(SPRN_PURR); 40 *in_purr = mfspr(SPRN_PURR);
41 /* 41 /*
42 * Indicate to the HV that we are idle. Now would be 42 * Indicate to the HV that we are idle. Now would be
@@ -50,7 +50,7 @@ static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before)
50 get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr; 50 get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr;
51 get_lppaca()->idle = 0; 51 get_lppaca()->idle = 0;
52 52
53 return ktime_to_us(ktime_sub(ktime_get_real(), kt_before)); 53 return ktime_to_us(ktime_sub(ktime_get(), kt_before));
54} 54}
55 55
56static int snooze_loop(struct cpuidle_device *dev, 56static int snooze_loop(struct cpuidle_device *dev,
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e8086c72530..f1a5da44591 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -735,31 +735,18 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
735static int acpi_idle_enter_c1(struct cpuidle_device *dev, 735static int acpi_idle_enter_c1(struct cpuidle_device *dev,
736 struct cpuidle_driver *drv, int index) 736 struct cpuidle_driver *drv, int index)
737{ 737{
738 ktime_t kt1, kt2;
739 s64 idle_time;
740 struct acpi_processor *pr; 738 struct acpi_processor *pr;
741 struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; 739 struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
742 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); 740 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
743 741
744 pr = __this_cpu_read(processors); 742 pr = __this_cpu_read(processors);
745 dev->last_residency = 0;
746 743
747 if (unlikely(!pr)) 744 if (unlikely(!pr))
748 return -EINVAL; 745 return -EINVAL;
749 746
750 local_irq_disable();
751
752
753 lapic_timer_state_broadcast(pr, cx, 1); 747 lapic_timer_state_broadcast(pr, cx, 1);
754 kt1 = ktime_get_real();
755 acpi_idle_do_entry(cx); 748 acpi_idle_do_entry(cx);
756 kt2 = ktime_get_real();
757 idle_time = ktime_to_us(ktime_sub(kt2, kt1));
758
759 /* Update device last_residency*/
760 dev->last_residency = (int)idle_time;
761 749
762 local_irq_enable();
763 lapic_timer_state_broadcast(pr, cx, 0); 750 lapic_timer_state_broadcast(pr, cx, 0);
764 751
765 return index; 752 return index;
@@ -806,19 +793,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
806 struct acpi_processor *pr; 793 struct acpi_processor *pr;
807 struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; 794 struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
808 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); 795 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
809 ktime_t kt1, kt2;
810 s64 idle_time_ns;
811 s64 idle_time;
812 796
813 pr = __this_cpu_read(processors); 797 pr = __this_cpu_read(processors);
814 dev->last_residency = 0;
815 798
816 if (unlikely(!pr)) 799 if (unlikely(!pr))
817 return -EINVAL; 800 return -EINVAL;
818 801
819 local_irq_disable();
820
821
822 if (cx->entry_method != ACPI_CSTATE_FFH) { 802 if (cx->entry_method != ACPI_CSTATE_FFH) {
823 current_thread_info()->status &= ~TS_POLLING; 803 current_thread_info()->status &= ~TS_POLLING;
824 /* 804 /*
@@ -829,7 +809,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
829 809
830 if (unlikely(need_resched())) { 810 if (unlikely(need_resched())) {
831 current_thread_info()->status |= TS_POLLING; 811 current_thread_info()->status |= TS_POLLING;
832 local_irq_enable();
833 return -EINVAL; 812 return -EINVAL;
834 } 813 }
835 } 814 }
@@ -843,22 +822,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
843 if (cx->type == ACPI_STATE_C3) 822 if (cx->type == ACPI_STATE_C3)
844 ACPI_FLUSH_CPU_CACHE(); 823 ACPI_FLUSH_CPU_CACHE();
845 824
846 kt1 = ktime_get_real();
847 /* Tell the scheduler that we are going deep-idle: */ 825 /* Tell the scheduler that we are going deep-idle: */
848 sched_clock_idle_sleep_event(); 826 sched_clock_idle_sleep_event();
849 acpi_idle_do_entry(cx); 827 acpi_idle_do_entry(cx);
850 kt2 = ktime_get_real();
851 idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
852 idle_time = idle_time_ns;
853 do_div(idle_time, NSEC_PER_USEC);
854 828
855 /* Update device last_residency*/ 829 sched_clock_idle_wakeup_event(0);
856 dev->last_residency = (int)idle_time;
857 830
858 /* Tell the scheduler how much we idled: */
859 sched_clock_idle_wakeup_event(idle_time_ns);
860
861 local_irq_enable();
862 if (cx->entry_method != ACPI_CSTATE_FFH) 831 if (cx->entry_method != ACPI_CSTATE_FFH)
863 current_thread_info()->status |= TS_POLLING; 832 current_thread_info()->status |= TS_POLLING;
864 833
@@ -883,13 +852,8 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
883 struct acpi_processor *pr; 852 struct acpi_processor *pr;
884 struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; 853 struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
885 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); 854 struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
886 ktime_t kt1, kt2;
887 s64 idle_time_ns;
888 s64 idle_time;
889
890 855
891 pr = __this_cpu_read(processors); 856 pr = __this_cpu_read(processors);
892 dev->last_residency = 0;
893 857
894 if (unlikely(!pr)) 858 if (unlikely(!pr))
895 return -EINVAL; 859 return -EINVAL;
@@ -899,16 +863,11 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
899 return drv->states[drv->safe_state_index].enter(dev, 863 return drv->states[drv->safe_state_index].enter(dev,
900 drv, drv->safe_state_index); 864 drv, drv->safe_state_index);
901 } else { 865 } else {
902 local_irq_disable();
903 acpi_safe_halt(); 866 acpi_safe_halt();
904 local_irq_enable();
905 return -EBUSY; 867 return -EBUSY;
906 } 868 }
907 } 869 }
908 870
909 local_irq_disable();
910
911
912 if (cx->entry_method != ACPI_CSTATE_FFH) { 871 if (cx->entry_method != ACPI_CSTATE_FFH) {
913 current_thread_info()->status &= ~TS_POLLING; 872 current_thread_info()->status &= ~TS_POLLING;
914 /* 873 /*
@@ -919,7 +878,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
919 878
920 if (unlikely(need_resched())) { 879 if (unlikely(need_resched())) {
921 current_thread_info()->status |= TS_POLLING; 880 current_thread_info()->status |= TS_POLLING;
922 local_irq_enable();
923 return -EINVAL; 881 return -EINVAL;
924 } 882 }
925 } 883 }
@@ -934,7 +892,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
934 */ 892 */
935 lapic_timer_state_broadcast(pr, cx, 1); 893 lapic_timer_state_broadcast(pr, cx, 1);
936 894
937 kt1 = ktime_get_real();
938 /* 895 /*
939 * disable bus master 896 * disable bus master
940 * bm_check implies we need ARB_DIS 897 * bm_check implies we need ARB_DIS
@@ -965,18 +922,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
965 c3_cpu_count--; 922 c3_cpu_count--;
966 raw_spin_unlock(&c3_lock); 923 raw_spin_unlock(&c3_lock);
967 } 924 }
968 kt2 = ktime_get_real();
969 idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
970 idle_time = idle_time_ns;
971 do_div(idle_time, NSEC_PER_USEC);
972
973 /* Update device last_residency*/
974 dev->last_residency = (int)idle_time;
975 925
976 /* Tell the scheduler how much we idled: */ 926 sched_clock_idle_wakeup_event(0);
977 sched_clock_idle_wakeup_event(idle_time_ns);
978 927
979 local_irq_enable();
980 if (cx->entry_method != ACPI_CSTATE_FFH) 928 if (cx->entry_method != ACPI_CSTATE_FFH)
981 current_thread_info()->status |= TS_POLLING; 929 current_thread_info()->status |= TS_POLLING;
982 930
@@ -987,6 +935,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
987struct cpuidle_driver acpi_idle_driver = { 935struct cpuidle_driver acpi_idle_driver = {
988 .name = "acpi_idle", 936 .name = "acpi_idle",
989 .owner = THIS_MODULE, 937 .owner = THIS_MODULE,
938 .en_core_tk_irqen = 1,
990}; 939};
991 940
992/** 941/**
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 711dd83fd3b..8df53dd8dbe 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -109,8 +109,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
109 /* This can be moved to within driver enter routine 109 /* This can be moved to within driver enter routine
110 * but that results in multiple copies of same code. 110 * but that results in multiple copies of same code.
111 */ 111 */
112 dev->states_usage[entered_state].time += 112 dev->states_usage[entered_state].time += dev->last_residency;
113 (unsigned long long)dev->last_residency;
114 dev->states_usage[entered_state].usage++; 113 dev->states_usage[entered_state].usage++;
115 } else { 114 } else {
116 dev->last_residency = 0; 115 dev->last_residency = 0;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b0f6b4c8ee1..c49c04d9c2b 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -56,7 +56,6 @@
56#include <linux/kernel.h> 56#include <linux/kernel.h>
57#include <linux/cpuidle.h> 57#include <linux/cpuidle.h>
58#include <linux/clockchips.h> 58#include <linux/clockchips.h>
59#include <linux/hrtimer.h> /* ktime_get_real() */
60#include <trace/events/power.h> 59#include <trace/events/power.h>
61#include <linux/sched.h> 60#include <linux/sched.h>
62#include <linux/notifier.h> 61#include <linux/notifier.h>
@@ -72,6 +71,7 @@
72static struct cpuidle_driver intel_idle_driver = { 71static struct cpuidle_driver intel_idle_driver = {
73 .name = "intel_idle", 72 .name = "intel_idle",
74 .owner = THIS_MODULE, 73 .owner = THIS_MODULE,
74 .en_core_tk_irqen = 1,
75}; 75};
76/* intel_idle.max_cstate=0 disables driver */ 76/* intel_idle.max_cstate=0 disables driver */
77static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; 77static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
@@ -281,8 +281,6 @@ static int intel_idle(struct cpuidle_device *dev,
281 struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; 281 struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
282 unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage); 282 unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
283 unsigned int cstate; 283 unsigned int cstate;
284 ktime_t kt_before, kt_after;
285 s64 usec_delta;
286 int cpu = smp_processor_id(); 284 int cpu = smp_processor_id();
287 285
288 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1; 286 cstate = (((eax) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) + 1;
@@ -297,8 +295,6 @@ static int intel_idle(struct cpuidle_device *dev,
297 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 295 if (!(lapic_timer_reliable_states & (1 << (cstate))))
298 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 296 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
299 297
300 kt_before = ktime_get_real();
301
302 stop_critical_timings(); 298 stop_critical_timings();
303 if (!need_resched()) { 299 if (!need_resched()) {
304 300
@@ -310,17 +306,9 @@ static int intel_idle(struct cpuidle_device *dev,
310 306
311 start_critical_timings(); 307 start_critical_timings();
312 308
313 kt_after = ktime_get_real();
314 usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));
315
316 local_irq_enable();
317
318 if (!(lapic_timer_reliable_states & (1 << (cstate)))) 309 if (!(lapic_timer_reliable_states & (1 << (cstate))))
319 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); 310 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
320 311
321 /* Update cpuidle counters */
322 dev->last_residency = (int)usec_delta;
323
324 return index; 312 return index;
325} 313}
326 314