aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2019-04-18 10:11:37 -0400
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2019-04-25 17:20:11 -0400
commit: c208ac8f8f862dba7b01eb54557f4803b3c17296 (patch)
tree: 2d5aaed88ace2da727f4f3d86c2bf0f74a7c2861
parent: 7973b799dbea1770742851487a98276a24c961a5 (diff)
x86: tsc: Rework time_cpufreq_notifier()
There are problems with running time_cpufreq_notifier() on SMP systems.

First off, the rdtsc() called from there runs on the CPU executing that code and not necessarily on the CPU whose sched_clock() rate is updated, which is questionable at best.

Second, in the cases when the frequencies of all CPUs in an SMP system are always in sync, it is not sufficient to update just one of them or the set associated with a given cpufreq policy on frequency changes - all CPUs in the system should be updated and that would require more than a simple transition notifier.

Note, however, that the underlying issue (the TSC rate depending on the CPU frequency) has not been present in hardware shipping for the last few years and in quite a few relevant cases (acpi-cpufreq in particular) running time_cpufreq_notifier() will cause the TSC to be marked as unstable anyway.

For this reason, make time_cpufreq_notifier() simply mark the TSC as unstable and give up when run on SMP and only try to carry out any adjustments otherwise.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
-rw-r--r-- arch/x86/kernel/tsc.c 29
1 files changed, 14 insertions, 15 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3fae23834069..cc6df5c6d7b3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void)
185/* 185/*
186 * Secondary CPUs do not run through tsc_init(), so set up 186 * Secondary CPUs do not run through tsc_init(), so set up
187 * all the scale factors for all CPUs, assuming the same 187 * all the scale factors for all CPUs, assuming the same
188 * speed as the bootup CPU. (cpufreq notifiers will fix this 188 * speed as the bootup CPU.
189 * up if their speed diverges)
190 */ 189 */
191static void __init cyc2ns_init_secondary_cpus(void) 190static void __init cyc2ns_init_secondary_cpus(void)
192{ 191{
@@ -937,12 +936,12 @@ void tsc_restore_sched_clock_state(void)
937} 936}
938 937
939#ifdef CONFIG_CPU_FREQ 938#ifdef CONFIG_CPU_FREQ
940/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency 939/*
940 * Frequency scaling support. Adjust the TSC based timer when the CPU frequency
941 * changes. 941 * changes.
942 * 942 *
943 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's 943 * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC
944 * not that important because current Opteron setups do not support 944 * as unstable and give up in those cases.
945 * scaling on SMP anyroads.
946 * 945 *
947 * Should fix up last_tsc too. Currently gettimeofday in the 946 * Should fix up last_tsc too. Currently gettimeofday in the
948 * first tick after the change will be slightly wrong. 947 * first tick after the change will be slightly wrong.
@@ -956,22 +955,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
956 void *data) 955 void *data)
957{ 956{
958 struct cpufreq_freqs *freq = data; 957 struct cpufreq_freqs *freq = data;
959 unsigned long *lpj;
960 958
961 lpj = &boot_cpu_data.loops_per_jiffy; 959 if (num_online_cpus() > 1) {
962#ifdef CONFIG_SMP 960 mark_tsc_unstable("cpufreq changes on SMP");
963 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 961 return 0;
964 lpj = &cpu_data(freq->cpu).loops_per_jiffy; 962 }
965#endif
966 963
967 if (!ref_freq) { 964 if (!ref_freq) {
968 ref_freq = freq->old; 965 ref_freq = freq->old;
969 loops_per_jiffy_ref = *lpj; 966 loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
970 tsc_khz_ref = tsc_khz; 967 tsc_khz_ref = tsc_khz;
971 } 968 }
969
972 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || 970 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
973 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { 971 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
974 *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); 972 boot_cpu_data.loops_per_jiffy =
973 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
975 974
976 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); 975 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
977 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) 976 if (!(freq->flags & CPUFREQ_CONST_LOOPS))