author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/kernel/tsc.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)

Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c

Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r--	arch/x86/kernel/tsc.c	183
1 files changed, 100 insertions, 83 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 26a863a9c2a8..6cc6922262af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
+static int no_sched_irq_time;
+
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
+	if (!strncmp(str, "noirqtime", 9))
+		no_sched_irq_time = 1;
 	return 1;
 }
 
@@ -423,7 +427,7 @@ unsigned long native_calibrate_tsc(void)
 	 * the delta to the previous read. We keep track of the min
 	 * and max values of that delta. The delta is mostly defined
 	 * by the IO time of the PIT access, so we can detect when a
-	 * SMI/SMM disturbance happend between the two reads. If the
+	 * SMI/SMM disturbance happened between the two reads. If the
 	 * maximum time is significantly larger than the minimum time,
 	 * then we discard the result and have another try.
 	 *
@@ -460,7 +464,7 @@ unsigned long native_calibrate_tsc(void)
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
 
 		/* hpet or pmtimer available ? */
-		if (!hpet && !ref1 && !ref2)
+		if (ref1 == ref2)
 			continue;
 
 		/* Check, whether the sampling was disturbed by an SMI */
@@ -655,7 +659,7 @@ void restore_sched_clock_state(void)
 
 	local_irq_save(flags);
 
-	__get_cpu_var(cyc2ns_offset) = 0;
+	__this_cpu_write(cyc2ns_offset, 0);
 	offset = cyc2ns_suspend - sched_clock();
 
 	for_each_possible_cpu(cpu)
@@ -759,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
 		ret : clocksource_tsc.cycle_last;
 }
 
-#ifdef CONFIG_X86_64
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret;
-
-	/*
-	 * Surround the RDTSC by barriers, to make sure it's not
-	 * speculated to outside the seqlock critical section and
-	 * does not cause time warps:
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)vget_cycles();
-	rdtsc_barrier();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-#endif
-
 static void resume_tsc(struct clocksource *cs)
 {
 	clocksource_tsc.cycle_last = 0;
@@ -801,6 +786,7 @@ void mark_tsc_unstable(char *reason)
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
 		sched_clock_stable = 0;
+		disable_sched_clock_irqtime();
 		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
@@ -867,6 +853,9 @@ __cpuinit int unsynchronized_tsc(void)
 
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
@@ -874,14 +863,92 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}
 
-	return tsc_unstable;
+	return 0;
 }
 
-static void __init init_tsc_clocksource(void)
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This functions uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by 1% of the fast early
+ * calibration, we throw out the new calibration and use the
+ * early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
 {
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (ref_start == ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+			"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+					(unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+}
+
+
+static int __init init_tsc_clocksource(void)
+{
+	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
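The kernel-doc in the hunk above says the refined value is discarded when it drifts more than 1% from the fast early calibration; the test in the patch is abs(tsc_khz - freq) > tsc_khz/100. The short userspace sketch below works that arithmetic through with a made-up 2.5 GHz part (tsc_khz = 2500000) and reuses the same kHz-to-MHz formatting as the printk; the numbers are illustrative only.

/* Worked example of the "within 1%" acceptance test and the
 * kHz -> "MHz.mmm" formatting used above. The frequencies are
 * made up; the check mirrors abs(tsc_khz - freq) > tsc_khz/100.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long tsc_khz = 2500000;	/* early (fast) calibration */
	unsigned long freq = 2489123;		/* hypothetical refined value */
	long diff = (long)tsc_khz - (long)freq;

	/* 1% of 2500000 kHz is 25000 kHz, so 2475000..2525000 is accepted */
	if (labs(diff) > (long)(tsc_khz / 100))
		printf("refined value rejected, keeping %lu kHz\n", tsc_khz);
	else
		printf("Refined TSC clocksource calibration: %lu.%03lu MHz.\n",
		       freq / 1000, freq % 1000);
	return 0;
}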
@@ -889,62 +956,14 @@ static void __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
-
-#ifdef CONFIG_X86_64
 /*
- * calibrate_cpu is used on systems with fixed rate TSCs to determine
- * processor frequency
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
  */
-#define TICK_COUNT 100000000
-static unsigned long __init calibrate_cpu(void)
-{
-	int tsc_start, tsc_now;
-	int i, no_ctr_free;
-	unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
-	unsigned long flags;
-
-	for (i = 0; i < 4; i++)
-		if (avail_to_resrv_perfctr_nmi_bit(i))
-			break;
-	no_ctr_free = (i == 4);
-	if (no_ctr_free) {
-		WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
-		     "cpu_khz value may be incorrect.\n");
-		i = 3;
-		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		rdmsrl(MSR_K7_PERFCTR3, pmc3);
-	} else {
-		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-	local_irq_save(flags);
-	/* start measuring cycles, incrementing from 0 */
-	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
-	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
-	rdtscl(tsc_start);
-	do {
-		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
-		tsc_now = get_cycles();
-	} while ((tsc_now - tsc_start) < TICK_COUNT);
-
-	local_irq_restore(flags);
-	if (no_ctr_free) {
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		wrmsrl(MSR_K7_PERFCTR3, pmc3);
-		wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
-	} else {
-		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-
-	return pmc_now * tsc_khz / (tsc_now - tsc_start);
-}
-#else
-static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
-#endif
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
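The new comment in the hunk above leans on initcall ordering: in include/linux/init.h, fs_initcall entries run at level 5 and device_initcall entries at level 6, so registering init_tsc_clocksource() as a device_initcall makes it run after an HPET that is only brought up at fs_initcall time. The sketch below merely simulates that level ordering in userspace; the level numbers are the assumption carried over, and the kernel actually collects initcalls via linker sections rather than an array like this.

/* Userspace sketch of initcall-level ordering. Levels 5 (fs_initcall)
 * and 6 (device_initcall) follow include/linux/init.h; the function
 * names here are illustrative stand-ins.
 */
#include <stdio.h>

struct initcall {
	int level;
	void (*fn)(void);
};

static void hpet_init_stub(void)           { printf("hpet ready\n"); }
static void tsc_clocksource_init_stub(void) { printf("tsc clocksource work scheduled\n"); }

int main(void)
{
	struct initcall calls[] = {
		{ 6, tsc_clocksource_init_stub },	/* device_initcall */
		{ 5, hpet_init_stub },			/* fs_initcall */
	};

	/* The kernel walks levels in ascending order, so level 5 runs first. */
	for (int level = 0; level <= 7; level++)
		for (unsigned int i = 0; i < sizeof(calls) / sizeof(calls[0]); i++)
			if (calls[i].level == level)
				calls[i].fn();
	return 0;
}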
@@ -964,10 +983,6 @@ void __init tsc_init(void)
 		return;
 	}
 
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-		cpu_khz = calibrate_cpu();
-
 	printk("Detected %lu.%03lu MHz processor.\n",
 			(unsigned long)cpu_khz / 1000,
 			(unsigned long)cpu_khz % 1000);
@@ -987,6 +1002,9 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
 	lpj_fine = lpj;
@@ -999,6 +1017,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }
 