Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r-- | arch/x86/kernel/tsc.c | 183
1 file changed, 100 insertions, 83 deletions

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 26a863a9c2a8..6cc6922262af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
 
 __setup("notsc", notsc_setup);
 
+static int no_sched_irq_time;
+
 static int __init tsc_setup(char *str)
 {
 	if (!strcmp(str, "reliable"))
 		tsc_clocksource_reliable = 1;
+	if (!strncmp(str, "noirqtime", 9))
+		no_sched_irq_time = 1;
 	return 1;
 }
 
@@ -423,7 +427,7 @@ unsigned long native_calibrate_tsc(void)
 	 * the delta to the previous read. We keep track of the min
 	 * and max values of that delta. The delta is mostly defined
 	 * by the IO time of the PIT access, so we can detect when a
-	 * SMI/SMM disturbance happend between the two reads. If the
+	 * SMI/SMM disturbance happened between the two reads. If the
 	 * maximum time is significantly larger than the minimum time,
 	 * then we discard the result and have another try.
 	 *
@@ -460,7 +464,7 @@ unsigned long native_calibrate_tsc(void)
 		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
 
 		/* hpet or pmtimer available ? */
-		if (!hpet && !ref1 && !ref2)
+		if (ref1 == ref2)
 			continue;
 
 		/* Check, whether the sampling was disturbed by an SMI */
@@ -655,7 +659,7 @@ void restore_sched_clock_state(void)
 
 	local_irq_save(flags);
 
-	__get_cpu_var(cyc2ns_offset) = 0;
+	__this_cpu_write(cyc2ns_offset, 0);
 	offset = cyc2ns_suspend - sched_clock();
 
 	for_each_possible_cpu(cpu)
@@ -759,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
 		ret : clocksource_tsc.cycle_last;
 }
 
-#ifdef CONFIG_X86_64
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
-	cycle_t ret;
-
-	/*
-	 * Surround the RDTSC by barriers, to make sure it's not
-	 * speculated to outside the seqlock critical section and
-	 * does not cause time warps:
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)vget_cycles();
-	rdtsc_barrier();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-#endif
-
 static void resume_tsc(struct clocksource *cs)
 {
 	clocksource_tsc.cycle_last = 0;
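The vread_tsc() block removed above is the vsyscall read path; its comment explains why RDTSC is bracketed by barriers so it cannot be speculated outside the seqlock critical section. As a rough user-space illustration of the same idea (this sketch is not part of the patch; fenced_rdtsc() is a made-up name standing in for the kernel's rdtsc_barrier()/vget_cycles() pair):

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>	/* __rdtsc(), _mm_lfence() */

/* Keep RDTSC from being speculated outside the section it belongs to. */
static uint64_t fenced_rdtsc(void)
{
	uint64_t t;

	_mm_lfence();	/* don't let RDTSC issue before earlier loads complete */
	t = __rdtsc();
	_mm_lfence();	/* ...and don't let later work begin before it */
	return t;
}

int main(void)
{
	uint64_t a = fenced_rdtsc();
	uint64_t b = fenced_rdtsc();

	printf("tsc delta: %llu cycles\n", (unsigned long long)(b - a));
	return 0;
}

Builds with gcc -O2 on x86-64; the kernel's rdtsc_barrier() picks the fence instruction per CPU vendor, which this sketch glosses over.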
@@ -801,6 +786,7 @@ void mark_tsc_unstable(char *reason)
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
 		sched_clock_stable = 0;
+		disable_sched_clock_irqtime();
 		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
@@ -867,6 +853,9 @@ __cpuinit int unsynchronized_tsc(void)
 
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
@@ -874,14 +863,92 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}
 
-	return tsc_unstable;
+	return 0;
 }
 
-static void __init init_tsc_clocksource(void)
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This functions uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by 1% of the fast early
+ * calibration, we throw out the new calibration and use the
+ * early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
 {
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (ref_start == ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+				(unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+}
+
+
+static int __init init_tsc_clocksource(void)
+{
+	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
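The kernel-doc block added above describes the acceptance rule: the refined value only replaces the early calibration when the two agree to within 1%. A minimal sketch of that check outside the kernel (the function name and the sample values below are made up for illustration):

#include <stdio.h>

/* Accept the refined frequency only if it is within 1% of the early one,
 * mirroring "if (abs(tsc_khz - freq) > tsc_khz/100) goto out;". */
static int refined_calibration_acceptable(unsigned long early_khz,
					  unsigned long refined_khz)
{
	unsigned long diff = early_khz > refined_khz ?
			early_khz - refined_khz : refined_khz - early_khz;

	return diff <= early_khz / 100;
}

int main(void)
{
	/* 2400000 kHz early estimate vs. 2412000 kHz refined: 0.5%, accepted */
	printf("%d\n", refined_calibration_acceptable(2400000, 2412000));
	/* a 3% disagreement is rejected; the early calibration is kept */
	printf("%d\n", refined_calibration_acceptable(2400000, 2472000));
	return 0;
}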
@@ -889,62 +956,14 @@ static void __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
-
-#ifdef CONFIG_X86_64
 /*
- * calibrate_cpu is used on systems with fixed rate TSCs to determine
- * processor frequency
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
  */
-#define TICK_COUNT 100000000
-static unsigned long __init calibrate_cpu(void)
-{
-	int tsc_start, tsc_now;
-	int i, no_ctr_free;
-	unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
-	unsigned long flags;
-
-	for (i = 0; i < 4; i++)
-		if (avail_to_resrv_perfctr_nmi_bit(i))
-			break;
-	no_ctr_free = (i == 4);
-	if (no_ctr_free) {
-		WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
-			"cpu_khz value may be incorrect.\n");
-		i = 3;
-		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		rdmsrl(MSR_K7_PERFCTR3, pmc3);
-	} else {
-		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-	local_irq_save(flags);
-	/* start measuring cycles, incrementing from 0 */
-	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
-	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
-	rdtscl(tsc_start);
-	do {
-		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
-		tsc_now = get_cycles();
-	} while ((tsc_now - tsc_start) < TICK_COUNT);
-
-	local_irq_restore(flags);
-	if (no_ctr_free) {
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		wrmsrl(MSR_K7_PERFCTR3, pmc3);
-		wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
-	} else {
-		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-
-	return pmc_now * tsc_khz / (tsc_now - tsc_start);
-}
-#else
-static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
-#endif
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
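The new comment relies on initcall ordering: for built-in code the levels run in ascending order (core, postcore, arch, subsys, fs, device, late), so a device_initcall() is guaranteed to run after every fs_initcall(), by which point the HPET has had its chance to come up. A minimal built-in sketch of such a registration (illustrative only, not part of the patch; the function name is made up):

#include <linux/init.h>
#include <linux/kernel.h>

static int __init runs_after_fs_initcalls(void)
{
	/* Everything registered with fs_initcall() has already executed. */
	pr_info("device_initcall level reached\n");
	return 0;
}
device_initcall(runs_after_fs_initcalls);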
@@ -964,10 +983,6 @@ void __init tsc_init(void)
 		return;
 	}
 
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
-		cpu_khz = calibrate_cpu();
-
 	printk("Detected %lu.%03lu MHz processor.\n",
 			(unsigned long)cpu_khz / 1000,
 			(unsigned long)cpu_khz % 1000);
@@ -987,6 +1002,9 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 	tsc_disabled = 0;
 
+	if (!no_sched_irq_time)
+		enable_sched_clock_irqtime();
+
 	lpj = ((u64)tsc_khz * 1000);
 	do_div(lpj, HZ);
 	lpj_fine = lpj;
@@ -999,6 +1017,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }
 
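tsc_refine_calibration_work() uses the common self-rearming delayed-work pattern: the first expiry records a start sample and re-arms the work about a second later, and the second expiry takes the stop sample and finishes. A stripped-down module sketch of that pattern (illustrative only; all names below are made up and unrelated to the patch):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void two_phase_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(two_phase_work, two_phase_work_fn);

static int phase;

static void two_phase_work_fn(struct work_struct *work)
{
	if (phase++ == 0) {
		/* first expiry: take the start sample, re-arm ~1s later */
		pr_info("two_phase: start sample\n");
		schedule_delayed_work(&two_phase_work, HZ);
		return;
	}
	/* second expiry: take the stop sample and finish */
	pr_info("two_phase: stop sample, done\n");
}

static int __init two_phase_init(void)
{
	schedule_delayed_work(&two_phase_work, 0);
	return 0;
}

static void __exit two_phase_exit(void)
{
	cancel_delayed_work_sync(&two_phase_work);
}

module_init(two_phase_init);
module_exit(two_phase_exit);
MODULE_LICENSE("GPL");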