diff options
author | Andi Kleen <ak@suse.de> | 2005-05-17 00:53:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-05-17 10:59:15 -0400 |
commit | 312df5f1a1da780e084b328bcabb02a6dcd044c3 (patch) | |
tree | 04f0a70177979e4b8924015448a72644f1ce1c79 /arch/x86_64 | |
parent | 0af2be0b721997512191e981a051fcb070b87260 (diff) |
[PATCH] x86_64: Add pmtimer support
There are unfortunately more and more multi processor Opteron systems which
don't have HPET timer support in the southbridge. This covers in particular
Nvidia and VIA chipsets. They also don't guarantee that the TSCs are
synchronized between CPUs; and especially with MP powernow the systems are
nearly unusable because the time gets very inconsistent between CPUs.
The timer code for x86-64 was originally written under the assumption that we
could fall back to the HPET timer on such systems. But this doesn't work
there.
Another alternative is to use the ACPI PM timer as primary time source. This
patch does that. The kernel only uses PM timer when there is no other choice
because it has some disadvantages.
Ported over from i386. It should be faster than the i386 version because I
dropped the "read three times" workaround, but is still considerable slower
than HPET and also does not work together with vsyscalls which have to be
disabled.
Cc: <mark.langsdorf@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 14 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/pmtimer.c | 101 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 62 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 3 |
5 files changed, 162 insertions, 19 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 44ee7f6acf7b..82cb2a3f127a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -303,6 +303,20 @@ config HPET_TIMER | |||
303 | as it is off-chip. You can find the HPET spec at | 303 | as it is off-chip. You can find the HPET spec at |
304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. | 304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. |
305 | 305 | ||
306 | config X86_PM_TIMER | ||
307 | bool "PM timer" | ||
308 | default y | ||
309 | help | ||
310 | Support the ACPI PM timer for time keeping. This is slow, | ||
311 | but is useful on some chipsets without HPET on systems with more | ||
312 | than one CPU. On a single processor or single socket multi core | ||
313 | system it is normally not required. | ||
314 | When the PM timer is active 64bit vsyscalls are disabled | ||
315 | and should not be enabled (/proc/sys/kernel/vsyscall64 should | ||
316 | not be changed). | ||
317 | The kernel selects the PM timer only as a last resort, so it is | ||
318 | useful to enable just in case. | ||
319 | |||
306 | config HPET_EMULATE_RTC | 320 | config HPET_EMULATE_RTC |
307 | bool "Provide RTC interrupt" | 321 | bool "Provide RTC interrupt" |
308 | depends on HPET_TIMER && RTC=y | 322 | depends on HPET_TIMER && RTC=y |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0a3318e08ab6..5ca4a4598fda 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | |||
28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o | 28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o |
29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o | 29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o |
30 | obj-$(CONFIG_KPROBES) += kprobes.o | 30 | obj-$(CONFIG_KPROBES) += kprobes.o |
31 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | ||
31 | 32 | ||
32 | obj-$(CONFIG_MODULES) += module.o | 33 | obj-$(CONFIG_MODULES) += module.o |
33 | 34 | ||
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c new file mode 100644 index 000000000000..feb5f108dd26 --- /dev/null +++ b/arch/x86_64/kernel/pmtimer.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* Ported over from i386 by AK, original copyright was: | ||
2 | * | ||
3 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
4 | * | ||
5 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
6 | * southbridges as primary timing source for the Linux kernel. | ||
7 | * | ||
8 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
9 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
10 | * | ||
11 | * This file is licensed under the GPL v2. | ||
12 | * | ||
13 | * Dropped all the hardware bug workarounds for now. Hopefully they | ||
14 | * are not needed on 64bit chipsets. | ||
15 | */ | ||
16 | |||
17 | #include <linux/jiffies.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/time.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/cpumask.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/proto.h> | ||
24 | #include <asm/msr.h> | ||
25 | #include <asm/vsyscall.h> | ||
26 | |||
27 | /* The I/O port the PMTMR resides at. | ||
28 | * The location is detected during setup_arch(), | ||
29 | * in arch/i386/kernel/acpi/boot.c */ | ||
30 | u32 pmtmr_ioport; | ||
31 | |||
32 | /* value of the Power timer at last timer interrupt */ | ||
33 | static u32 offset_delay; | ||
34 | static u32 last_pmtmr_tick; | ||
35 | |||
36 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
37 | |||
38 | static inline u32 cyc2us(u32 cycles) | ||
39 | { | ||
40 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
41 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
42 | * | ||
43 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
44 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
45 | * u32 overflows. | ||
46 | */ | ||
47 | cycles *= 286; | ||
48 | return (cycles >> 10); | ||
49 | } | ||
50 | |||
51 | int pmtimer_mark_offset(void) | ||
52 | { | ||
53 | static int first_run = 1; | ||
54 | unsigned long tsc; | ||
55 | u32 lost; | ||
56 | |||
57 | u32 tick = inl(pmtmr_ioport); | ||
58 | u32 delta; | ||
59 | |||
60 | delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); | ||
61 | |||
62 | last_pmtmr_tick = tick; | ||
63 | monotonic_base += delta * NSEC_PER_USEC; | ||
64 | |||
65 | delta += offset_delay; | ||
66 | |||
67 | lost = delta / (USEC_PER_SEC / HZ); | ||
68 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
69 | |||
70 | rdtscll(tsc); | ||
71 | vxtime.last_tsc = tsc - offset_delay * cpu_khz; | ||
72 | |||
73 | /* don't calculate delay for first run, | ||
74 | or if we've got less then a tick */ | ||
75 | if (first_run || (lost < 1)) { | ||
76 | first_run = 0; | ||
77 | offset_delay = 0; | ||
78 | } | ||
79 | |||
80 | return lost - 1; | ||
81 | } | ||
82 | |||
83 | unsigned int do_gettimeoffset_pm(void) | ||
84 | { | ||
85 | u32 now, offset, delta = 0; | ||
86 | |||
87 | offset = last_pmtmr_tick; | ||
88 | now = inl(pmtmr_ioport); | ||
89 | delta = (now - offset) & ACPI_PM_MASK; | ||
90 | |||
91 | return offset_delay + cyc2us(delta); | ||
92 | } | ||
93 | |||
94 | |||
95 | static int __init nopmtimer_setup(char *s) | ||
96 | { | ||
97 | pmtmr_ioport = 0; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | __setup("nopmtimer", nopmtimer_setup); | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 0e3b60e1ac5b..735b6767c8ed 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/bcd.h> | 27 | #include <linux/bcd.h> |
28 | #include <linux/kallsyms.h> | 28 | #include <linux/kallsyms.h> |
29 | #include <linux/acpi.h> | ||
30 | #include <acpi/achware.h> /* for PM timer frequency */ | ||
29 | #include <asm/8253pit.h> | 31 | #include <asm/8253pit.h> |
30 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
31 | #include <asm/vsyscall.h> | 33 | #include <asm/vsyscall.h> |
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
396 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; | 398 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; |
397 | 399 | ||
398 | vxtime.last = offset; | 400 | vxtime.last = offset; |
401 | #ifdef CONFIG_X86_PM_TIMER | ||
402 | } else if (vxtime.mode == VXTIME_PMTMR) { | ||
403 | lost = pmtimer_mark_offset(); | ||
404 | #endif | ||
399 | } else { | 405 | } else { |
400 | offset = (((tsc - vxtime.last_tsc) * | 406 | offset = (((tsc - vxtime.last_tsc) * |
401 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); | 407 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); |
@@ -898,6 +904,13 @@ void __init time_init(void) | |||
898 | hpet_period; | 904 | hpet_period; |
899 | cpu_khz = hpet_calibrate_tsc(); | 905 | cpu_khz = hpet_calibrate_tsc(); |
900 | timename = "HPET"; | 906 | timename = "HPET"; |
907 | #ifdef CONFIG_X86_PM_TIMER | ||
908 | } else if (pmtmr_ioport) { | ||
909 | vxtime_hz = PM_TIMER_FREQUENCY; | ||
910 | timename = "PM"; | ||
911 | pit_init(); | ||
912 | cpu_khz = pit_calibrate_tsc(); | ||
913 | #endif | ||
901 | } else { | 914 | } else { |
902 | pit_init(); | 915 | pit_init(); |
903 | cpu_khz = pit_calibrate_tsc(); | 916 | cpu_khz = pit_calibrate_tsc(); |
@@ -923,35 +936,50 @@ void __init time_init(void) | |||
923 | } | 936 | } |
924 | 937 | ||
925 | /* | 938 | /* |
939 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
940 | * over all CPUs. | ||
941 | */ | ||
942 | static __init int unsynchronized_tsc(void) | ||
943 | { | ||
944 | #ifdef CONFIG_SMP | ||
945 | if (oem_force_hpet_timer()) | ||
946 | return 1; | ||
947 | /* Intel systems are normally all synchronized. Exceptions | ||
948 | are handled in the OEM check above. */ | ||
949 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
950 | return 0; | ||
951 | /* All in a single socket - should be synchronized */ | ||
952 | if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) | ||
953 | return 0; | ||
954 | #endif | ||
955 | /* Assume multi socket systems are not synchronized */ | ||
956 | return num_online_cpus() > 1; | ||
957 | } | ||
958 | |||
959 | /* | ||
926 | * Decide after all CPUs are booted what mode gettimeofday should use. | 960 | * Decide after all CPUs are booted what mode gettimeofday should use. |
927 | */ | 961 | */ |
928 | void __init time_init_gtod(void) | 962 | void __init time_init_gtod(void) |
929 | { | 963 | { |
930 | char *timetype; | 964 | char *timetype; |
931 | 965 | ||
932 | /* | 966 | if (unsynchronized_tsc()) |
933 | * AMD systems with more than one CPU don't have fully synchronized | ||
934 | * TSCs. Always use HPET gettimeofday for these, although it is slower. | ||
935 | * Intel SMP systems usually have synchronized TSCs, so use always | ||
936 | * the TSC. | ||
937 | * | ||
938 | * Exceptions: | ||
939 | * IBM Summit2 checked by oem_force_hpet_timer(). | ||
940 | * AMD dual core may also not need HPET. Check me. | ||
941 | * | ||
942 | * Can be turned off with "notsc". | ||
943 | */ | ||
944 | if (num_online_cpus() > 1 && | ||
945 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
946 | notsc = 1; | ||
947 | /* Some systems will want to disable TSC and use HPET. */ | ||
948 | if (oem_force_hpet_timer()) | ||
949 | notsc = 1; | 967 | notsc = 1; |
950 | if (vxtime.hpet_address && notsc) { | 968 | if (vxtime.hpet_address && notsc) { |
951 | timetype = "HPET"; | 969 | timetype = "HPET"; |
952 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; | 970 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; |
953 | vxtime.mode = VXTIME_HPET; | 971 | vxtime.mode = VXTIME_HPET; |
954 | do_gettimeoffset = do_gettimeoffset_hpet; | 972 | do_gettimeoffset = do_gettimeoffset_hpet; |
973 | #ifdef CONFIG_X86_PM_TIMER | ||
974 | /* Using PM for gettimeofday is quite slow, but we have no other | ||
975 | choice because the TSC is too unreliable on some systems. */ | ||
976 | } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { | ||
977 | timetype = "PM"; | ||
978 | do_gettimeoffset = do_gettimeoffset_pm; | ||
979 | vxtime.mode = VXTIME_PMTMR; | ||
980 | sysctl_vsyscall = 0; | ||
981 | printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); | ||
982 | #endif | ||
955 | } else { | 983 | } else { |
956 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; | 984 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; |
957 | vxtime.mode = VXTIME_TSC; | 985 | vxtime.mode = VXTIME_TSC; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b4b8dc59663a..1a7541435ef0 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
65 | usec = (__xtime.tv_nsec / 1000) + | 65 | usec = (__xtime.tv_nsec / 1000) + |
66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); | 66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); |
67 | 67 | ||
68 | if (__vxtime.mode == VXTIME_TSC) { | 68 | if (__vxtime.mode != VXTIME_HPET) { |
69 | sync_core(); | 69 | sync_core(); |
70 | rdtscll(t); | 70 | rdtscll(t); |
71 | if (t < __vxtime.last_tsc) | 71 | if (t < __vxtime.last_tsc) |
@@ -217,7 +217,6 @@ static int __init vsyscall_init(void) | |||
217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
219 | map_vsyscall(); | 219 | map_vsyscall(); |
220 | sysctl_vsyscall = 1; | ||
221 | register_sysctl_table(kernel_root_table2, 0); | 220 | register_sysctl_table(kernel_root_table2, 0); |
222 | return 0; | 221 | return 0; |
223 | } | 222 | } |