aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2005-05-17 00:53:28 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-17 10:59:15 -0400
commit312df5f1a1da780e084b328bcabb02a6dcd044c3 (patch)
tree04f0a70177979e4b8924015448a72644f1ce1c79 /arch/x86_64
parent0af2be0b721997512191e981a051fcb070b87260 (diff)
[PATCH] x86_64: Add pmtimer support
There are unfortunately more and more multi processor Opteron systems which don't have HPET timer support in the southbridge. This covers in particular Nvidia and VIA chipsets. They also don't guarantee that the TSCs are synchronized between CPUs; and especially with MP powernow the systems are nearly unusable because the time gets very inconsistent between CPUs. The timer code for x86-64 was originally written under the assumption that we could fall back to the HPET timer on such systems. But this doesn't work there. Another alternative is to use the ACPI PM timer as primary time source. This patch does that. The kernel only uses PM timer when there is no other choice because it has some disadvantages. Ported over from i386. It should be faster than the i386 version because I dropped the "read three times" workaround, but is still considerable slower than HPET and also does not work together with vsyscalls which have to be disabled. Cc: <mark.langsdorf@amd.com> Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig14
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/pmtimer.c101
-rw-r--r--arch/x86_64/kernel/time.c62
-rw-r--r--arch/x86_64/kernel/vsyscall.c3
5 files changed, 162 insertions, 19 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 44ee7f6acf7b..82cb2a3f127a 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -303,6 +303,20 @@ config HPET_TIMER
303 as it is off-chip. You can find the HPET spec at 303 as it is off-chip. You can find the HPET spec at
304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. 304 <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
305 305
306config X86_PM_TIMER
307 bool "PM timer"
308 default y
309 help
310 Support the ACPI PM timer for time keeping. This is slow,
311 but is useful on some chipsets without HPET on systems with more
312 than one CPU. On a single processor or single socket multi core
313 system it is normally not required.
314 When the PM timer is active 64bit vsyscalls are disabled
315 and should not be enabled (/proc/sys/kernel/vsyscall64 should
316 not be changed).
317 The kernel selects the PM timer only as a last resort, so it is
318 useful to enable just in case.
319
306config HPET_EMULATE_RTC 320config HPET_EMULATE_RTC
307 bool "Provide RTC interrupt" 321 bool "Provide RTC interrupt"
308 depends on HPET_TIMER && RTC=y 322 depends on HPET_TIMER && RTC=y
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 0a3318e08ab6..5ca4a4598fda 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o 28obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
29obj-$(CONFIG_SWIOTLB) += swiotlb.o 29obj-$(CONFIG_SWIOTLB) += swiotlb.o
30obj-$(CONFIG_KPROBES) += kprobes.o 30obj-$(CONFIG_KPROBES) += kprobes.o
31obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
31 32
32obj-$(CONFIG_MODULES) += module.o 33obj-$(CONFIG_MODULES) += module.o
33 34
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c
new file mode 100644
index 000000000000..feb5f108dd26
--- /dev/null
+++ b/arch/x86_64/kernel/pmtimer.c
@@ -0,0 +1,101 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <asm/io.h>
23#include <asm/proto.h>
24#include <asm/msr.h>
25#include <asm/vsyscall.h>
26
27/* The I/O port the PMTMR resides at.
28 * The location is detected during setup_arch(),
29 * in arch/i386/kernel/acpi/boot.c */
30u32 pmtmr_ioport;
31
32/* value of the Power timer at last timer interrupt */
33static u32 offset_delay;
34static u32 last_pmtmr_tick;
35
36#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
37
38static inline u32 cyc2us(u32 cycles)
39{
40 /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
41 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
42 *
43 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
44 * easily be multiplied with 286 (=0x11E) without having to fear
45 * u32 overflows.
46 */
47 cycles *= 286;
48 return (cycles >> 10);
49}
50
51int pmtimer_mark_offset(void)
52{
53 static int first_run = 1;
54 unsigned long tsc;
55 u32 lost;
56
57 u32 tick = inl(pmtmr_ioport);
58 u32 delta;
59
60 delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
61
62 last_pmtmr_tick = tick;
63 monotonic_base += delta * NSEC_PER_USEC;
64
65 delta += offset_delay;
66
67 lost = delta / (USEC_PER_SEC / HZ);
68 offset_delay = delta % (USEC_PER_SEC / HZ);
69
70 rdtscll(tsc);
71 vxtime.last_tsc = tsc - offset_delay * cpu_khz;
72
73 /* don't calculate delay for first run,
74 or if we've got less then a tick */
75 if (first_run || (lost < 1)) {
76 first_run = 0;
77 offset_delay = 0;
78 }
79
80 return lost - 1;
81}
82
83unsigned int do_gettimeoffset_pm(void)
84{
85 u32 now, offset, delta = 0;
86
87 offset = last_pmtmr_tick;
88 now = inl(pmtmr_ioport);
89 delta = (now - offset) & ACPI_PM_MASK;
90
91 return offset_delay + cyc2us(delta);
92}
93
94
95static int __init nopmtimer_setup(char *s)
96{
97 pmtmr_ioport = 0;
98 return 0;
99}
100
101__setup("nopmtimer", nopmtimer_setup);
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 0e3b60e1ac5b..735b6767c8ed 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -26,6 +26,8 @@
26#include <linux/sysdev.h> 26#include <linux/sysdev.h>
27#include <linux/bcd.h> 27#include <linux/bcd.h>
28#include <linux/kallsyms.h> 28#include <linux/kallsyms.h>
29#include <linux/acpi.h>
30#include <acpi/achware.h> /* for PM timer frequency */
29#include <asm/8253pit.h> 31#include <asm/8253pit.h>
30#include <asm/pgtable.h> 32#include <asm/pgtable.h>
31#include <asm/vsyscall.h> 33#include <asm/vsyscall.h>
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
396 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; 398 (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
397 399
398 vxtime.last = offset; 400 vxtime.last = offset;
401#ifdef CONFIG_X86_PM_TIMER
402 } else if (vxtime.mode == VXTIME_PMTMR) {
403 lost = pmtimer_mark_offset();
404#endif
399 } else { 405 } else {
400 offset = (((tsc - vxtime.last_tsc) * 406 offset = (((tsc - vxtime.last_tsc) *
401 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); 407 vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
@@ -898,6 +904,13 @@ void __init time_init(void)
898 hpet_period; 904 hpet_period;
899 cpu_khz = hpet_calibrate_tsc(); 905 cpu_khz = hpet_calibrate_tsc();
900 timename = "HPET"; 906 timename = "HPET";
907#ifdef CONFIG_X86_PM_TIMER
908 } else if (pmtmr_ioport) {
909 vxtime_hz = PM_TIMER_FREQUENCY;
910 timename = "PM";
911 pit_init();
912 cpu_khz = pit_calibrate_tsc();
913#endif
901 } else { 914 } else {
902 pit_init(); 915 pit_init();
903 cpu_khz = pit_calibrate_tsc(); 916 cpu_khz = pit_calibrate_tsc();
@@ -923,35 +936,50 @@ void __init time_init(void)
923} 936}
924 937
925/* 938/*
939 * Make an educated guess if the TSC is trustworthy and synchronized
940 * over all CPUs.
941 */
942static __init int unsynchronized_tsc(void)
943{
944#ifdef CONFIG_SMP
945 if (oem_force_hpet_timer())
946 return 1;
947 /* Intel systems are normally all synchronized. Exceptions
948 are handled in the OEM check above. */
949 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
950 return 0;
951 /* All in a single socket - should be synchronized */
952 if (cpus_weight(cpu_core_map[0]) == num_online_cpus())
953 return 0;
954#endif
955 /* Assume multi socket systems are not synchronized */
956 return num_online_cpus() > 1;
957}
958
959/*
926 * Decide after all CPUs are booted what mode gettimeofday should use. 960 * Decide after all CPUs are booted what mode gettimeofday should use.
927 */ 961 */
928void __init time_init_gtod(void) 962void __init time_init_gtod(void)
929{ 963{
930 char *timetype; 964 char *timetype;
931 965
932 /* 966 if (unsynchronized_tsc())
933 * AMD systems with more than one CPU don't have fully synchronized
934 * TSCs. Always use HPET gettimeofday for these, although it is slower.
935 * Intel SMP systems usually have synchronized TSCs, so use always
936 * the TSC.
937 *
938 * Exceptions:
939 * IBM Summit2 checked by oem_force_hpet_timer().
940 * AMD dual core may also not need HPET. Check me.
941 *
942 * Can be turned off with "notsc".
943 */
944 if (num_online_cpus() > 1 &&
945 boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
946 notsc = 1;
947 /* Some systems will want to disable TSC and use HPET. */
948 if (oem_force_hpet_timer())
949 notsc = 1; 967 notsc = 1;
950 if (vxtime.hpet_address && notsc) { 968 if (vxtime.hpet_address && notsc) {
951 timetype = "HPET"; 969 timetype = "HPET";
952 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; 970 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
953 vxtime.mode = VXTIME_HPET; 971 vxtime.mode = VXTIME_HPET;
954 do_gettimeoffset = do_gettimeoffset_hpet; 972 do_gettimeoffset = do_gettimeoffset_hpet;
973#ifdef CONFIG_X86_PM_TIMER
974 /* Using PM for gettimeofday is quite slow, but we have no other
975 choice because the TSC is too unreliable on some systems. */
976 } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
977 timetype = "PM";
978 do_gettimeoffset = do_gettimeoffset_pm;
979 vxtime.mode = VXTIME_PMTMR;
980 sysctl_vsyscall = 0;
981 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
982#endif
955 } else { 983 } else {
956 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; 984 timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC";
957 vxtime.mode = VXTIME_TSC; 985 vxtime.mode = VXTIME_TSC;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b4b8dc59663a..1a7541435ef0 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
65 usec = (__xtime.tv_nsec / 1000) + 65 usec = (__xtime.tv_nsec / 1000) +
66 (__jiffies - __wall_jiffies) * (1000000 / HZ); 66 (__jiffies - __wall_jiffies) * (1000000 / HZ);
67 67
68 if (__vxtime.mode == VXTIME_TSC) { 68 if (__vxtime.mode != VXTIME_HPET) {
69 sync_core(); 69 sync_core();
70 rdtscll(t); 70 rdtscll(t);
71 if (t < __vxtime.last_tsc) 71 if (t < __vxtime.last_tsc)
@@ -217,7 +217,6 @@ static int __init vsyscall_init(void)
217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 217 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 218 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
219 map_vsyscall(); 219 map_vsyscall();
220 sysctl_vsyscall = 1;
221 register_sysctl_table(kernel_root_table2, 0); 220 register_sysctl_table(kernel_root_table2, 0);
222 return 0; 221 return 0;
223} 222}