diff options
-rw-r--r-- | arch/x86_64/Kconfig | 14 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/pmtimer.c | 101 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 62 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 3 | ||||
-rw-r--r-- | include/asm-x86_64/proto.h | 5 | ||||
-rw-r--r-- | include/asm-x86_64/vsyscall.h | 3 |
7 files changed, 170 insertions, 19 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 44ee7f6acf7b..82cb2a3f127a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -303,6 +303,20 @@ config HPET_TIMER | |||
303 | as it is off-chip. You can find the HPET spec at | 303 | as it is off-chip. You can find the HPET spec at |
304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. | 304 | <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. |
305 | 305 | ||
306 | config X86_PM_TIMER | ||
307 | bool "PM timer" | ||
308 | default y | ||
309 | help | ||
310 | Support the ACPI PM timer for time keeping. This is slow, | ||
311 | but is useful on some chipsets without HPET on systems with more | ||
312 | than one CPU. On a single processor or single socket multi core | ||
313 | system it is normally not required. | ||
314 | When the PM timer is active 64bit vsyscalls are disabled | ||
315 | and should not be enabled (/proc/sys/kernel/vsyscall64 should | ||
316 | not be changed). | ||
317 | The kernel selects the PM timer only as a last resort, so it is | ||
318 | useful to enable just in case. | ||
319 | |||
306 | config HPET_EMULATE_RTC | 320 | config HPET_EMULATE_RTC |
307 | bool "Provide RTC interrupt" | 321 | bool "Provide RTC interrupt" |
308 | depends on HPET_TIMER && RTC=y | 322 | depends on HPET_TIMER && RTC=y |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 0a3318e08ab6..5ca4a4598fda 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -28,6 +28,7 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | |||
28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o | 28 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o |
29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o | 29 | obj-$(CONFIG_SWIOTLB) += swiotlb.o |
30 | obj-$(CONFIG_KPROBES) += kprobes.o | 30 | obj-$(CONFIG_KPROBES) += kprobes.o |
31 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | ||
31 | 32 | ||
32 | obj-$(CONFIG_MODULES) += module.o | 33 | obj-$(CONFIG_MODULES) += module.o |
33 | 34 | ||
diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c new file mode 100644 index 000000000000..feb5f108dd26 --- /dev/null +++ b/arch/x86_64/kernel/pmtimer.c | |||
@@ -0,0 +1,101 @@ | |||
1 | /* Ported over from i386 by AK, original copyright was: | ||
2 | * | ||
3 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
4 | * | ||
5 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
6 | * southbridges as primary timing source for the Linux kernel. | ||
7 | * | ||
8 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
9 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
10 | * | ||
11 | * This file is licensed under the GPL v2. | ||
12 | * | ||
13 | * Dropped all the hardware bug workarounds for now. Hopefully they | ||
14 | * are not needed on 64bit chipsets. | ||
15 | */ | ||
16 | |||
17 | #include <linux/jiffies.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/time.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/cpumask.h> | ||
22 | #include <asm/io.h> | ||
23 | #include <asm/proto.h> | ||
24 | #include <asm/msr.h> | ||
25 | #include <asm/vsyscall.h> | ||
26 | |||
27 | /* The I/O port the PMTMR resides at. | ||
28 | * The location is detected during setup_arch(), | ||
29 | * in arch/i386/kernel/acpi/boot.c */ | ||
30 | u32 pmtmr_ioport; | ||
31 | |||
32 | /* offset_delay: microseconds left over past the last whole tick; last_pmtmr_tick: PM timer value read at the last timer interrupt */ | ||
33 | static u32 offset_delay; | ||
34 | static u32 last_pmtmr_tick; | ||
35 | |||
36 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
37 | |||
38 | static inline u32 cyc2us(u32 cycles) | ||
39 | { | ||
40 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
41 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
42 | * | ||
43 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
44 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
45 | * u32 overflows. | ||
46 | */ | ||
47 | cycles *= 286; | ||
48 | return (cycles >> 10); | ||
49 | } | ||
50 | |||
51 | int pmtimer_mark_offset(void) | ||
52 | { | ||
53 | static int first_run = 1; | ||
54 | unsigned long tsc; | ||
55 | u32 lost; | ||
56 | |||
57 | u32 tick = inl(pmtmr_ioport); | ||
58 | u32 delta; | ||
59 | |||
60 | delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); | ||
61 | |||
62 | last_pmtmr_tick = tick; | ||
63 | monotonic_base += delta * NSEC_PER_USEC; | ||
64 | |||
65 | delta += offset_delay; | ||
66 | |||
67 | lost = delta / (USEC_PER_SEC / HZ); | ||
68 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
69 | |||
70 | rdtscll(tsc); | ||
71 | vxtime.last_tsc = tsc - offset_delay * cpu_khz; | ||
72 | |||
73 | /* don't calculate delay for first run, | ||
74 | or if we've got less then a tick */ | ||
75 | if (first_run || (lost < 1)) { | ||
76 | first_run = 0; | ||
77 | offset_delay = 0; | ||
78 | } | ||
79 | |||
80 | return lost - 1; | ||
81 | } | ||
82 | |||
83 | unsigned int do_gettimeoffset_pm(void) | ||
84 | { | ||
85 | u32 now, offset, delta = 0; | ||
86 | |||
87 | offset = last_pmtmr_tick; | ||
88 | now = inl(pmtmr_ioport); | ||
89 | delta = (now - offset) & ACPI_PM_MASK; | ||
90 | |||
91 | return offset_delay + cyc2us(delta); | ||
92 | } | ||
93 | |||
94 | |||
95 | static int __init nopmtimer_setup(char *s) | ||
96 | { | ||
97 | pmtmr_ioport = 0; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | __setup("nopmtimer", nopmtimer_setup); | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 0e3b60e1ac5b..735b6767c8ed 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/bcd.h> | 27 | #include <linux/bcd.h> |
28 | #include <linux/kallsyms.h> | 28 | #include <linux/kallsyms.h> |
29 | #include <linux/acpi.h> | ||
30 | #include <acpi/achware.h> /* for PM timer frequency */ | ||
29 | #include <asm/8253pit.h> | 31 | #include <asm/8253pit.h> |
30 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
31 | #include <asm/vsyscall.h> | 33 | #include <asm/vsyscall.h> |
@@ -396,6 +398,10 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
396 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; | 398 | (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; |
397 | 399 | ||
398 | vxtime.last = offset; | 400 | vxtime.last = offset; |
401 | #ifdef CONFIG_X86_PM_TIMER | ||
402 | } else if (vxtime.mode == VXTIME_PMTMR) { | ||
403 | lost = pmtimer_mark_offset(); | ||
404 | #endif | ||
399 | } else { | 405 | } else { |
400 | offset = (((tsc - vxtime.last_tsc) * | 406 | offset = (((tsc - vxtime.last_tsc) * |
401 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); | 407 | vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); |
@@ -898,6 +904,13 @@ void __init time_init(void) | |||
898 | hpet_period; | 904 | hpet_period; |
899 | cpu_khz = hpet_calibrate_tsc(); | 905 | cpu_khz = hpet_calibrate_tsc(); |
900 | timename = "HPET"; | 906 | timename = "HPET"; |
907 | #ifdef CONFIG_X86_PM_TIMER | ||
908 | } else if (pmtmr_ioport) { | ||
909 | vxtime_hz = PM_TIMER_FREQUENCY; | ||
910 | timename = "PM"; | ||
911 | pit_init(); | ||
912 | cpu_khz = pit_calibrate_tsc(); | ||
913 | #endif | ||
901 | } else { | 914 | } else { |
902 | pit_init(); | 915 | pit_init(); |
903 | cpu_khz = pit_calibrate_tsc(); | 916 | cpu_khz = pit_calibrate_tsc(); |
@@ -923,35 +936,50 @@ void __init time_init(void) | |||
923 | } | 936 | } |
924 | 937 | ||
925 | /* | 938 | /* |
939 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
940 | * over all CPUs. | ||
941 | */ | ||
942 | static __init int unsynchronized_tsc(void) | ||
943 | { | ||
944 | #ifdef CONFIG_SMP | ||
945 | if (oem_force_hpet_timer()) | ||
946 | return 1; | ||
947 | /* Intel systems are normally all synchronized. Exceptions | ||
948 | are handled in the OEM check above. */ | ||
949 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
950 | return 0; | ||
951 | /* All in a single socket - should be synchronized */ | ||
952 | if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) | ||
953 | return 0; | ||
954 | #endif | ||
955 | /* Assume multi socket systems are not synchronized */ | ||
956 | return num_online_cpus() > 1; | ||
957 | } | ||
958 | |||
959 | /* | ||
926 | * Decide after all CPUs are booted what mode gettimeofday should use. | 960 | * Decide after all CPUs are booted what mode gettimeofday should use. |
927 | */ | 961 | */ |
928 | void __init time_init_gtod(void) | 962 | void __init time_init_gtod(void) |
929 | { | 963 | { |
930 | char *timetype; | 964 | char *timetype; |
931 | 965 | ||
932 | /* | 966 | if (unsynchronized_tsc()) |
933 | * AMD systems with more than one CPU don't have fully synchronized | ||
934 | * TSCs. Always use HPET gettimeofday for these, although it is slower. | ||
935 | * Intel SMP systems usually have synchronized TSCs, so use always | ||
936 | * the TSC. | ||
937 | * | ||
938 | * Exceptions: | ||
939 | * IBM Summit2 checked by oem_force_hpet_timer(). | ||
940 | * AMD dual core may also not need HPET. Check me. | ||
941 | * | ||
942 | * Can be turned off with "notsc". | ||
943 | */ | ||
944 | if (num_online_cpus() > 1 && | ||
945 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
946 | notsc = 1; | ||
947 | /* Some systems will want to disable TSC and use HPET. */ | ||
948 | if (oem_force_hpet_timer()) | ||
949 | notsc = 1; | 967 | notsc = 1; |
950 | if (vxtime.hpet_address && notsc) { | 968 | if (vxtime.hpet_address && notsc) { |
951 | timetype = "HPET"; | 969 | timetype = "HPET"; |
952 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; | 970 | vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; |
953 | vxtime.mode = VXTIME_HPET; | 971 | vxtime.mode = VXTIME_HPET; |
954 | do_gettimeoffset = do_gettimeoffset_hpet; | 972 | do_gettimeoffset = do_gettimeoffset_hpet; |
973 | #ifdef CONFIG_X86_PM_TIMER | ||
974 | /* Using PM for gettimeofday is quite slow, but we have no other | ||
975 | choice because the TSC is too unreliable on some systems. */ | ||
976 | } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { | ||
977 | timetype = "PM"; | ||
978 | do_gettimeoffset = do_gettimeoffset_pm; | ||
979 | vxtime.mode = VXTIME_PMTMR; | ||
980 | sysctl_vsyscall = 0; | ||
981 | printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); | ||
982 | #endif | ||
955 | } else { | 983 | } else { |
956 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; | 984 | timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; |
957 | vxtime.mode = VXTIME_TSC; | 985 | vxtime.mode = VXTIME_TSC; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b4b8dc59663a..1a7541435ef0 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -65,7 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
65 | usec = (__xtime.tv_nsec / 1000) + | 65 | usec = (__xtime.tv_nsec / 1000) + |
66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); | 66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); |
67 | 67 | ||
68 | if (__vxtime.mode == VXTIME_TSC) { | 68 | if (__vxtime.mode != VXTIME_HPET) { |
69 | sync_core(); | 69 | sync_core(); |
70 | rdtscll(t); | 70 | rdtscll(t); |
71 | if (t < __vxtime.last_tsc) | 71 | if (t < __vxtime.last_tsc) |
@@ -217,7 +217,6 @@ static int __init vsyscall_init(void) | |||
217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 217 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 218 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
219 | map_vsyscall(); | 219 | map_vsyscall(); |
220 | sysctl_vsyscall = 1; | ||
221 | register_sysctl_table(kernel_root_table2, 0); | 220 | register_sysctl_table(kernel_root_table2, 0); |
222 | return 0; | 221 | return 0; |
223 | } | 222 | } |
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index d0f8f8b4c394..f2f073642d62 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h | |||
@@ -30,6 +30,11 @@ extern void ia32_syscall(void); | |||
30 | extern void iommu_hole_init(void); | 30 | extern void iommu_hole_init(void); |
31 | 31 | ||
32 | extern void time_init_gtod(void); | 32 | extern void time_init_gtod(void); |
33 | extern int pmtimer_mark_offset(void); | ||
34 | extern unsigned int do_gettimeoffset_pm(void); | ||
35 | extern u32 pmtmr_ioport; | ||
36 | extern unsigned long long monotonic_base; | ||
37 | extern int sysctl_vsyscall; | ||
33 | 38 | ||
34 | extern void do_softirq_thunk(void); | 39 | extern void do_softirq_thunk(void); |
35 | 40 | ||
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index b0c8d4339906..2872da23fc7e 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
@@ -25,6 +25,7 @@ enum vsyscall_num { | |||
25 | 25 | ||
26 | #define VXTIME_TSC 1 | 26 | #define VXTIME_TSC 1 |
27 | #define VXTIME_HPET 2 | 27 | #define VXTIME_HPET 2 |
28 | #define VXTIME_PMTMR 3 | ||
28 | 29 | ||
29 | struct vxtime_data { | 30 | struct vxtime_data { |
30 | long hpet_address; /* HPET base address */ | 31 | long hpet_address; /* HPET base address */ |
@@ -54,6 +55,8 @@ extern struct timezone sys_tz; | |||
54 | extern int sysctl_vsyscall; | 55 | extern int sysctl_vsyscall; |
55 | extern seqlock_t xtime_lock; | 56 | extern seqlock_t xtime_lock; |
56 | 57 | ||
58 | extern int sysctl_vsyscall; | ||
59 | |||
57 | #define ARCH_HAVE_XTIME_LOCK 1 | 60 | #define ARCH_HAVE_XTIME_LOCK 1 |
58 | 61 | ||
59 | #endif /* __KERNEL__ */ | 62 | #endif /* __KERNEL__ */ |