Diffstat (limited to 'arch/i386')
-rw-r--r--  arch/i386/Kconfig | 37
-rw-r--r--  arch/i386/Kconfig.cpu | 5
-rw-r--r--  arch/i386/Kconfig.debug | 2
-rw-r--r--  arch/i386/boot/compressed/relocs.c | 9
-rw-r--r--  arch/i386/defconfig | 48
-rw-r--r--  arch/i386/kernel/Makefile | 6
-rw-r--r--  arch/i386/kernel/acpi/boot.c | 249
-rw-r--r--  arch/i386/kernel/acpi/earlyquirk.c | 4
-rw-r--r--  arch/i386/kernel/apic.c | 1631
-rw-r--r--  arch/i386/kernel/apm.c | 72
-rw-r--r--  arch/i386/kernel/asm-offsets.c | 2
-rw-r--r--  arch/i386/kernel/cpu/common.c | 14
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Kconfig | 9
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Makefile | 1
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/e_powersaver.c | 334
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.c | 374
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.h | 153
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 6
-rw-r--r--  arch/i386/kernel/cpu/cyrix.c | 52
-rw-r--r--  arch/i386/kernel/cpu/mcheck/mce.c | 1
-rw-r--r--  arch/i386/kernel/cpu/mcheck/mce.h | 2
-rw-r--r--  arch/i386/kernel/cpu/mcheck/p4.c | 2
-rw-r--r--  arch/i386/kernel/cpu/mtrr/if.c | 32
-rw-r--r--  arch/i386/kernel/cpu/mtrr/main.c | 6
-rw-r--r--  arch/i386/kernel/cpu/mtrr/mtrr.h | 2
-rw-r--r--  arch/i386/kernel/cpu/proc.c | 14
-rw-r--r--  arch/i386/kernel/cpu/transmeta.c | 5
-rw-r--r--  arch/i386/kernel/cpuid.c | 9
-rw-r--r--  arch/i386/kernel/e820.c | 18
-rw-r--r--  arch/i386/kernel/entry.S | 78
-rw-r--r--  arch/i386/kernel/head.S | 40
-rw-r--r--  arch/i386/kernel/hpet.c | 499
-rw-r--r--  arch/i386/kernel/i8253.c | 96
-rw-r--r--  arch/i386/kernel/i8259.c | 7
-rw-r--r--  arch/i386/kernel/io_apic.c | 33
-rw-r--r--  arch/i386/kernel/irq.c | 25
-rw-r--r--  arch/i386/kernel/kprobes.c | 6
-rw-r--r--  arch/i386/kernel/microcode.c | 4
-rw-r--r--  arch/i386/kernel/mpparse.c | 4
-rw-r--r--  arch/i386/kernel/msr.c | 15
-rw-r--r--  arch/i386/kernel/nmi.c | 107
-rw-r--r--  arch/i386/kernel/paravirt.c | 116
-rw-r--r--  arch/i386/kernel/pcspeaker.c | 20
-rw-r--r--  arch/i386/kernel/process.c | 102
-rw-r--r--  arch/i386/kernel/ptrace.c | 16
-rw-r--r--  arch/i386/kernel/setup.c | 39
-rw-r--r--  arch/i386/kernel/signal.c | 16
-rw-r--r--  arch/i386/kernel/smp.c | 7
-rw-r--r--  arch/i386/kernel/smpboot.c | 203
-rw-r--r--  arch/i386/kernel/srat.c | 84
-rw-r--r--  arch/i386/kernel/sysenter.c | 55
-rw-r--r--  arch/i386/kernel/time.c | 138
-rw-r--r--  arch/i386/kernel/time_hpet.c | 497
-rw-r--r--  arch/i386/kernel/topology.c | 2
-rw-r--r--  arch/i386/kernel/traps.c | 27
-rw-r--r--  arch/i386/kernel/tsc.c | 195
-rw-r--r--  arch/i386/kernel/tsc_sync.c | 1
-rw-r--r--  arch/i386/kernel/vm86.c | 33
-rw-r--r--  arch/i386/kernel/vmi.c | 949
-rw-r--r--  arch/i386/kernel/vmitime.c | 499
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/i386/mach-default/setup.c | 8
-rw-r--r--  arch/i386/mach-es7000/es7000.h | 9
-rw-r--r--  arch/i386/mach-es7000/es7000plat.c | 53
-rw-r--r--  arch/i386/math-emu/get_address.c | 14
-rw-r--r--  arch/i386/math-emu/status_w.h | 8
-rw-r--r--  arch/i386/mm/discontig.c | 1
-rw-r--r--  arch/i386/mm/fault.c | 44
-rw-r--r--  arch/i386/mm/highmem.c | 7
-rw-r--r--  arch/i386/mm/init.c | 4
-rw-r--r--  arch/i386/mm/pageattr.c | 4
-rw-r--r--  arch/i386/mm/pgtable.c | 26
-rw-r--r--  arch/i386/oprofile/nmi_int.c | 14
-rw-r--r--  arch/i386/oprofile/op_model_ppro.c | 9
-rw-r--r--  arch/i386/pci/Makefile | 2
-rw-r--r--  arch/i386/pci/common.c | 88
-rw-r--r--  arch/i386/pci/mmconfig-shared.c | 264
-rw-r--r--  arch/i386/pci/mmconfig.c | 102
-rw-r--r--  arch/i386/pci/pci.h | 10
79 files changed, 4854 insertions, 2834 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 0dfee812811a..2f7672545fe9 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,19 @@ config GENERIC_TIME
 	bool
 	default y
 
+config CLOCKSOURCE_WATCHDOG
+	bool
+	default y
+
+config GENERIC_CLOCKEVENTS
+	bool
+	default y
+
+config GENERIC_CLOCKEVENTS_BROADCAST
+	bool
+	default y
+	depends on X86_LOCAL_APIC
+
 config LOCKDEP_SUPPORT
 	bool
 	default y
@@ -38,6 +51,10 @@ config MMU
 	bool
 	default y
 
+config ZONE_DMA
+	bool
+	default y
+
 config SBUS
 	bool
 
@@ -70,6 +87,8 @@ source "init/Kconfig"
 
 menu "Processor type and features"
 
+source "kernel/time/Kconfig"
+
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
@@ -199,6 +218,15 @@ config PARAVIRT
 	  However, when run without a hypervisor the kernel is
 	  theoretically slower.  If in doubt, say N.
 
+config VMI
+	bool "VMI Paravirt-ops support"
+	depends on PARAVIRT && !NO_HZ
+	default y
+	help
+	  VMI provides a paravirtualized interface to multiple hypervisors
+	  include VMware ESX server and Xen by connecting to a ROM module
+	  provided by the hypervisor.
+
 config ACPI_SRAT
 	bool
 	default y
@@ -1259,3 +1287,12 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	bool
 	default y
+
+config NO_IDLE_HZ
+	bool
+	depends on PARAVIRT
+	default y
+	help
+	  Switches the regular HZ timer off when the system is going idle.
+	  This helps a hypervisor detect that the Linux system is idle,
+	  reducing the overhead of idle systems.
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index 2aecfba4ac4f..b99c0e2a4e63 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -226,11 +226,6 @@ config X86_CMPXCHG
 	depends on !M386
 	default y
 
-config X86_XADD
-	bool
-	depends on !M386
-	default y
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || X86_GENERIC
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index f68cc6f215f8..458bc1611933 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -87,7 +87,7 @@ config DOUBLEFAULT
 
 config DEBUG_PARAVIRT
 	bool "Enable some paravirtualization debugging"
-	default y
+	default n
 	depends on PARAVIRT && DEBUG_KERNEL
 	help
 	  Currently deliberately clobbers regs which are allowed to be
diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c
index 881951ca03e1..ce4fda261aaf 100644
--- a/arch/i386/boot/compressed/relocs.c
+++ b/arch/i386/boot/compressed/relocs.c
@@ -11,6 +11,7 @@
 #include <endian.h>
 
 #define MAX_SHDRS 100
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 static Elf32_Ehdr ehdr;
 static Elf32_Shdr shdr[MAX_SHDRS];
 static Elf32_Sym *symtab[MAX_SHDRS];
@@ -71,7 +72,7 @@ static const char *sym_type(unsigned type)
 #undef SYM_TYPE
 	};
 	const char *name = "unknown sym type name";
-	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+	if (type < ARRAY_SIZE(type_name)) {
 		name = type_name[type];
 	}
 	return name;
@@ -87,7 +88,7 @@ static const char *sym_bind(unsigned bind)
 #undef SYM_BIND
 	};
 	const char *name = "unknown sym bind name";
-	if (bind < sizeof(bind_name)/sizeof(bind_name[0])) {
+	if (bind < ARRAY_SIZE(bind_name)) {
 		name = bind_name[bind];
 	}
 	return name;
@@ -104,7 +105,7 @@ static const char *sym_visibility(unsigned visibility)
 #undef SYM_VISIBILITY
 	};
 	const char *name = "unknown sym visibility name";
-	if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) {
+	if (visibility < ARRAY_SIZE(visibility_name)) {
 		name = visibility_name[visibility];
 	}
 	return name;
@@ -128,7 +129,7 @@ static const char *rel_type(unsigned type)
 #undef REL_TYPE
 	};
 	const char *name = "unknown type rel type name";
-	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+	if (type < ARRAY_SIZE(type_name)) {
 		name = type_name[type];
 	}
 	return name;
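
Aside: the relocs.c hunks above replace four open-coded sizeof divisions with the new ARRAY_SIZE() macro. A minimal standalone sketch of the idiom (the table below is hypothetical, not taken from the patch):

	#include <stdio.h>

	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

	static const char *type_name[] = { "NOTYPE", "OBJECT", "FUNC" };

	int main(void)
	{
		unsigned type = 7;
		const char *name = "unknown";

		/* Bounds-check against the table length, as the patched lookups do */
		if (type < ARRAY_SIZE(type_name))
			name = type_name[type];
		printf("%s\n", name);
		return 0;
	}

Because sizeof is evaluated at compile time, the bound tracks the table automatically when entries are added or removed.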
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 5d80edfc61b7..5ae1e0bc8fd7 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.20-rc3
-# Fri Jan 5 11:54:46 2007
+# Linux kernel version: 2.6.20-git8
+# Tue Feb 13 11:25:18 2007
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
@@ -10,6 +10,7 @@ CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
 CONFIG_MMU=y
+CONFIG_ZONE_DMA=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_BUG=y
@@ -139,7 +140,6 @@ CONFIG_MPENTIUMIII=y
 # CONFIG_MVIAC3_2 is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_XADD=y
 CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
@@ -198,6 +198,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_RESOURCES_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
 # CONFIG_HIGHPTE is not set
 # CONFIG_MATH_EMULATION is not set
 CONFIG_MTRR=y
@@ -211,6 +212,7 @@ CONFIG_HZ_250=y
 CONFIG_HZ=250
 # CONFIG_KEXEC is not set
 # CONFIG_CRASH_DUMP is not set
+CONFIG_PHYSICAL_START=0x100000
 # CONFIG_RELOCATABLE is not set
 CONFIG_PHYSICAL_ALIGN=0x100000
 # CONFIG_HOTPLUG_CPU is not set
@@ -229,13 +231,14 @@ CONFIG_PM_SYSFS_DEPRECATED=y
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 CONFIG_ACPI=y
+CONFIG_ACPI_PROCFS=y
 CONFIG_ACPI_AC=y
 CONFIG_ACPI_BATTERY=y
 CONFIG_ACPI_BUTTON=y
-# CONFIG_ACPI_VIDEO is not set
 # CONFIG_ACPI_HOTKEY is not set
 CONFIG_ACPI_FAN=y
 # CONFIG_ACPI_DOCK is not set
+# CONFIG_ACPI_BAY is not set
 CONFIG_ACPI_PROCESSOR=y
 CONFIG_ACPI_THERMAL=y
 # CONFIG_ACPI_ASUS is not set
@@ -306,7 +309,6 @@ CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 # CONFIG_PCIEPORTBUS is not set
 CONFIG_PCI_MSI=y
-# CONFIG_PCI_MULTITHREAD_PROBE is not set
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_HT_IRQ is not set
 CONFIG_ISA_DMA_API=y
@@ -347,6 +349,7 @@ CONFIG_UNIX=y
 CONFIG_XFRM=y
 # CONFIG_XFRM_USER is not set
 # CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
@@ -446,6 +449,7 @@ CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
 # CONFIG_SYS_HYPERVISOR is not set
 
 #
@@ -514,6 +518,7 @@ CONFIG_BLK_DEV_IDECD=y
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
 # CONFIG_BLK_DEV_IDESCSI is not set
+CONFIG_BLK_DEV_IDEACPI=y
 # CONFIG_IDE_TASK_IOCTL is not set
 
 
@@ -546,6 +551,7 @@ CONFIG_BLK_DEV_AMD74XX=y
 # CONFIG_BLK_DEV_JMICRON is not set
 # CONFIG_BLK_DEV_SC1200 is not set
 CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_IT8213 is not set
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
@@ -556,6 +562,7 @@ CONFIG_BLK_DEV_PIIX=y
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
 # CONFIG_BLK_DEV_VIA82CXXX is not set
+# CONFIG_BLK_DEV_TC86C001 is not set
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
 # CONFIG_IDEDMA_IVB is not set
@@ -654,6 +661,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 # Serial ATA (prod) and Parallel ATA (experimental) drivers
 #
 CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_SVW=y
 CONFIG_ATA_PIIX=y
@@ -669,6 +677,7 @@ CONFIG_SATA_SIL=y
 # CONFIG_SATA_ULI is not set
 CONFIG_SATA_VIA=y
 # CONFIG_SATA_VITESSE is not set
+# CONFIG_SATA_INIC162X is not set
 CONFIG_SATA_INTEL_COMBINED=y
 # CONFIG_PATA_ALI is not set
 # CONFIG_PATA_AMD is not set
@@ -686,6 +695,7 @@ CONFIG_SATA_INTEL_COMBINED=y
 # CONFIG_PATA_HPT3X2N is not set
 # CONFIG_PATA_HPT3X3 is not set
 # CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_IT8213 is not set
 # CONFIG_PATA_JMICRON is not set
 # CONFIG_PATA_TRIFLEX is not set
 # CONFIG_PATA_MARVELL is not set
@@ -738,9 +748,7 @@ CONFIG_IEEE1394=y
 # Subsystem Options
 #
 # CONFIG_IEEE1394_VERBOSEDEBUG is not set
-# CONFIG_IEEE1394_OUI_DB is not set
 # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
-# CONFIG_IEEE1394_EXPORT_FULL_API is not set
 
 #
 # Device Drivers
@@ -766,6 +774,11 @@ CONFIG_IEEE1394_RAWIO=y
 # CONFIG_I2O is not set
 
 #
+# Macintosh device drivers
+#
+# CONFIG_MAC_EMUMOUSEBTN is not set
+
+#
 # Network device support
 #
 CONFIG_NETDEVICES=y
@@ -832,6 +845,7 @@ CONFIG_8139TOO=y
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
 # CONFIG_VIA_RHINE is not set
+# CONFIG_SC92031 is not set
 
 #
 # Ethernet (1000 Mbit)
@@ -854,11 +868,13 @@ CONFIG_SKY2=y
 CONFIG_TIGON3=y
 CONFIG_BNX2=y
 # CONFIG_QLA3XXX is not set
+# CONFIG_ATL1 is not set
 
 #
 # Ethernet (10000 Mbit)
 #
 # CONFIG_CHELSIO_T1 is not set
+# CONFIG_CHELSIO_T3 is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
 # CONFIG_MYRI10GE is not set
@@ -1089,6 +1105,7 @@ CONFIG_SOUND=y
 # Open Sound System
 #
 CONFIG_SOUND_PRIME=y
+CONFIG_OBSOLETE_OSS=y
 # CONFIG_SOUND_BT878 is not set
 # CONFIG_SOUND_ES1371 is not set
 CONFIG_SOUND_ICH=y
@@ -1102,6 +1119,7 @@ CONFIG_SOUND_ICH=y
 # HID Devices
 #
 CONFIG_HID=y
+# CONFIG_HID_DEBUG is not set
 
 #
 # USB support
@@ -1116,10 +1134,8 @@ CONFIG_USB=y
 # Miscellaneous USB options
 #
 CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
-# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
 
 #
@@ -1129,9 +1145,11 @@ CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
 # CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=y
-# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
+# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
 CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
 # CONFIG_USB_SL811_HCD is not set
@@ -1182,6 +1200,7 @@ CONFIG_USB_HID=y
 # CONFIG_USB_ATI_REMOTE2 is not set
 # CONFIG_USB_KEYSPAN_REMOTE is not set
 # CONFIG_USB_APPLETOUCH is not set
+# CONFIG_USB_GTCO is not set
 
 #
 # USB Imaging devices
@@ -1287,6 +1306,10 @@ CONFIG_USB_MON=y
 #
 
 #
+# Auxiliary Display support
+#
+
+#
 # Virtualization
 #
 # CONFIG_KVM is not set
@@ -1479,6 +1502,7 @@ CONFIG_UNUSED_SYMBOLS=y
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_SCHEDSTATS is not set
@@ -1487,7 +1511,6 @@ CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_RWSEMS is not set
 # CONFIG_DEBUG_LOCK_ALLOC is not set
 # CONFIG_PROVE_LOCKING is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1532,7 +1555,8 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_PLIST=y
-CONFIG_IOMAP_COPY=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 1e8988e558c5..4ae3dcf1d2f0 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR)	+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_APM)		+= apm.o
-obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o
+obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
@@ -32,7 +32,6 @@ obj-$(CONFIG_KPROBES)	+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-y				+= sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT)		+= srat.o
-obj-$(CONFIG_HPET_TIMER)	+= time_hpet.o
 obj-$(CONFIG_EFI)		+= efi.o efi_stub.o
 obj-$(CONFIG_DOUBLEFAULT)	+= doublefault.o
 obj-$(CONFIG_VM86)		+= vm86.o
@@ -40,8 +39,9 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 
-# Make sure this is linked after any other paravirt_ops structs: see head.S
+obj-$(CONFIG_VMI)		+= vmi.o vmitime.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+obj-y				+= pcspeaker.o
 
 EXTRA_AFLAGS   := -traditional
 
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index cbcb2c27f48b..e5eb97a910ed 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -25,6 +25,7 @@
 
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/efi.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
@@ -66,7 +67,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
 	(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
-	((acpi_table_entry_header *)entry)->length < sizeof(*entry))
+	((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
 
 #define PREFIX			"ACPI: "
 
@@ -79,7 +80,7 @@ int acpi_ioapic;
 int acpi_strict;
 EXPORT_SYMBOL(acpi_strict);
 
-acpi_interrupt_flags acpi_sci_flags __initdata;
+u8 acpi_sci_flags __initdata;
 int acpi_sci_override_gsi __initdata;
 int acpi_skip_timer_override __initdata;
 int acpi_use_timer_override __initdata;
@@ -92,11 +93,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
-#define MAX_MADT_ENTRIES	256
-u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
-    {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
-EXPORT_SYMBOL(x86_acpiid_to_apicid);
-
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -166,30 +162,26 @@ char *__acpi_map_table(unsigned long phys, unsigned long size)
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
-struct acpi_table_mcfg_config *pci_mmcfg_config;
+struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
-int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
+int __init acpi_parse_mcfg(struct acpi_table_header *header)
 {
 	struct acpi_table_mcfg *mcfg;
 	unsigned long i;
 	int config_size;
 
-	if (!phys_addr || !size)
+	if (!header)
 		return -EINVAL;
 
-	mcfg = (struct acpi_table_mcfg *)__acpi_map_table(phys_addr, size);
-	if (!mcfg) {
-		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
-		return -ENODEV;
-	}
+	mcfg = (struct acpi_table_mcfg *)header;
 
 	/* how many config structures do we have */
 	pci_mmcfg_config_num = 0;
-	i = size - sizeof(struct acpi_table_mcfg);
-	while (i >= sizeof(struct acpi_table_mcfg_config)) {
+	i = header->length - sizeof(struct acpi_table_mcfg);
+	while (i >= sizeof(struct acpi_mcfg_allocation)) {
 		++pci_mmcfg_config_num;
-		i -= sizeof(struct acpi_table_mcfg_config);
+		i -= sizeof(struct acpi_mcfg_allocation);
 	};
 	if (pci_mmcfg_config_num == 0) {
 		printk(KERN_ERR PREFIX "MMCONFIG has no entries\n");
@@ -204,9 +196,9 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 		return -ENOMEM;
 	}
 
-	memcpy(pci_mmcfg_config, &mcfg->config, config_size);
+	memcpy(pci_mmcfg_config, &mcfg[1], config_size);
 	for (i = 0; i < pci_mmcfg_config_num; ++i) {
-		if (mcfg->config[i].base_reserved) {
+		if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
 			printk(KERN_ERR PREFIX
 			       "MMCONFIG not in low 4GB of memory\n");
 			kfree(pci_mmcfg_config);
@@ -220,24 +212,24 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
 #endif				/* CONFIG_PCI_MMCONFIG */
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_madt(struct acpi_table_header *table)
 {
 	struct acpi_table_madt *madt = NULL;
 
-	if (!phys_addr || !size || !cpu_has_apic)
+	if (!cpu_has_apic)
 		return -EINVAL;
 
-	madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
+	madt = (struct acpi_table_madt *)table;
 	if (!madt) {
 		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
 		return -ENODEV;
 	}
 
-	if (madt->lapic_address) {
-		acpi_lapic_addr = (u64) madt->lapic_address;
+	if (madt->address) {
+		acpi_lapic_addr = (u64) madt->address;
 
 		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
-		       madt->lapic_address);
+		       madt->address);
 	}
 
 	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);
@@ -246,21 +238,17 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 }
 
 static int __init
-acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_lapic *processor = NULL;
+	struct acpi_madt_local_apic *processor = NULL;
 
-	processor = (struct acpi_table_lapic *)header;
+	processor = (struct acpi_madt_local_apic *)header;
 
 	if (BAD_MADT_ENTRY(processor, end))
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(header);
 
-	/* Record local apic id only when enabled */
-	if (processor->flags.enabled)
-		x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
-
 	/*
 	 * We need to register disabled CPU as well to permit
 	 * counting disabled CPUs. This allows us to size
@@ -269,18 +257,18 @@ acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end)
 	 * when we use CPU hotplug.
 	 */
 	mp_register_lapic(processor->id,	/* APIC ID */
-			  processor->flags.enabled);	/* Enabled? */
+			  processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */
 
 	return 0;
 }
 
 static int __init
-acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
+acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
 			  const unsigned long end)
 {
-	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;
+	struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL;
 
-	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr *)header;
+	lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header;
 
 	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
 		return -EINVAL;
@@ -291,11 +279,11 @@ acpi_parse_lapic_addr_ovr(acpi_table_entry_header * header,
 }
 
 static int __init
-acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_lapic_nmi *lapic_nmi = NULL;
+	struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
 
-	lapic_nmi = (struct acpi_table_lapic_nmi *)header;
+	lapic_nmi = (struct acpi_madt_local_apic_nmi *)header;
 
 	if (BAD_MADT_ENTRY(lapic_nmi, end))
 		return -EINVAL;
@@ -313,11 +301,11 @@ acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end)
 #ifdef CONFIG_X86_IO_APIC
 
 static int __init
-acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_ioapic *ioapic = NULL;
+	struct acpi_madt_io_apic *ioapic = NULL;
 
-	ioapic = (struct acpi_table_ioapic *)header;
+	ioapic = (struct acpi_madt_io_apic *)header;
 
 	if (BAD_MADT_ENTRY(ioapic, end))
 		return -EINVAL;
@@ -342,11 +330,11 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
 		polarity = 3;
 
 	/* Command-line over-ride via acpi_sci= */
-	if (acpi_sci_flags.trigger)
-		trigger = acpi_sci_flags.trigger;
+	if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)
+		trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2;
 
-	if (acpi_sci_flags.polarity)
-		polarity = acpi_sci_flags.polarity;
+	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
+		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
 	/*
 	 * mp_config_acpi_legacy_irqs() already setup IRQs < 16
@@ -357,51 +345,52 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
 
 	/*
 	 * stash over-ride to indicate we've been here
-	 * and for later update of acpi_fadt
+	 * and for later update of acpi_gbl_FADT
 	 */
 	acpi_sci_override_gsi = gsi;
 	return;
 }
 
 static int __init
-acpi_parse_int_src_ovr(acpi_table_entry_header * header,
+acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
 		       const unsigned long end)
 {
-	struct acpi_table_int_src_ovr *intsrc = NULL;
+	struct acpi_madt_interrupt_override *intsrc = NULL;
 
-	intsrc = (struct acpi_table_int_src_ovr *)header;
+	intsrc = (struct acpi_madt_interrupt_override *)header;
 
 	if (BAD_MADT_ENTRY(intsrc, end))
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(header);
 
-	if (intsrc->bus_irq == acpi_fadt.sci_int) {
+	if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
 		acpi_sci_ioapic_setup(intsrc->global_irq,
-				      intsrc->flags.polarity,
-				      intsrc->flags.trigger);
+				      intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
+				      (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2);
 		return 0;
 	}
 
 	if (acpi_skip_timer_override &&
-	    intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
+	    intsrc->source_irq == 0 && intsrc->global_irq == 2) {
 		printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
 		return 0;
 	}
 
-	mp_override_legacy_irq(intsrc->bus_irq,
-			       intsrc->flags.polarity,
-			       intsrc->flags.trigger, intsrc->global_irq);
+	mp_override_legacy_irq(intsrc->source_irq,
+			       intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
+			       (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2,
+			       intsrc->global_irq);
 
 	return 0;
 }
 
 static int __init
-acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
+acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
 {
-	struct acpi_table_nmi_src *nmi_src = NULL;
+	struct acpi_madt_nmi_source *nmi_src = NULL;
 
-	nmi_src = (struct acpi_table_nmi_src *)header;
+	nmi_src = (struct acpi_madt_nmi_source *)header;
 
 	if (BAD_MADT_ENTRY(nmi_src, end))
 		return -EINVAL;
@@ -417,7 +406,7 @@ acpi_parse_nmi_src(acpi_table_entry_header * header, const unsigned long end)
 
 /*
  * acpi_pic_sci_set_trigger()
- * 
+ *
  * use ELCR to set PIC-mode trigger type for SCI
  *
  * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
@@ -511,7 +500,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
-	struct acpi_table_lapic *lapic;
+	struct acpi_madt_local_apic *lapic;
 	cpumask_t tmp_map, new_map;
 	u8 physid;
 	int cpu;
@@ -529,10 +518,10 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 		return -EINVAL;
 	}
 
-	lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
+	lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
 
-	if ((lapic->header.type != ACPI_MADT_LAPIC) ||
-	    (!lapic->flags.enabled)) {
+	if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
+	    !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
 		kfree(buffer.pointer);
 		return -EINVAL;
 	}
@@ -544,7 +533,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	buffer.pointer = NULL;
 
 	tmp_map = cpu_present_map;
-	mp_register_lapic(physid, lapic->flags.enabled);
+	mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
 	/*
 	 * If mp_register_lapic successfully generates a new logical cpu
@@ -566,14 +555,6 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
-	int i;
-
-	for_each_possible_cpu(i) {
-		if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
-			x86_acpiid_to_apicid[i] = -1;
-			break;
-		}
-	}
 	x86_cpu_to_apicid[cpu] = -1;
 	cpu_clear(cpu, cpu_present_map);
 	num_processors--;
@@ -619,42 +600,37 @@ acpi_scan_rsdp(unsigned long start, unsigned long length)
 	return 0;
 }
 
-static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
+static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
-	struct acpi_table_sbf *sb;
-
-	if (!phys_addr || !size)
-		return -EINVAL;
+	struct acpi_table_boot *sb;
 
-	sb = (struct acpi_table_sbf *)__acpi_map_table(phys_addr, size);
+	sb = (struct acpi_table_boot *)table;
 	if (!sb) {
 		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
 		return -ENODEV;
 	}
 
-	sbf_port = sb->sbf_cmos;	/* Save CMOS port */
+	sbf_port = sb->cmos_index;	/* Save CMOS port */
 
 	return 0;
 }
 
 #ifdef CONFIG_HPET_TIMER
+#include <asm/hpet.h>
 
-static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+static int __init acpi_parse_hpet(struct acpi_table_header *table)
 {
 	struct acpi_table_hpet *hpet_tbl;
 	struct resource *hpet_res;
 	resource_size_t res_start;
 
-	if (!phys || !size)
-		return -EINVAL;
-
-	hpet_tbl = (struct acpi_table_hpet *)__acpi_map_table(phys, size);
+	hpet_tbl = (struct acpi_table_hpet *)table;
 	if (!hpet_tbl) {
 		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
 		return -ENODEV;
 	}
 
-	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
+	if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) {
 		printk(KERN_WARNING PREFIX "HPET timers must be located in "
 		       "memory.\n");
 		return -1;
@@ -667,29 +643,15 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 		hpet_res->name = (void *)&hpet_res[1];
 		hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 		snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
-			 "HPET %u", hpet_tbl->number);
+			 "HPET %u", hpet_tbl->sequence);
 		hpet_res->end = (1 * 1024) - 1;
 	}
 
-#ifdef	CONFIG_X86_64
-	vxtime.hpet_address = hpet_tbl->addr.addrl |
-	    ((long)hpet_tbl->addr.addrh << 32);
-
+	hpet_address = hpet_tbl->address.address;
 	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
-	       hpet_tbl->id, vxtime.hpet_address);
+	       hpet_tbl->id, hpet_address);
 
-	res_start = vxtime.hpet_address;
-#else	/* X86 */
-	{
-		extern unsigned long hpet_address;
-
-		hpet_address = hpet_tbl->addr.addrl;
-		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
-		       hpet_tbl->id, hpet_address);
-
-		res_start = hpet_address;
-	}
-#endif	/* X86 */
+	res_start = hpet_address;
 
 	if (hpet_res) {
 		hpet_res->start = res_start;
@@ -703,46 +665,28 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 #define	acpi_parse_hpet	NULL
 #endif
 
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#endif
-
-static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
+static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
-	struct fadt_descriptor *fadt = NULL;
-
-	fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
-	if (!fadt) {
-		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
-		return 0;
-	}
-	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
-	acpi_fadt.sci_int = fadt->sci_int;
-
-	/* initialize rev and apic_phys_dest_mode for x86_64 genapic */
-	acpi_fadt.revision = fadt->revision;
-	acpi_fadt.force_apic_physical_destination_mode =
-	    fadt->force_apic_physical_destination_mode;
 
 #ifdef CONFIG_X86_PM_TIMER
 	/* detect the location of the ACPI PM Timer */
-	if (fadt->revision >= FADT2_REVISION_ID) {
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) {
 		/* FADT rev. 2 */
-		if (fadt->xpm_tmr_blk.address_space_id !=
+		if (acpi_gbl_FADT.xpm_timer_block.space_id !=
 		    ACPI_ADR_SPACE_SYSTEM_IO)
 			return 0;
 
-		pmtmr_ioport = fadt->xpm_tmr_blk.address;
+		pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address;
 		/*
 		 * "X" fields are optional extensions to the original V1.0
 		 * fields, so we must selectively expand V1.0 fields if the
 		 * corresponding X field is zero.
 		 */
 		if (!pmtmr_ioport)
-			pmtmr_ioport = fadt->V1_pm_tmr_blk;
+			pmtmr_ioport = acpi_gbl_FADT.pm_timer_block;
 	} else {
 		/* FADT rev. 1 */
-		pmtmr_ioport = fadt->V1_pm_tmr_blk;
+		pmtmr_ioport = acpi_gbl_FADT.pm_timer_block;
 	}
 	if (pmtmr_ioport)
 		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
@@ -784,13 +728,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	if (!cpu_has_apic)
 		return -ENODEV;
 
-	/* 
+	/*
 	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
 	 * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
 	 */
 
 	count =
-	    acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR,
+	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
 				  acpi_parse_lapic_addr_ovr, 0);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
@@ -800,7 +744,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 
 	mp_register_lapic_address(acpi_lapic_addr);
 
-	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic,
 				      MAX_APICS);
 	if (!count) {
 		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -813,7 +757,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
 	}
 
 	count =
-	    acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
+	    acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
@@ -842,7 +786,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 		return -ENODEV;
 	}
 
-	if (!cpu_has_apic) 
+	if (!cpu_has_apic)
 		return -ENODEV;
 
 	/*
@@ -855,7 +799,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	}
 
 	count =
-	    acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic,
+	    acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
 				  MAX_IO_APICS);
 	if (!count) {
 		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
@@ -866,7 +810,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	}
 
 	count =
-	    acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
+	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
 				  NR_IRQ_VECTORS);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
@@ -880,13 +824,13 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 	 * pretend we got one so we can set the SCI flags.
 	 */
 	if (!acpi_sci_override_gsi)
-		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
+		acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0);
 
 	/* Fill in identity legacy mapings where no override */
 	mp_config_acpi_legacy_irqs();
 
 	count =
-	    acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
+	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
 				  NR_IRQ_VECTORS);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
@@ -906,10 +850,9 @@ static inline int acpi_parse_madt_ioapic_entries(void)
 static void __init acpi_process_madt(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-	int count, error;
+	int error;
 
-	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
-	if (count >= 1) {
+	if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
 
 		/*
 		 * Parse MADT LAPIC entries
@@ -1195,7 +1138,7 @@ int __init acpi_boot_table_init(void)
 	if (acpi_disabled && !acpi_ht)
 		return 1;
 
-	/* 
+	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
 	error = acpi_table_init();
@@ -1204,7 +1147,7 @@ int __init acpi_boot_table_init(void)
 		return error;
 	}
 
-	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
 
 	/*
 	 * blacklist may disable ACPI entirely
@@ -1232,19 +1175,19 @@ int __init acpi_boot_init(void)
 	if (acpi_disabled && !acpi_ht)
 		return 1;
 
-	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
+	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
 
 	/*
 	 * set sci_int and PM timer address
 	 */
-	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);
+	acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt);
 
 	/*
 	 * Process the Multiple APIC Description Table (MADT), if present
 	 */
 	acpi_process_madt();
 
-	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
+	acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
 
 	return 0;
 }
@@ -1315,13 +1258,17 @@ static int __init setup_acpi_sci(char *s)
 	if (!s)
 		return -EINVAL;
 	if (!strcmp(s, "edge"))
-		acpi_sci_flags.trigger = 1;
+		acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE |
+			(acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK);
 	else if (!strcmp(s, "level"))
-		acpi_sci_flags.trigger = 3;
+		acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL |
+			(acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK);
 	else if (!strcmp(s, "high"))
-		acpi_sci_flags.polarity = 1;
+		acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH |
+			(acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK);
 	else if (!strcmp(s, "low"))
-		acpi_sci_flags.polarity = 3;
+		acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW |
+			(acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK);
 	else
 		return -EINVAL;
 	return 0;
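
Aside: most of the boot.c churn above follows from the ACPICA table-manager rework visible in this commit: parse handlers now receive an already-mapped struct acpi_table_header * instead of a physical address/size pair, tables are selected by signature string (ACPI_SIG_MADT, ACPI_SIG_FADT, ...), and MADT subtables are described by struct acpi_subtable_header. A minimal sketch of a handler in the new style, modelled on acpi_parse_sbf above (the demo_* name and printk text are illustrative, not part of the patch):

	static int __init demo_parse_boot(struct acpi_table_header *table)
	{
		struct acpi_table_boot *boot = (struct acpi_table_boot *)table;

		if (!boot)
			return -ENODEV;

		/* fields are read straight from the mapped table */
		printk(KERN_INFO "BOOT table: CMOS index %u\n", boot->cmos_index);
		return 0;
	}

	/* registered by signature, e.g.: acpi_table_parse(ACPI_SIG_BOOT, demo_parse_boot); */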
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 4b60af7f91dd..bf86f7662d8b 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -16,7 +16,7 @@
 
 static int nvidia_hpet_detected __initdata;
 
-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+static int __init nvidia_hpet_check(struct acpi_table_header *header)
 {
 	nvidia_hpet_detected = 1;
 	return 0;
@@ -30,7 +30,7 @@ static int __init check_bridge(int vendor, int device)
 	   is enabled. */
 	if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
 		nvidia_hpet_detected = 0;
-		acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
+		acpi_table_parse(ACPI_SIG_HPET, nvidia_hpet_check);
 		if (nvidia_hpet_detected == 0) {
 			acpi_skip_timer_override = 1;
 			printk(KERN_INFO "Nvidia board "
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 776d9be26af9..9655c233e6f1 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -25,6 +25,8 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
+#include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 
 #include <asm/atomic.h>
@@ -36,6 +38,7 @@
 #include <asm/hpet.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -44,128 +47,549 @@
44#include "io_ports.h" 47#include "io_ports.h"
45 48
46/* 49/*
47 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as 50 * Sanity check
48 * IPIs in place of local APIC timers
49 */ 51 */
50static cpumask_t timer_bcast_ipi; 52#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F
53# error SPURIOUS_APIC_VECTOR definition error
54#endif
51 55
52/* 56/*
53 * Knob to control our willingness to enable the local APIC. 57 * Knob to control our willingness to enable the local APIC.
58 *
59 * -1=force-disable, +1=force-enable
54 */ 60 */
55static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ 61static int enable_local_apic __initdata = 0;
56
57static inline void lapic_disable(void)
58{
59 enable_local_apic = -1;
60 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
61}
62 62
63static inline void lapic_enable(void) 63/* Local APIC timer verification ok */
64{ 64static int local_apic_timer_verify_ok;
65 enable_local_apic = 1;
66}
67 65
68/* 66/*
69 * Debug level 67 * Debug level, exported for io_apic.c
70 */ 68 */
71int apic_verbosity; 69int apic_verbosity;
72 70
71static unsigned int calibration_result;
73 72
73static int lapic_next_event(unsigned long delta,
74 struct clock_event_device *evt);
75static void lapic_timer_setup(enum clock_event_mode mode,
76 struct clock_event_device *evt);
77static void lapic_timer_broadcast(cpumask_t mask);
74static void apic_pm_activate(void); 78static void apic_pm_activate(void);
75 79
80/*
81 * The local apic timer can be used for any function which is CPU local.
82 */
83static struct clock_event_device lapic_clockevent = {
84 .name = "lapic",
85 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
86 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
87 .shift = 32,
88 .set_mode = lapic_timer_setup,
89 .set_next_event = lapic_next_event,
90 .broadcast = lapic_timer_broadcast,
91 .rating = 100,
92 .irq = -1,
93};
94static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
95
96/* Local APIC was disabled by the BIOS and enabled by the kernel */
97static int enabled_via_apicbase;
98
99/*
100 * Get the LAPIC version
101 */
102static inline int lapic_get_version(void)
103{
104 return GET_APIC_VERSION(apic_read(APIC_LVR));
105}
106
107/*
108 * Check, if the APIC is integrated or a seperate chip
109 */
110static inline int lapic_is_integrated(void)
111{
112 return APIC_INTEGRATED(lapic_get_version());
113}
114
115/*
116 * Check, whether this is a modern or a first generation APIC
117 */
76static int modern_apic(void) 118static int modern_apic(void)
77{ 119{
78 unsigned int lvr, version;
79 /* AMD systems use old APIC versions, so check the CPU */ 120 /* AMD systems use old APIC versions, so check the CPU */
80 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && 121 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
81 boot_cpu_data.x86 >= 0xf) 122 boot_cpu_data.x86 >= 0xf)
82 return 1; 123 return 1;
83 lvr = apic_read(APIC_LVR); 124 return lapic_get_version() >= 0x14;
84 version = GET_APIC_VERSION(lvr);
85 return version >= 0x14;
86} 125}
87 126
127/**
128 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
129 */
130void enable_NMI_through_LVT0 (void * dummy)
131{
132 unsigned int v = APIC_DM_NMI;
133
134 /* Level triggered for 82489DX */
135 if (!lapic_is_integrated())
136 v |= APIC_LVT_LEVEL_TRIGGER;
137 apic_write_around(APIC_LVT0, v);
138}
139
140/**
141 * get_physical_broadcast - Get number of physical broadcast IDs
142 */
143int get_physical_broadcast(void)
144{
145 return modern_apic() ? 0xff : 0xf;
146}
147
148/**
149 * lapic_get_maxlvt - get the maximum number of local vector table entries
150 */
151int lapic_get_maxlvt(void)
152{
153 unsigned int v = apic_read(APIC_LVR);
154
155 /* 82489DXs do not report # of LVT entries. */
156 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
157}
158
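
lapic_get_version() and lapic_get_maxlvt() both decode the APIC_LVR register: the version number sits in the low byte and the maximum LVT index in bits 23:16 (per the Intel SDM), with 82489DX parts reporting no LVT count at all. A small sketch of that decoding, using locally defined macros and a fabricated register value rather than the kernel's headers:

/*
 * Sketch of the APIC_LVR decoding: version in bits 7:0, max LVT
 * index in bits 23:16. The register value is a fabricated sample.
 */
#include <stdio.h>
#include <stdint.h>

#define GET_VERSION(lvr)	((lvr) & 0xFF)
#define GET_MAXLVT(lvr)		(((lvr) >> 16) & 0xFF)
#define IS_INTEGRATED(ver)	((ver) & 0xF0)	/* 0x1x => integrated */

int main(void)
{
	uint32_t lvr = 0x00050014;	/* sample: version 0x14, maxlvt 5 */
	unsigned int ver = GET_VERSION(lvr);

	printf("version=0x%02x integrated=%d maxlvt=%u\n",
	       ver, IS_INTEGRATED(ver) ? 1 : 0,
	       IS_INTEGRATED(ver) ? GET_MAXLVT(lvr) : 2);
	return 0;
}
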
159/*
160 * Local APIC timer
161 */
162
163/* Clock divisor is set to 16 */
164#define APIC_DIVISOR 16
165
88/* 166/*
89 * 'what should we do if we get a hw irq event on an illegal vector'. 167 * This function sets up the local APIC timer, with a timeout of
90 * each architecture has to answer this itself. 168 * 'clocks' APIC bus clocks. During calibration we actually call
169 * this function twice on the boot CPU, once with a bogus timeout
170 * value, second time for real. The other (noncalibrating) CPUs
171 * call this function only once, with the real, calibrated value.
172 *
173 * We do reads before writes even if unnecessary, to get around the
174 * P5 APIC double write bug.
91 */ 175 */
92void ack_bad_irq(unsigned int irq) 176static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
93{ 177{
94 printk("unexpected IRQ trap at vector %02x\n", irq); 178 unsigned int lvtt_value, tmp_value;
179
180 lvtt_value = LOCAL_TIMER_VECTOR;
181 if (!oneshot)
182 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
183 if (!lapic_is_integrated())
184 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
185
186 if (!irqen)
187 lvtt_value |= APIC_LVT_MASKED;
188
189 apic_write_around(APIC_LVTT, lvtt_value);
190
95 /* 191 /*
96 * Currently unexpected vectors happen only on SMP and APIC. 192 * Divide PICLK by 16
97 * We _must_ ack these because every local APIC has only N
98 * irq slots per priority level, and a 'hanging, unacked' IRQ
99 * holds up an irq slot - in excessive cases (when multiple
100 * unexpected vectors occur) that might lock up the APIC
101 * completely.
102 * But only ack when the APIC is enabled -AK
103 */ 193 */
104 if (cpu_has_apic) 194 tmp_value = apic_read(APIC_TDCR);
105 ack_APIC_irq(); 195 apic_write_around(APIC_TDCR, (tmp_value
196 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
197 | APIC_TDR_DIV_16);
198
199 if (!oneshot)
200 apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
106} 201}
107 202
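
__setup_APIC_LVTT() composes the LVTT register value from a vector plus mode and mask bits. The sketch below models just that composition in user space; the mask (bit 16) and periodic (bit 17) positions follow the Intel SDM, while the 0xEF vector merely stands in for LOCAL_TIMER_VECTOR and is an assumption for illustration:

/*
 * User-space sketch of the LVTT value composed by __setup_APIC_LVTT().
 * Bit positions follow the Intel SDM; TIMER_VECTOR is an assumed
 * stand-in for LOCAL_TIMER_VECTOR.
 */
#include <stdio.h>
#include <stdint.h>

#define LVT_MASKED		(1u << 16)
#define LVT_TIMER_PERIODIC	(1u << 17)
#define TIMER_VECTOR		0xEFu	/* assumed LOCAL_TIMER_VECTOR */

static uint32_t build_lvtt(int oneshot, int irqen)
{
	uint32_t v = TIMER_VECTOR;

	if (!oneshot)
		v |= LVT_TIMER_PERIODIC;	/* periodic: auto-reload */
	if (!irqen)
		v |= LVT_MASKED;		/* calibration runs masked */
	return v;
}

int main(void)
{
	printf("calibration: %#x\n", build_lvtt(0, 0)); /* periodic, masked */
	printf("oneshot:     %#x\n", build_lvtt(1, 1)); /* clockevent mode */
	return 0;
}
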
108void __init apic_intr_init(void) 203/*
204 * Program the next event, relative to now
205 */
206static int lapic_next_event(unsigned long delta,
207 struct clock_event_device *evt)
208{
209 apic_write_around(APIC_TMICT, delta);
210 return 0;
211}
212
213/*
214 * Setup the lapic timer in periodic or oneshot mode
215 */
216static void lapic_timer_setup(enum clock_event_mode mode,
217 struct clock_event_device *evt)
218{
219 unsigned long flags;
220 unsigned int v;
221
222 /* Lapic used for broadcast? */
223 if (!local_apic_timer_verify_ok)
224 return;
225
226 local_irq_save(flags);
227
228 switch (mode) {
229 case CLOCK_EVT_MODE_PERIODIC:
230 case CLOCK_EVT_MODE_ONESHOT:
231 __setup_APIC_LVTT(calibration_result,
232 mode != CLOCK_EVT_MODE_PERIODIC, 1);
233 break;
234 case CLOCK_EVT_MODE_UNUSED:
235 case CLOCK_EVT_MODE_SHUTDOWN:
236 v = apic_read(APIC_LVTT);
237 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
238 apic_write_around(APIC_LVTT, v);
239 break;
240 }
241
242 local_irq_restore(flags);
243}
244
245/*
246 * Local APIC timer broadcast function
247 */
248static void lapic_timer_broadcast(cpumask_t mask)
109{ 249{
110#ifdef CONFIG_SMP 250#ifdef CONFIG_SMP
111 smp_intr_init(); 251 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
112#endif 252#endif
113 /* self generated IPI for local APIC timer */ 253}
114 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
115 254
116 /* IPI vectors for APIC spurious and error interrupts */ 255/*
117 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 256 * Set up the local APIC timer for this CPU. Copy the initialized values
118 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 257 * of the boot CPU and register the clock event in the framework.
258 */
259static void __devinit setup_APIC_timer(void)
260{
261 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
119 262
120 /* thermal monitor LVT interrupt */ 263 memcpy(levt, &lapic_clockevent, sizeof(*levt));
121#ifdef CONFIG_X86_MCE_P4THERMAL 264 levt->cpumask = cpumask_of_cpu(smp_processor_id());
122 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 265
123#endif 266 clockevents_register_device(levt);
124} 267}
125 268
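
setup_APIC_timer() uses a template-copy pattern: one calibrated clock_event_device is memcpy'd into each CPU's per-CPU slot and bound to that CPU before registration. A toy user-space rendering of the same pattern, with the structure trimmed to invented fields and the registration call left as a comment:

/*
 * Sketch of the template-copy pattern: each CPU gets its own event
 * device, initialized from the shared calibrated template.
 */
#include <stdio.h>
#include <string.h>

struct clock_event_device {
	const char *name;
	unsigned long mult;	/* calibrated once on the boot CPU */
	int cpu;		/* stands in for the cpumask binding */
};

static struct clock_event_device template = { "lapic", 123456, -1 };
static struct clock_event_device percpu_evt[4];

static void setup_timer_on(int cpu)
{
	struct clock_event_device *levt = &percpu_evt[cpu];

	memcpy(levt, &template, sizeof(*levt));
	levt->cpu = cpu;	/* cpumask_of_cpu(smp_processor_id()) */
	/* clockevents_register_device(levt) would go here */
	printf("cpu%d: %s mult=%lu\n", cpu, levt->name, levt->mult);
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)
		setup_timer_on(cpu);
	return 0;
}
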
126/* Using APIC to generate smp_local_timer_interrupt? */ 269/*
127int using_apic_timer __read_mostly = 0; 270 * In this function we calibrate the APIC bus clocks to the external timer.
271 *
272 * We want to do the calibration only once since we want to have local timer
273 * irqs in sync. CPUs connected by the same APIC bus have the very same bus
274 * frequency.
275 *
276 * This was previously done by reading the PIT/HPET and waiting for a wrap
277 * around to find out that a tick has elapsed. I have a box where the PIT
278 * readout is broken, so it never gets out of the wait loop again. This was
279 * also reported by others.
280 *
281 * Monitoring the jiffies value is inaccurate and the clockevents
282 * infrastructure allows us to do a simple substitution of the interrupt
283 * handler.
284 *
285 * The calibration routine also uses the pm_timer when possible, as the PIT
286 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
287 * back to normal later in the boot process).
288 */
289
290#define LAPIC_CAL_LOOPS (HZ/10)
128 291
129static int enabled_via_apicbase; 292static __initdata volatile int lapic_cal_loops = -1;
293static __initdata long lapic_cal_t1, lapic_cal_t2;
294static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
295static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
296static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
130 297
131void enable_NMI_through_LVT0 (void * dummy) 298/*
299 * Temporary interrupt handler.
300 */
301static void __init lapic_cal_handler(struct clock_event_device *dev)
132{ 302{
133 unsigned int v, ver; 303 unsigned long long tsc = 0;
304 long tapic = apic_read(APIC_TMCCT);
305 unsigned long pm = acpi_pm_read_early();
134 306
135 ver = apic_read(APIC_LVR); 307 if (cpu_has_tsc)
136 ver = GET_APIC_VERSION(ver); 308 rdtscll(tsc);
137 v = APIC_DM_NMI; /* unmask and set to NMI */ 309
138 if (!APIC_INTEGRATED(ver)) /* 82489DX */ 310 switch (lapic_cal_loops++) {
139 v |= APIC_LVT_LEVEL_TRIGGER; 311 case 0:
140 apic_write_around(APIC_LVT0, v); 312 lapic_cal_t1 = tapic;
313 lapic_cal_tsc1 = tsc;
314 lapic_cal_pm1 = pm;
315 lapic_cal_j1 = jiffies;
316 break;
317
318 case LAPIC_CAL_LOOPS:
319 lapic_cal_t2 = tapic;
320 lapic_cal_tsc2 = tsc;
321 if (pm < lapic_cal_pm1)
322 pm += ACPI_PM_OVRRUN;
323 lapic_cal_pm2 = pm;
324 lapic_cal_j2 = jiffies;
325 break;
326 }
141} 327}
142 328
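
lapic_cal_handler() only stores snapshots; the useful numbers are the deltas between tick 0 and tick LAPIC_CAL_LOOPS, with the 24-bit ACPI PM timer unwrapped if it rolled over inside the window (the ACPI_PM_OVRRUN addition above). A sketch of that arithmetic with made-up snapshot values:

/*
 * Sketch of the calibration delta arithmetic. The APIC timer counts
 * down (t1 > t2); the PM timer counts up and may wrap inside the
 * window. All snapshot values are invented.
 */
#include <stdio.h>
#include <stdint.h>

#define PM_OVRRUN (1u << 24)	/* ACPI PM timer is 24 bits wide */

int main(void)
{
	long t1 = 1000000000, t2 = 999791666;
	unsigned long pm1 = 16771216, pm2 = 357573;

	if (pm2 < pm1)			/* wrapped: undo the overflow */
		pm2 += PM_OVRRUN;

	printf("lapic delta = %ld\n", t1 - t2);
	printf("PM delta    = %lu\n", pm2 - pm1);
	return 0;
}
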
143int get_physical_broadcast(void) 329/*
330 * Setup the boot APIC
331 *
332 * Calibrate and verify the result.
333 */
334void __init setup_boot_APIC_clock(void)
144{ 335{
145 if (modern_apic()) 336 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
146 return 0xff; 337 const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
147 else 338 const long pm_thresh = pm_100ms/100;
148 return 0xf; 339 void (*real_handler)(struct clock_event_device *dev);
340 unsigned long deltaj;
341 long delta, deltapm;
342
343 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
344 "calibrating APIC timer ...\n");
345
346 local_irq_disable();
347
348 /* Replace the global interrupt handler */
349 real_handler = global_clock_event->event_handler;
350 global_clock_event->event_handler = lapic_cal_handler;
351
352 /*
353 * Setup the APIC counter to 1e9. There is no way the lapic
354 * can underflow in the 100ms detection time frame
355 */
356 __setup_APIC_LVTT(1000000000, 0, 0);
357
358 /* Let the interrupts run */
359 local_irq_enable();
360
361 while (lapic_cal_loops <= LAPIC_CAL_LOOPS);
362
363 local_irq_disable();
364
365 /* Restore the real event handler */
366 global_clock_event->event_handler = real_handler;
367
368 /* Build delta t1-t2 as apic timer counts down */
369 delta = lapic_cal_t1 - lapic_cal_t2;
370 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
371
372 /* Check, if the PM timer is available */
373 deltapm = lapic_cal_pm2 - lapic_cal_pm1;
374 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
375
376 if (deltapm) {
377 unsigned long mult;
378 u64 res;
379
380 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
381
382 if (deltapm > (pm_100ms - pm_thresh) &&
383 deltapm < (pm_100ms + pm_thresh)) {
384 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
385 } else {
386 res = (((u64) deltapm) * mult) >> 22;
387 do_div(res, 1000000);
388 printk(KERN_WARNING "APIC calibration not consistent "
389 "with PM Timer: %ldms instead of 100ms\n",
390 (long)res);
391 /* Correct the lapic counter value */
392 res = (((u64) delta ) * pm_100ms);
393 do_div(res, deltapm);
394 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
395 "%lu (%ld)\n", (unsigned long) res, delta);
396 delta = (long) res;
397 }
398 }
399
400 /* Calculate the scaled math multiplication factor */
401 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
402 lapic_clockevent.max_delta_ns =
403 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
404 lapic_clockevent.min_delta_ns =
405 clockevent_delta2ns(0xF, &lapic_clockevent);
406
407 calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
408
409 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
410 apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
411 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
412 calibration_result);
413
414 if (cpu_has_tsc) {
415 delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
416 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
417 "%ld.%04ld MHz.\n",
418 (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
419 (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
420 }
421
422 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
423 "%u.%04u MHz.\n",
424 calibration_result / (1000000 / HZ),
425 calibration_result % (1000000 / HZ));
426
427
428 apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
429
430 /*
431 * Setup the apic timer manually
432 */
433 local_apic_timer_verify_ok = 1;
434 levt->event_handler = lapic_cal_handler;
435 lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
436 lapic_cal_loops = -1;
437
438 /* Let the interrupts run */
439 local_irq_enable();
440
441 while (lapic_cal_loops <= LAPIC_CAL_LOOPS);
442
443 local_irq_disable();
444
445 /* Stop the lapic timer */
446 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
447
448 local_irq_enable();
449
450 /* Jiffies delta */
451 deltaj = lapic_cal_j2 - lapic_cal_j1;
452 apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
453
454 /* Check, if the PM timer is available */
455 deltapm = lapic_cal_pm2 - lapic_cal_pm1;
456 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
457
458 local_apic_timer_verify_ok = 0;
459
460 if (deltapm) {
461 if (deltapm > (pm_100ms - pm_thresh) &&
462 deltapm < (pm_100ms + pm_thresh)) {
463 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
464 /* Check, if the jiffies result is consistent */
465 if (deltaj < LAPIC_CAL_LOOPS-2 ||
466 deltaj > LAPIC_CAL_LOOPS+2) {
467 /*
468 * Not sure what we can do about this one.
469 * When high resolution timers are active
470 * and the lapic timer does not stop in C3
471 * we are fine. Otherwise more trouble might
472 * be waiting. -- tglx
473 */
474 printk(KERN_WARNING "Global event device %s "
475 "has wrong frequency "
476 "(%lu ticks instead of %d)\n",
477 global_clock_event->name, deltaj,
478 LAPIC_CAL_LOOPS);
479 }
480 local_apic_timer_verify_ok = 1;
481 }
482 } else {
483 /* Check, if the jiffies result is consistent */
484 if (deltaj >= LAPIC_CAL_LOOPS-2 &&
485 deltaj <= LAPIC_CAL_LOOPS+2) {
486 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
487 local_apic_timer_verify_ok = 1;
488 }
489 }
490
491 if (!local_apic_timer_verify_ok) {
492 printk(KERN_WARNING
493 "APIC timer disabled due to verification failure.\n");
494 /* No broadcast on UP! */
495 if (num_possible_cpus() == 1)
496 return;
497 } else
498 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
499
500 /* Setup the lapic or request the broadcast */
501 setup_APIC_timer();
502}
503
504void __devinit setup_secondary_APIC_clock(void)
505{
506 setup_APIC_timer();
149} 507}
150 508
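
The calibration math in setup_boot_APIC_clock() reduces to a few integer operations: optionally rescale the lapic delta to the PM timer's notion of 100ms, derive the shift-32 multiplier the way div_sc() does, and compute the per-tick count. A compilable sketch with example inputs; HZ and all "measured" values here are invented:

/*
 * Sketch of the setup_boot_APIC_clock() calibration math. If the PM
 * timer disagrees with the expected 100ms window by more than 1%,
 * the lapic delta is rescaled before deriving mult.
 */
#include <stdio.h>
#include <stdint.h>

#define HZ		250
#define CAL_LOOPS	(HZ / 10)	/* LAPIC_CAL_LOOPS: ticks in 100ms */
#define APIC_DIVISOR	16
#define PMTMR_PER_SEC	3579545UL
#define TICK_NSEC	(1000000000UL / HZ)

int main(void)
{
	long delta = 2083340;			/* example lapic counts */
	long deltapm = 371000;			/* example PM counts */
	const long pm_100ms = PMTMR_PER_SEC / 10;
	uint64_t mult;

	/* Outside the +-1% window: trust the PM timer and rescale */
	if (deltapm < pm_100ms - pm_100ms / 100 ||
	    deltapm > pm_100ms + pm_100ms / 100)
		delta = (long)(((uint64_t)delta * pm_100ms) / deltapm);

	/* div_sc(): mult = (delta << 32) / (TICK_NSEC * CAL_LOOPS) */
	mult = ((uint64_t)delta << 32) /
	       ((uint64_t)TICK_NSEC * CAL_LOOPS);

	printf("delta=%ld mult=%llu per-tick=%ld\n",
	       delta, (unsigned long long)mult,
	       delta * APIC_DIVISOR / CAL_LOOPS);
	return 0;
}
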
151int get_maxlvt(void) 509/*
510 * The guts of the apic timer interrupt
511 */
512static void local_apic_timer_interrupt(void)
152{ 513{
153 unsigned int v, ver, maxlvt; 514 int cpu = smp_processor_id();
515 struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
154 516
155 v = apic_read(APIC_LVR); 517 /*
156 ver = GET_APIC_VERSION(v); 518 * Normally we should not be here till LAPIC has been initialized but
157 /* 82489DXs do not report # of LVT entries. */ 519 * in some cases like kdump, it's possible that there is a pending LAPIC
158 maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; 520 * timer interrupt from previous kernel's context and is delivered in
159 return maxlvt; 521 * the new kernel the moment interrupts are enabled.
522 *
523 * Interrupts are enabled early and the LAPIC is set up much later, hence
524 * it's possible that when we get here evt->event_handler is NULL.
525 * Check for event_handler being NULL and discard the interrupt as
526 * spurious.
527 */
528 if (!evt->event_handler) {
529 printk(KERN_WARNING
530 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
531 /* Switch it off */
532 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
533 return;
534 }
535
536 per_cpu(irq_stat, cpu).apic_timer_irqs++;
537
538 evt->event_handler(evt);
160} 539}
161 540
541/*
542 * Local APIC timer interrupt. This is the most natural way for doing
543 * local interrupts, but local timer interrupts can be emulated by
544 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
545 *
546 * [ if a single-CPU system runs an SMP kernel then we call the local
547 * interrupt as well. Thus we cannot inline the local irq ... ]
548 */
549
550void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
551{
552 struct pt_regs *old_regs = set_irq_regs(regs);
553
554 /*
555 * NOTE! We'd better ACK the irq immediately,
556 * because timer handling can be slow.
557 */
558 ack_APIC_irq();
559 /*
560 * update_process_times() expects us to have done irq_enter().
561 * Besides, if we don't, timer interrupts ignore the global
562 * interrupt lock, which is the WrongThing (tm) to do.
563 */
564 exit_idle();
565 irq_enter();
566 local_apic_timer_interrupt();
567 irq_exit();
568
569 set_irq_regs(old_regs);
570}
571
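
smp_apic_timer_interrupt() follows a fixed ordering: acknowledge first (timer handling can be slow), then enter irq context, run the handler, leave irq context. A stub sketch of that ordering; every function here is a user-space stand-in, not the kernel's:

/*
 * Sketch of the interrupt-entry ordering. All functions are fakes
 * that just trace the sequence.
 */
#include <stdio.h>

static void ack_irq(void)       { printf("ack\n"); }
static void irq_enter(void)     { printf("enter irq context\n"); }
static void irq_exit(void)      { printf("leave irq context\n"); }
static void event_handler(void) { printf("run clockevent handler\n"); }

static void timer_interrupt(void)
{
	ack_irq();		/* ACK immediately, before slow work */
	irq_enter();		/* update_process_times() needs this */
	event_handler();
	irq_exit();
}

int main(void)
{
	timer_interrupt();
	return 0;
}
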
572int setup_profiling_timer(unsigned int multiplier)
573{
574 return -EINVAL;
575}
576
577/*
578 * Local APIC start and shutdown
579 */
580
581/**
582 * clear_local_APIC - shutdown the local APIC
583 *
584 * This is called when a CPU is disabled and before rebooting, so the state of
585 * the local APIC has no dangling leftovers. Also used to clean out any BIOS
586 * leftovers during boot.
587 */
162void clear_local_APIC(void) 588void clear_local_APIC(void)
163{ 589{
164 int maxlvt; 590 int maxlvt = lapic_get_maxlvt();
165 unsigned long v; 591 unsigned long v;
166 592
167 maxlvt = get_maxlvt();
168
169 /* 593 /*
170 * Masking an LVT entry can trigger a local APIC error 594 * Masking an LVT entry can trigger a local APIC error
171 * if the vector is zero. Mask LVTERR first to prevent this. 595 * if the vector is zero. Mask LVTERR first to prevent this.
@@ -189,7 +613,7 @@ void clear_local_APIC(void)
189 apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); 613 apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
190 } 614 }
191 615
192/* let's not touch this if we didn't frob it */ 616 /* let's not touch this if we didn't frob it */
193#ifdef CONFIG_X86_MCE_P4THERMAL 617#ifdef CONFIG_X86_MCE_P4THERMAL
194 if (maxlvt >= 5) { 618 if (maxlvt >= 5) {
195 v = apic_read(APIC_LVTTHMR); 619 v = apic_read(APIC_LVTTHMR);
@@ -211,85 +635,18 @@ void clear_local_APIC(void)
211 if (maxlvt >= 5) 635 if (maxlvt >= 5)
212 apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); 636 apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
213#endif 637#endif
214 v = GET_APIC_VERSION(apic_read(APIC_LVR)); 638 /* Integrated APIC (!82489DX) ? */
215 if (APIC_INTEGRATED(v)) { /* !82489DX */ 639 if (lapic_is_integrated()) {
216 if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ 640 if (maxlvt > 3)
641 /* Clear ESR due to Pentium errata 3AP and 11AP */
217 apic_write(APIC_ESR, 0); 642 apic_write(APIC_ESR, 0);
218 apic_read(APIC_ESR); 643 apic_read(APIC_ESR);
219 } 644 }
220} 645}
221 646
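
The ordering constraint in clear_local_APIC() is the interesting part: masking an LVT entry whose vector field is zero can itself raise an APIC error, so LVTERR is parked masked before any other entry is touched. A sketch of that ordering over a fake register file; the indices are local inventions, only the mask bit mirrors the code above:

/*
 * Sketch of the mask-LVTERR-first ordering, over a simulated
 * register file.
 */
#include <stdio.h>
#include <stdint.h>

#define LVT_MASKED (1u << 16)

enum { LVTERR, LVTT, LVT0, LVT1, NR_LVT };
static uint32_t lvt[NR_LVT];	/* fake register file */

static void mask_lvt(int reg)
{
	uint32_t v = lvt[reg];		/* read before write (P5 quirk) */
	lvt[reg] = v | LVT_MASKED;
}

int main(void)
{
	/* Mask LVTERR first so the remaining writes cannot fault */
	mask_lvt(LVTERR);
	mask_lvt(LVTT);
	mask_lvt(LVT0);
	mask_lvt(LVT1);

	for (int i = 0; i < NR_LVT; i++)
		printf("lvt[%d]=%#x\n", i, lvt[i]);
	return 0;
}
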
222void __init connect_bsp_APIC(void) 647/**
223{ 648 * disable_local_APIC - clear and disable the local APIC
224 if (pic_mode) { 649 */
225 /*
226 * Do not trust the local APIC being empty at bootup.
227 */
228 clear_local_APIC();
229 /*
230 * PIC mode, enable APIC mode in the IMCR, i.e.
231 * connect BSP's local APIC to INT and NMI lines.
232 */
233 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
234 "enabling APIC mode.\n");
235 outb(0x70, 0x22);
236 outb(0x01, 0x23);
237 }
238 enable_apic_mode();
239}
240
241void disconnect_bsp_APIC(int virt_wire_setup)
242{
243 if (pic_mode) {
244 /*
245 * Put the board back into PIC mode (has an effect
246 * only on certain older boards). Note that APIC
247 * interrupts, including IPIs, won't work beyond
248 * this point! The only exception are INIT IPIs.
249 */
250 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
251 "entering PIC mode.\n");
252 outb(0x70, 0x22);
253 outb(0x00, 0x23);
254 }
255 else {
256 /* Go back to Virtual Wire compatibility mode */
257 unsigned long value;
258
259 /* For the spurious interrupt use vector F, and enable it */
260 value = apic_read(APIC_SPIV);
261 value &= ~APIC_VECTOR_MASK;
262 value |= APIC_SPIV_APIC_ENABLED;
263 value |= 0xf;
264 apic_write_around(APIC_SPIV, value);
265
266 if (!virt_wire_setup) {
267 /* For LVT0 make it edge triggered, active high, external and enabled */
268 value = apic_read(APIC_LVT0);
269 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
270 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
271 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
272 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
273 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
274 apic_write_around(APIC_LVT0, value);
275 }
276 else {
277 /* Disable LVT0 */
278 apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
279 }
280
281 /* For LVT1 make it edge triggered, active high, nmi and enabled */
282 value = apic_read(APIC_LVT1);
283 value &= ~(
284 APIC_MODE_MASK | APIC_SEND_PENDING |
285 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
286 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
287 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
288 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
289 apic_write_around(APIC_LVT1, value);
290 }
291}
292
293void disable_local_APIC(void) 650void disable_local_APIC(void)
294{ 651{
295 unsigned long value; 652 unsigned long value;
@@ -304,8 +661,13 @@ void disable_local_APIC(void)
304 value &= ~APIC_SPIV_APIC_ENABLED; 661 value &= ~APIC_SPIV_APIC_ENABLED;
305 apic_write_around(APIC_SPIV, value); 662 apic_write_around(APIC_SPIV, value);
306 663
664 /*
665 * When LAPIC was disabled by the BIOS and enabled by the kernel,
666 * restore the disabled state.
667 */
307 if (enabled_via_apicbase) { 668 if (enabled_via_apicbase) {
308 unsigned int l, h; 669 unsigned int l, h;
670
309 rdmsr(MSR_IA32_APICBASE, l, h); 671 rdmsr(MSR_IA32_APICBASE, l, h);
310 l &= ~MSR_IA32_APICBASE_ENABLE; 672 l &= ~MSR_IA32_APICBASE_ENABLE;
311 wrmsr(MSR_IA32_APICBASE, l, h); 673 wrmsr(MSR_IA32_APICBASE, l, h);
@@ -313,6 +675,28 @@ void disable_local_APIC(void)
313} 675}
314 676
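
The enabled_via_apicbase path above boils down to clearing one MSR bit. A user-space sketch of the bit manipulation with the MSR simulated (rdmsr/wrmsr cannot run in user space); bit 11 is MSR_IA32_APICBASE_ENABLE and 0xFEE00000 is the architectural default base:

/*
 * Sketch of the APICBASE enable-bit clear done when the kernel, not
 * the BIOS, enabled the LAPIC. The MSR is faked as a variable.
 */
#include <stdio.h>
#include <stdint.h>

#define APICBASE_ENABLE (1u << 11)

static uint32_t fake_msr_lo = 0xFEE00000u | APICBASE_ENABLE;

int main(void)
{
	int enabled_via_apicbase = 1;	/* kernel enabled it earlier */

	if (enabled_via_apicbase) {
		uint32_t l = fake_msr_lo;	/* rdmsr */
		l &= ~APICBASE_ENABLE;		/* restore BIOS default */
		fake_msr_lo = l;		/* wrmsr */
	}
	printf("APICBASE low word: %#x\n", fake_msr_lo);
	return 0;
}
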
315/* 677/*
678 * If Linux enabled the LAPIC against the BIOS default, disable it again before
679 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
680 * not power-off. Additionally clear all LVT entries before disable_local_APIC
681 * for the case where Linux didn't enable the LAPIC.
682 */
683void lapic_shutdown(void)
684{
685 unsigned long flags;
686
687 if (!cpu_has_apic)
688 return;
689
690 local_irq_save(flags);
691 clear_local_APIC();
692
693 if (enabled_via_apicbase)
694 disable_local_APIC();
695
696 local_irq_restore(flags);
697}
698
699/*
316 * This is to verify that we're looking at a real local APIC. 700 * This is to verify that we're looking at a real local APIC.
317 * Check these against your board if the CPUs aren't getting 701 * Check these against your board if the CPUs aren't getting
318 * started for no apparent reason. 702 * started for no apparent reason.
@@ -344,7 +728,7 @@ int __init verify_local_APIC(void)
344 reg1 = GET_APIC_VERSION(reg0); 728 reg1 = GET_APIC_VERSION(reg0);
345 if (reg1 == 0x00 || reg1 == 0xff) 729 if (reg1 == 0x00 || reg1 == 0xff)
346 return 0; 730 return 0;
347 reg1 = get_maxlvt(); 731 reg1 = lapic_get_maxlvt();
348 if (reg1 < 0x02 || reg1 == 0xff) 732 if (reg1 < 0x02 || reg1 == 0xff)
349 return 0; 733 return 0;
350 734
@@ -367,10 +751,15 @@ int __init verify_local_APIC(void)
367 return 1; 751 return 1;
368} 752}
369 753
754/**
755 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
756 */
370void __init sync_Arb_IDs(void) 757void __init sync_Arb_IDs(void)
371{ 758{
372 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 759 /*
373 And not needed on AMD */ 760 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1. Not
761 * needed on AMD.
762 */
374 if (modern_apic()) 763 if (modern_apic())
375 return; 764 return;
376 /* 765 /*
@@ -383,14 +772,12 @@ void __init sync_Arb_IDs(void)
383 | APIC_DM_INIT); 772 | APIC_DM_INIT);
384} 773}
385 774
386extern void __error_in_apic_c (void);
387
388/* 775/*
389 * An initial setup of the virtual wire mode. 776 * An initial setup of the virtual wire mode.
390 */ 777 */
391void __init init_bsp_APIC(void) 778void __init init_bsp_APIC(void)
392{ 779{
393 unsigned long value, ver; 780 unsigned long value;
394 781
395 /* 782 /*
396 * Don't do the setup now if we have a SMP BIOS as the 783 * Don't do the setup now if we have a SMP BIOS as the
@@ -399,9 +786,6 @@ void __init init_bsp_APIC(void)
399 if (smp_found_config || !cpu_has_apic) 786 if (smp_found_config || !cpu_has_apic)
400 return; 787 return;
401 788
402 value = apic_read(APIC_LVR);
403 ver = GET_APIC_VERSION(value);
404
405 /* 789 /*
406 * Do not trust the local APIC being empty at bootup. 790 * Do not trust the local APIC being empty at bootup.
407 */ 791 */
@@ -413,9 +797,10 @@ void __init init_bsp_APIC(void)
413 value = apic_read(APIC_SPIV); 797 value = apic_read(APIC_SPIV);
414 value &= ~APIC_VECTOR_MASK; 798 value &= ~APIC_VECTOR_MASK;
415 value |= APIC_SPIV_APIC_ENABLED; 799 value |= APIC_SPIV_APIC_ENABLED;
416 800
417 /* This bit is reserved on P4/Xeon and should be cleared */ 801 /* This bit is reserved on P4/Xeon and should be cleared */
418 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) 802 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
803 (boot_cpu_data.x86 == 15))
419 value &= ~APIC_SPIV_FOCUS_DISABLED; 804 value &= ~APIC_SPIV_FOCUS_DISABLED;
420 else 805 else
421 value |= APIC_SPIV_FOCUS_DISABLED; 806 value |= APIC_SPIV_FOCUS_DISABLED;
@@ -427,14 +812,17 @@ void __init init_bsp_APIC(void)
427 */ 812 */
428 apic_write_around(APIC_LVT0, APIC_DM_EXTINT); 813 apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
429 value = APIC_DM_NMI; 814 value = APIC_DM_NMI;
430 if (!APIC_INTEGRATED(ver)) /* 82489DX */ 815 if (!lapic_is_integrated()) /* 82489DX */
431 value |= APIC_LVT_LEVEL_TRIGGER; 816 value |= APIC_LVT_LEVEL_TRIGGER;
432 apic_write_around(APIC_LVT1, value); 817 apic_write_around(APIC_LVT1, value);
433} 818}
434 819
820/**
821 * setup_local_APIC - setup the local APIC
822 */
435void __devinit setup_local_APIC(void) 823void __devinit setup_local_APIC(void)
436{ 824{
437 unsigned long oldvalue, value, ver, maxlvt; 825 unsigned long oldvalue, value, maxlvt, integrated;
438 int i, j; 826 int i, j;
439 827
440 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 828 /* Pound the ESR really hard over the head with a big hammer - mbligh */
@@ -445,11 +833,7 @@ void __devinit setup_local_APIC(void)
445 apic_write(APIC_ESR, 0); 833 apic_write(APIC_ESR, 0);
446 } 834 }
447 835
448 value = apic_read(APIC_LVR); 836 integrated = lapic_is_integrated();
449 ver = GET_APIC_VERSION(value);
450
451 if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
452 __error_in_apic_c();
453 837
454 /* 838 /*
455 * Double-check whether this APIC is really registered. 839 * Double-check whether this APIC is really registered.
@@ -520,13 +904,10 @@ void __devinit setup_local_APIC(void)
520 * like LRU than MRU (the short-term load is more even across CPUs). 904 * like LRU than MRU (the short-term load is more even across CPUs).
521 * See also the comment in end_level_ioapic_irq(). --macro 905 * See also the comment in end_level_ioapic_irq(). --macro
522 */ 906 */
523#if 1 907
524 /* Enable focus processor (bit==0) */ 908 /* Enable focus processor (bit==0) */
525 value &= ~APIC_SPIV_FOCUS_DISABLED; 909 value &= ~APIC_SPIV_FOCUS_DISABLED;
526#else 910
527 /* Disable focus processor (bit==1) */
528 value |= APIC_SPIV_FOCUS_DISABLED;
529#endif
530 /* 911 /*
531 * Set spurious IRQ vector 912 * Set spurious IRQ vector
532 */ 913 */
@@ -562,17 +943,18 @@ void __devinit setup_local_APIC(void)
562 value = APIC_DM_NMI; 943 value = APIC_DM_NMI;
563 else 944 else
564 value = APIC_DM_NMI | APIC_LVT_MASKED; 945 value = APIC_DM_NMI | APIC_LVT_MASKED;
565 if (!APIC_INTEGRATED(ver)) /* 82489DX */ 946 if (!integrated) /* 82489DX */
566 value |= APIC_LVT_LEVEL_TRIGGER; 947 value |= APIC_LVT_LEVEL_TRIGGER;
567 apic_write_around(APIC_LVT1, value); 948 apic_write_around(APIC_LVT1, value);
568 949
569 if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ 950 if (integrated && !esr_disable) { /* !82489DX */
570 maxlvt = get_maxlvt(); 951 maxlvt = lapic_get_maxlvt();
571 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 952 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
572 apic_write(APIC_ESR, 0); 953 apic_write(APIC_ESR, 0);
573 oldvalue = apic_read(APIC_ESR); 954 oldvalue = apic_read(APIC_ESR);
574 955
575 value = ERROR_APIC_VECTOR; // enables sending errors 956 /* enables sending errors */
957 value = ERROR_APIC_VECTOR;
576 apic_write_around(APIC_LVTERR, value); 958 apic_write_around(APIC_LVTERR, value);
577 /* 959 /*
578 * spec says clear errors after enabling vector. 960 * spec says clear errors after enabling vector.
@@ -585,207 +967,30 @@ void __devinit setup_local_APIC(void)
585 "vector: 0x%08lx after: 0x%08lx\n", 967 "vector: 0x%08lx after: 0x%08lx\n",
586 oldvalue, value); 968 oldvalue, value);
587 } else { 969 } else {
588 if (esr_disable) 970 if (esr_disable)
589 /* 971 /*
590 * Something untraceable is creating bad interrupts on 972 * Something untraceable is creating bad interrupts on
591 * secondary quads ... for the moment, just leave the 973 * secondary quads ... for the moment, just leave the
592 * ESR disabled - we can't do anything useful with the 974 * ESR disabled - we can't do anything useful with the
593 * errors anyway - mbligh 975 * errors anyway - mbligh
594 */ 976 */
595 printk("Leaving ESR disabled.\n"); 977 printk(KERN_INFO "Leaving ESR disabled.\n");
596 else 978 else
597 printk("No ESR for 82489DX.\n"); 979 printk(KERN_INFO "No ESR for 82489DX.\n");
598 } 980 }
599 981
982 /* Disable the local apic timer */
983 value = apic_read(APIC_LVTT);
984 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
985 apic_write_around(APIC_LVTT, value);
986
600 setup_apic_nmi_watchdog(NULL); 987 setup_apic_nmi_watchdog(NULL);
601 apic_pm_activate(); 988 apic_pm_activate();
602} 989}
603 990
604/* 991/*
605 * If Linux enabled the LAPIC against the BIOS default 992 * Detect and initialize APIC
606 * disable it down before re-entering the BIOS on shutdown.
607 * Otherwise the BIOS may get confused and not power-off.
608 * Additionally clear all LVT entries before disable_local_APIC
609 * for the case where Linux didn't enable the LAPIC.
610 */ 993 */
611void lapic_shutdown(void)
612{
613 unsigned long flags;
614
615 if (!cpu_has_apic)
616 return;
617
618 local_irq_save(flags);
619 clear_local_APIC();
620
621 if (enabled_via_apicbase)
622 disable_local_APIC();
623
624 local_irq_restore(flags);
625}
626
627#ifdef CONFIG_PM
628
629static struct {
630 int active;
631 /* r/w apic fields */
632 unsigned int apic_id;
633 unsigned int apic_taskpri;
634 unsigned int apic_ldr;
635 unsigned int apic_dfr;
636 unsigned int apic_spiv;
637 unsigned int apic_lvtt;
638 unsigned int apic_lvtpc;
639 unsigned int apic_lvt0;
640 unsigned int apic_lvt1;
641 unsigned int apic_lvterr;
642 unsigned int apic_tmict;
643 unsigned int apic_tdcr;
644 unsigned int apic_thmr;
645} apic_pm_state;
646
647static int lapic_suspend(struct sys_device *dev, pm_message_t state)
648{
649 unsigned long flags;
650 int maxlvt;
651
652 if (!apic_pm_state.active)
653 return 0;
654
655 maxlvt = get_maxlvt();
656
657 apic_pm_state.apic_id = apic_read(APIC_ID);
658 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
659 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
660 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
661 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
662 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
663 if (maxlvt >= 4)
664 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
665 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
666 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
667 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
668 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
669 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
670#ifdef CONFIG_X86_MCE_P4THERMAL
671 if (maxlvt >= 5)
672 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
673#endif
674
675 local_irq_save(flags);
676 disable_local_APIC();
677 local_irq_restore(flags);
678 return 0;
679}
680
681static int lapic_resume(struct sys_device *dev)
682{
683 unsigned int l, h;
684 unsigned long flags;
685 int maxlvt;
686
687 if (!apic_pm_state.active)
688 return 0;
689
690 maxlvt = get_maxlvt();
691
692 local_irq_save(flags);
693
694 /*
695 * Make sure the APICBASE points to the right address
696 *
697 * FIXME! This will be wrong if we ever support suspend on
698 * SMP! We'll need to do this as part of the CPU restore!
699 */
700 rdmsr(MSR_IA32_APICBASE, l, h);
701 l &= ~MSR_IA32_APICBASE_BASE;
702 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
703 wrmsr(MSR_IA32_APICBASE, l, h);
704
705 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
706 apic_write(APIC_ID, apic_pm_state.apic_id);
707 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
708 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
709 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
710 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
711 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
712 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
713#ifdef CONFIG_X86_MCE_P4THERMAL
714 if (maxlvt >= 5)
715 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
716#endif
717 if (maxlvt >= 4)
718 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
719 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
720 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
721 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
722 apic_write(APIC_ESR, 0);
723 apic_read(APIC_ESR);
724 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
725 apic_write(APIC_ESR, 0);
726 apic_read(APIC_ESR);
727 local_irq_restore(flags);
728 return 0;
729}
730
731/*
732 * This device has no shutdown method - fully functioning local APICs
733 * are needed on every CPU up until machine_halt/restart/poweroff.
734 */
735
736static struct sysdev_class lapic_sysclass = {
737 set_kset_name("lapic"),
738 .resume = lapic_resume,
739 .suspend = lapic_suspend,
740};
741
742static struct sys_device device_lapic = {
743 .id = 0,
744 .cls = &lapic_sysclass,
745};
746
747static void __devinit apic_pm_activate(void)
748{
749 apic_pm_state.active = 1;
750}
751
752static int __init init_lapic_sysfs(void)
753{
754 int error;
755
756 if (!cpu_has_apic)
757 return 0;
758 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
759
760 error = sysdev_class_register(&lapic_sysclass);
761 if (!error)
762 error = sysdev_register(&device_lapic);
763 return error;
764}
765device_initcall(init_lapic_sysfs);
766
767#else /* CONFIG_PM */
768
769static void apic_pm_activate(void) { }
770
771#endif /* CONFIG_PM */
772
773/*
774 * Detect and enable local APICs on non-SMP boards.
775 * Original code written by Keir Fraser.
776 */
777
778static int __init apic_set_verbosity(char *str)
779{
780 if (strcmp("debug", str) == 0)
781 apic_verbosity = APIC_DEBUG;
782 else if (strcmp("verbose", str) == 0)
783 apic_verbosity = APIC_VERBOSE;
784 return 1;
785}
786
787__setup("apic=", apic_set_verbosity);
788
789static int __init detect_init_APIC (void) 994static int __init detect_init_APIC (void)
790{ 995{
791 u32 h, l, features; 996 u32 h, l, features;
@@ -797,7 +1002,7 @@ static int __init detect_init_APIC (void)
797 switch (boot_cpu_data.x86_vendor) { 1002 switch (boot_cpu_data.x86_vendor) {
798 case X86_VENDOR_AMD: 1003 case X86_VENDOR_AMD:
799 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || 1004 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
800 (boot_cpu_data.x86 == 15)) 1005 (boot_cpu_data.x86 == 15))
801 break; 1006 break;
802 goto no_apic; 1007 goto no_apic;
803 case X86_VENDOR_INTEL: 1008 case X86_VENDOR_INTEL:
@@ -811,23 +1016,23 @@ static int __init detect_init_APIC (void)
811 1016
812 if (!cpu_has_apic) { 1017 if (!cpu_has_apic) {
813 /* 1018 /*
814 * Over-ride BIOS and try to enable the local 1019 * Override the BIOS and try to enable the local APIC only if
815 * APIC only if "lapic" specified. 1020 * "lapic" is specified.
816 */ 1021 */
817 if (enable_local_apic <= 0) { 1022 if (enable_local_apic <= 0) {
818 printk("Local APIC disabled by BIOS -- " 1023 printk(KERN_INFO "Local APIC disabled by BIOS -- "
819 "you can enable it with \"lapic\"\n"); 1024 "you can enable it with \"lapic\"\n");
820 return -1; 1025 return -1;
821 } 1026 }
822 /* 1027 /*
823 * Some BIOSes disable the local APIC in the 1028 * Some BIOSes disable the local APIC in the APIC_BASE
824 * APIC_BASE MSR. This can only be done in 1029 * MSR. This can only be done in software for Intel P6 or later
825 * software for Intel P6 or later and AMD K7 1030 * and AMD K7 (Model > 1) or later.
826 * (Model > 1) or later.
827 */ 1031 */
828 rdmsr(MSR_IA32_APICBASE, l, h); 1032 rdmsr(MSR_IA32_APICBASE, l, h);
829 if (!(l & MSR_IA32_APICBASE_ENABLE)) { 1033 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
830 printk("Local APIC disabled by BIOS -- reenabling.\n"); 1034 printk(KERN_INFO
1035 "Local APIC disabled by BIOS -- reenabling.\n");
831 l &= ~MSR_IA32_APICBASE_BASE; 1036 l &= ~MSR_IA32_APICBASE_BASE;
832 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; 1037 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
833 wrmsr(MSR_IA32_APICBASE, l, h); 1038 wrmsr(MSR_IA32_APICBASE, l, h);
@@ -840,7 +1045,7 @@ static int __init detect_init_APIC (void)
840 */ 1045 */
841 features = cpuid_edx(1); 1046 features = cpuid_edx(1);
842 if (!(features & (1 << X86_FEATURE_APIC))) { 1047 if (!(features & (1 << X86_FEATURE_APIC))) {
843 printk("Could not enable APIC!\n"); 1048 printk(KERN_WARNING "Could not enable APIC!\n");
844 return -1; 1049 return -1;
845 } 1050 }
846 set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); 1051 set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
@@ -854,17 +1059,20 @@ static int __init detect_init_APIC (void)
854 if (nmi_watchdog != NMI_NONE) 1059 if (nmi_watchdog != NMI_NONE)
855 nmi_watchdog = NMI_LOCAL_APIC; 1060 nmi_watchdog = NMI_LOCAL_APIC;
856 1061
857 printk("Found and enabled local APIC!\n"); 1062 printk(KERN_INFO "Found and enabled local APIC!\n");
858 1063
859 apic_pm_activate(); 1064 apic_pm_activate();
860 1065
861 return 0; 1066 return 0;
862 1067
863no_apic: 1068no_apic:
864 printk("No local APIC present or hardware disabled\n"); 1069 printk(KERN_INFO "No local APIC present or hardware disabled\n");
865 return -1; 1070 return -1;
866} 1071}
867 1072
1073/**
1074 * init_apic_mappings - initialize APIC mappings
1075 */
868void __init init_apic_mappings(void) 1076void __init init_apic_mappings(void)
869{ 1077{
870 unsigned long apic_phys; 1078 unsigned long apic_phys;
@@ -924,387 +1132,96 @@ fake_ioapic_page:
924} 1132}
925 1133
926/* 1134/*
927 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts 1135 * This initializes the IO-APIC and APIC hardware if this is
928 * per second. We assume that the caller has already set up the local 1136 * a UP kernel.
929 * APIC.
930 *
931 * The APIC timer is not exactly sync with the external timer chip, it
932 * closely follows bus clocks.
933 */
934
935/*
936 * The timer chip is already set up at HZ interrupts per second here,
937 * but we do not accept timer interrupts yet. We only allow the BP
938 * to calibrate.
939 */
940static unsigned int __devinit get_8254_timer_count(void)
941{
942 unsigned long flags;
943
944 unsigned int count;
945
946 spin_lock_irqsave(&i8253_lock, flags);
947
948 outb_p(0x00, PIT_MODE);
949 count = inb_p(PIT_CH0);
950 count |= inb_p(PIT_CH0) << 8;
951
952 spin_unlock_irqrestore(&i8253_lock, flags);
953
954 return count;
955}
956
957/* next tick in 8254 can be caught by catching timer wraparound */
958static void __devinit wait_8254_wraparound(void)
959{
960 unsigned int curr_count, prev_count;
961
962 curr_count = get_8254_timer_count();
963 do {
964 prev_count = curr_count;
965 curr_count = get_8254_timer_count();
966
967 /* workaround for broken Mercury/Neptune */
968 if (prev_count >= curr_count + 0x100)
969 curr_count = get_8254_timer_count();
970
971 } while (prev_count >= curr_count);
972}
973
974/*
975 * Default initialization for 8254 timers. If we use other timers like HPET,
976 * we override this later
977 */
978void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
979
980/*
981 * This function sets up the local APIC timer, with a timeout of
982 * 'clocks' APIC bus clock. During calibration we actually call
983 * this function twice on the boot CPU, once with a bogus timeout
984 * value, second time for real. The other (noncalibrating) CPUs
985 * call this function only once, with the real, calibrated value.
986 *
987 * We do reads before writes even if unnecessary, to get around the
988 * P5 APIC double write bug.
989 */ 1137 */
990 1138int __init APIC_init_uniprocessor (void)
991#define APIC_DIVISOR 16
992
993static void __setup_APIC_LVTT(unsigned int clocks)
994{ 1139{
995 unsigned int lvtt_value, tmp_value, ver; 1140 if (enable_local_apic < 0)
996 int cpu = smp_processor_id(); 1141 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
997
998 ver = GET_APIC_VERSION(apic_read(APIC_LVR));
999 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
1000 if (!APIC_INTEGRATED(ver))
1001 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
1002
1003 if (cpu_isset(cpu, timer_bcast_ipi))
1004 lvtt_value |= APIC_LVT_MASKED;
1005 1142
1006 apic_write_around(APIC_LVTT, lvtt_value); 1143 if (!smp_found_config && !cpu_has_apic)
1144 return -1;
1007 1145
1008 /* 1146 /*
1009 * Divide PICLK by 16 1147 * Complain if the BIOS pretends there is one.
1010 */ 1148 */
1011 tmp_value = apic_read(APIC_TDCR); 1149 if (!cpu_has_apic &&
1012 apic_write_around(APIC_TDCR, (tmp_value 1150 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1013 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 1151 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1014 | APIC_TDR_DIV_16); 1152 boot_cpu_physical_apicid);
1015 1153 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1016 apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); 1154 return -1;
1017} 1155 }
1018 1156
1019static void __devinit setup_APIC_timer(unsigned int clocks) 1157 verify_local_APIC();
1020{
1021 unsigned long flags;
1022 1158
1023 local_irq_save(flags); 1159 connect_bsp_APIC();
1024 1160
1025 /* 1161 /*
1026 * Wait for IRQ0's slice: 1162 * Hack: In case of kdump, after a crash, kernel might be booting
1163 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
1164 * might be zero if read from MP tables. Get it from LAPIC.
1027 */ 1165 */
1028 wait_timer_tick(); 1166#ifdef CONFIG_CRASH_DUMP
1167 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1168#endif
1169 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
1029 1170
1030 __setup_APIC_LVTT(clocks); 1171 setup_local_APIC();
1031 1172
1032 local_irq_restore(flags); 1173#ifdef CONFIG_X86_IO_APIC
1174 if (smp_found_config)
1175 if (!skip_ioapic_setup && nr_ioapics)
1176 setup_IO_APIC();
1177#endif
1178 setup_boot_clock();
1179
1180 return 0;
1033} 1181}
1034 1182
1035/* 1183/*
1036 * In this function we calibrate APIC bus clocks to the external 1184 * APIC command line parameters
1037 * timer. Unfortunately we cannot use jiffies and the timer irq
1038 * to calibrate, since some later bootup code depends on getting
1039 * the first irq? Ugh.
1040 *
1041 * We want to do the calibration only once since we
1042 * want to have local timer irqs syncron. CPUs connected
1043 * by the same APIC bus have the very same bus frequency.
1044 * And we want to have irqs off anyways, no accidental
1045 * APIC irq that way.
1046 */ 1185 */
1047 1186static int __init parse_lapic(char *arg)
1048static int __init calibrate_APIC_clock(void)
1049{
1050 unsigned long long t1 = 0, t2 = 0;
1051 long tt1, tt2;
1052 long result;
1053 int i;
1054 const int LOOPS = HZ/10;
1055
1056 apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
1057
1058 /*
1059 * Put whatever arbitrary (but long enough) timeout
1060 * value into the APIC clock, we just want to get the
1061 * counter running for calibration.
1062 */
1063 __setup_APIC_LVTT(1000000000);
1064
1065 /*
1066 * The timer chip counts down to zero. Let's wait
1067 * for a wraparound to start exact measurement:
1068 * (the current tick might have been already half done)
1069 */
1070
1071 wait_timer_tick();
1072
1073 /*
1074 * We wrapped around just now. Let's start:
1075 */
1076 if (cpu_has_tsc)
1077 rdtscll(t1);
1078 tt1 = apic_read(APIC_TMCCT);
1079
1080 /*
1081 * Let's wait LOOPS wraparounds:
1082 */
1083 for (i = 0; i < LOOPS; i++)
1084 wait_timer_tick();
1085
1086 tt2 = apic_read(APIC_TMCCT);
1087 if (cpu_has_tsc)
1088 rdtscll(t2);
1089
1090 /*
1091 * The APIC bus clock counter is 32 bits only, it
1092 * might have overflown, but note that we use signed
1093 * longs, thus no extra care needed.
1094 *
1095 * underflown to be exact, as the timer counts down ;)
1096 */
1097
1098 result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
1099
1100 if (cpu_has_tsc)
1101 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
1102 "%ld.%04ld MHz.\n",
1103 ((long)(t2-t1)/LOOPS)/(1000000/HZ),
1104 ((long)(t2-t1)/LOOPS)%(1000000/HZ));
1105
1106 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
1107 "%ld.%04ld MHz.\n",
1108 result/(1000000/HZ),
1109 result%(1000000/HZ));
1110
1111 return result;
1112}
1113
1114static unsigned int calibration_result;
1115
1116void __init setup_boot_APIC_clock(void)
1117{
1118 unsigned long flags;
1119 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
1120 using_apic_timer = 1;
1121
1122 local_irq_save(flags);
1123
1124 calibration_result = calibrate_APIC_clock();
1125 /*
1126 * Now set up the timer for real.
1127 */
1128 setup_APIC_timer(calibration_result);
1129
1130 local_irq_restore(flags);
1131}
1132
1133void __devinit setup_secondary_APIC_clock(void)
1134{
1135 setup_APIC_timer(calibration_result);
1136}
1137
1138void disable_APIC_timer(void)
1139{
1140 if (using_apic_timer) {
1141 unsigned long v;
1142
1143 v = apic_read(APIC_LVTT);
1144 /*
1145 * When an illegal vector value (0-15) is written to an LVT
1146 * entry and delivery mode is Fixed, the APIC may signal an
1147 * illegal vector error, without regard to whether the mask
1148 * bit is set or whether an interrupt is actually seen on input.
1149 *
1150 * Boot sequence might call this function when the LVTT has
1151 * '0' vector value. So make sure vector field is set to
1152 * valid value.
1153 */
1154 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1155 apic_write_around(APIC_LVTT, v);
1156 }
1157}
1158
1159void enable_APIC_timer(void)
1160{ 1187{
1161 int cpu = smp_processor_id(); 1188 enable_local_apic = 1;
1162 1189 return 0;
1163 if (using_apic_timer &&
1164 !cpu_isset(cpu, timer_bcast_ipi)) {
1165 unsigned long v;
1166
1167 v = apic_read(APIC_LVTT);
1168 apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
1169 }
1170} 1190}
1191early_param("lapic", parse_lapic);
1171 1192
1172void switch_APIC_timer_to_ipi(void *cpumask) 1193static int __init parse_nolapic(char *arg)
1173{ 1194{
1174 cpumask_t mask = *(cpumask_t *)cpumask; 1195 enable_local_apic = -1;
1175 int cpu = smp_processor_id(); 1196 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1176 1197 return 0;
1177 if (cpu_isset(cpu, mask) &&
1178 !cpu_isset(cpu, timer_bcast_ipi)) {
1179 disable_APIC_timer();
1180 cpu_set(cpu, timer_bcast_ipi);
1181 }
1182} 1198}
1183EXPORT_SYMBOL(switch_APIC_timer_to_ipi); 1199early_param("nolapic", parse_nolapic);
1184 1200
1185void switch_ipi_to_APIC_timer(void *cpumask) 1201static int __init apic_set_verbosity(char *str)
1186{ 1202{
1187 cpumask_t mask = *(cpumask_t *)cpumask; 1203 if (strcmp("debug", str) == 0)
1188 int cpu = smp_processor_id(); 1204 apic_verbosity = APIC_DEBUG;
1189 1205 else if (strcmp("verbose", str) == 0)
1190 if (cpu_isset(cpu, mask) && 1206 apic_verbosity = APIC_VERBOSE;
1191 cpu_isset(cpu, timer_bcast_ipi)) { 1207 return 1;
1192 cpu_clear(cpu, timer_bcast_ipi);
1193 enable_APIC_timer();
1194 }
1195} 1208}
1196EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
1197
1198#undef APIC_DIVISOR
1199 1209
1200/* 1210__setup("apic=", apic_set_verbosity);
1201 * Local timer interrupt handler. It does both profiling and
1202 * process statistics/rescheduling.
1203 *
1204 * We do profiling in every local tick, statistics/rescheduling
1205 * happen only every 'profiling multiplier' ticks. The default
1206 * multiplier is 1 and it can be changed by writing the new multiplier
1207 * value into /proc/profile.
1208 */
1209
1210inline void smp_local_timer_interrupt(void)
1211{
1212 profile_tick(CPU_PROFILING);
1213#ifdef CONFIG_SMP
1214 update_process_times(user_mode_vm(get_irq_regs()));
1215#endif
1216 1211
1217 /*
1218 * We take the 'long' return path, and there every subsystem
1219 * grabs the appropriate locks (kernel lock/ irq lock).
1220 *
1221 * we might want to decouple profiling from the 'long path',
1222 * and do the profiling totally in assembly.
1223 *
1224 * Currently this isn't too much of an issue (performance wise),
1225 * we can take more than 100K local irqs per second on a 100 MHz P5.
1226 */
1227}
1228 1212
1229/* 1213/*
1230 * Local APIC timer interrupt. This is the most natural way for doing 1214 * Local APIC interrupts
1231 * local interrupts, but local timer interrupts can be emulated by
1232 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1233 *
1234 * [ if a single-CPU system runs an SMP kernel then we call the local
1235 * interrupt as well. Thus we cannot inline the local irq ... ]
1236 */ 1215 */
1237 1216
1238fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
1239{
1240 struct pt_regs *old_regs = set_irq_regs(regs);
1241 int cpu = smp_processor_id();
1242
1243 /*
1244 * the NMI deadlock-detector uses this.
1245 */
1246 per_cpu(irq_stat, cpu).apic_timer_irqs++;
1247
1248 /*
1249 * NOTE! We'd better ACK the irq immediately,
1250 * because timer handling can be slow.
1251 */
1252 ack_APIC_irq();
1253 /*
1254 * update_process_times() expects us to have done irq_enter().
1255 * Besides, if we don't, timer interrupts ignore the global
1256 * interrupt lock, which is the WrongThing (tm) to do.
1257 */
1258 irq_enter();
1259 smp_local_timer_interrupt();
1260 irq_exit();
1261 set_irq_regs(old_regs);
1262}
1263
1264#ifndef CONFIG_SMP
1265static void up_apic_timer_interrupt_call(void)
1266{
1267 int cpu = smp_processor_id();
1268
1269 /*
1270 * the NMI deadlock-detector uses this.
1271 */
1272 per_cpu(irq_stat, cpu).apic_timer_irqs++;
1273
1274 smp_local_timer_interrupt();
1275}
1276#endif
1277
1278void smp_send_timer_broadcast_ipi(void)
1279{
1280 cpumask_t mask;
1281
1282 cpus_and(mask, cpu_online_map, timer_bcast_ipi);
1283 if (!cpus_empty(mask)) {
1284#ifdef CONFIG_SMP
1285 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
1286#else
1287 /*
1288 * We can directly call the apic timer interrupt handler
1289 * in UP case. Minus all irq related functions
1290 */
1291 up_apic_timer_interrupt_call();
1292#endif
1293 }
1294}
1295
1296int setup_profiling_timer(unsigned int multiplier)
1297{
1298 return -EINVAL;
1299}
1300
1301/* 1217/*
1302 * This interrupt should _never_ happen with our APIC/SMP architecture 1218 * This interrupt should _never_ happen with our APIC/SMP architecture
1303 */ 1219 */
1304fastcall void smp_spurious_interrupt(struct pt_regs *regs) 1220void smp_spurious_interrupt(struct pt_regs *regs)
1305{ 1221{
1306 unsigned long v; 1222 unsigned long v;
1307 1223
1224 exit_idle();
1308 irq_enter(); 1225 irq_enter();
1309 /* 1226 /*
1310 * Check if this really is a spurious interrupt and ACK it 1227 * Check if this really is a spurious interrupt and ACK it
@@ -1316,19 +1233,19 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs)
1316 ack_APIC_irq(); 1233 ack_APIC_irq();
1317 1234
1318 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1235 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1319 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", 1236 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
1320 smp_processor_id()); 1237 "should never happen.\n", smp_processor_id());
1321 irq_exit(); 1238 irq_exit();
1322} 1239}
1323 1240
1324/* 1241/*
1325 * This interrupt should never happen with our APIC/SMP architecture 1242 * This interrupt should never happen with our APIC/SMP architecture
1326 */ 1243 */
1327 1244void smp_error_interrupt(struct pt_regs *regs)
1328fastcall void smp_error_interrupt(struct pt_regs *regs)
1329{ 1245{
1330 unsigned long v, v1; 1246 unsigned long v, v1;
1331 1247
1248 exit_idle();
1332 irq_enter(); 1249 irq_enter();
1333 /* First tickle the hardware, only then report what went on. -- REW */ 1250 /* First tickle the hardware, only then report what went on. -- REW */
1334 v = apic_read(APIC_ESR); 1251 v = apic_read(APIC_ESR);
@@ -1348,69 +1265,261 @@ fastcall void smp_error_interrupt(struct pt_regs *regs)
1348 7: Illegal register address 1265 7: Illegal register address
1349 */ 1266 */
1350 printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", 1267 printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
1351 smp_processor_id(), v , v1); 1268 smp_processor_id(), v , v1);
1352 irq_exit(); 1269 irq_exit();
1353} 1270}
1354 1271
1355/* 1272/*
1356 * This initializes the IO-APIC and APIC hardware if this is 1273 * Initialize APIC interrupts
1357 * a UP kernel.
1358 */ 1274 */
1359int __init APIC_init_uniprocessor (void) 1275void __init apic_intr_init(void)
1360{ 1276{
1361 if (enable_local_apic < 0) 1277#ifdef CONFIG_SMP
1362 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); 1278 smp_intr_init();
1279#endif
1280 /* self generated IPI for local APIC timer */
1281 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1363 1282
1364 if (!smp_found_config && !cpu_has_apic) 1283 /* IPI vectors for APIC spurious and error interrupts */
1365 return -1; 1284 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1285 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1366 1286
1367 /* 1287 /* thermal monitor LVT interrupt */
1368 * Complain if the BIOS pretends there is one. 1288#ifdef CONFIG_X86_MCE_P4THERMAL
1369 */ 1289 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1370 if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1290#endif
1371 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 1291}
1372 boot_cpu_physical_apicid); 1292
1373 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); 1293/**
1374 return -1; 1294 * connect_bsp_APIC - attach the APIC to the interrupt system
1295 */
1296void __init connect_bsp_APIC(void)
1297{
1298 if (pic_mode) {
1299 /*
1300 * Do not trust the local APIC being empty at bootup.
1301 */
1302 clear_local_APIC();
1303 /*
1304 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1305 * local APIC to INT and NMI lines.
1306 */
1307 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1308 "enabling APIC mode.\n");
1309 outb(0x70, 0x22);
1310 outb(0x01, 0x23);
1375 } 1311 }
1312 enable_apic_mode();
1313}
1376 1314
1377 verify_local_APIC(); 1315/**
1316 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1317 * @virt_wire_setup: indicates whether virtual wire mode is selected
1318 *
1319 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1320 * APIC is disabled.
1321 */
1322void disconnect_bsp_APIC(int virt_wire_setup)
1323{
1324 if (pic_mode) {
1325 /*
1326 * Put the board back into PIC mode (has an effect only on
1327 * certain older boards). Note that APIC interrupts, including
1328 * IPIs, won't work beyond this point! The only exception are
1329 * INIT IPIs.
1330 */
1331 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1332 "entering PIC mode.\n");
1333 outb(0x70, 0x22);
1334 outb(0x00, 0x23);
1335 } else {
1336 /* Go back to Virtual Wire compatibility mode */
1337 unsigned long value;
1378 1338
1379 connect_bsp_APIC(); 1339 /* For the spurious interrupt use vector F, and enable it */
1340 value = apic_read(APIC_SPIV);
1341 value &= ~APIC_VECTOR_MASK;
1342 value |= APIC_SPIV_APIC_ENABLED;
1343 value |= 0xf;
1344 apic_write_around(APIC_SPIV, value);
1380 1345
1381 /* 1346 if (!virt_wire_setup) {
1382 * Hack: In case of kdump, after a crash, kernel might be booting 1347 /*
1383 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 1348 * For LVT0 make it edge triggered, active high,
1384 * might be zero if read from MP tables. Get it from LAPIC. 1349 * external and enabled
1385 */ 1350 */
1386#ifdef CONFIG_CRASH_DUMP 1351 value = apic_read(APIC_LVT0);
1387 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); 1352 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1388#endif 1353 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1389 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 1354 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
1355 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1356 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1357 apic_write_around(APIC_LVT0, value);
1358 } else {
1359 /* Disable LVT0 */
1360 apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
1361 }
1390 1362
1391 setup_local_APIC(); 1363 /*
1364 * For LVT1 make it edge triggered, active high, nmi and
1365 * enabled
1366 */
1367 value = apic_read(APIC_LVT1);
1368 value &= ~(
1369 APIC_MODE_MASK | APIC_SEND_PENDING |
1370 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1371 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1372 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1373 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1374 apic_write_around(APIC_LVT1, value);
1375 }
1376}
1392 1377
1393#ifdef CONFIG_X86_IO_APIC 1378/*
1394 if (smp_found_config) 1379 * Power management
1395 if (!skip_ioapic_setup && nr_ioapics) 1380 */
1396 setup_IO_APIC(); 1381#ifdef CONFIG_PM
1382
1383static struct {
1384 int active;
1385 /* r/w apic fields */
1386 unsigned int apic_id;
1387 unsigned int apic_taskpri;
1388 unsigned int apic_ldr;
1389 unsigned int apic_dfr;
1390 unsigned int apic_spiv;
1391 unsigned int apic_lvtt;
1392 unsigned int apic_lvtpc;
1393 unsigned int apic_lvt0;
1394 unsigned int apic_lvt1;
1395 unsigned int apic_lvterr;
1396 unsigned int apic_tmict;
1397 unsigned int apic_tdcr;
1398 unsigned int apic_thmr;
1399} apic_pm_state;
1400
1401static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1402{
1403 unsigned long flags;
1404 int maxlvt;
1405
1406 if (!apic_pm_state.active)
1407 return 0;
1408
1409 maxlvt = lapic_get_maxlvt();
1410
1411 apic_pm_state.apic_id = apic_read(APIC_ID);
1412 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1413 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1414 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
1415 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
1416 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
1417 if (maxlvt >= 4)
1418 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
1419 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
1420 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
1421 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1422 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1423 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1424#ifdef CONFIG_X86_MCE_P4THERMAL
1425 if (maxlvt >= 5)
1426 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1397 1427#endif
1398 setup_boot_APIC_clock();
1399 1428
1429 local_irq_save(flags);
1430 disable_local_APIC();
1431 local_irq_restore(flags);
1400 1432 return 0;
1401 1433}
1402 1434
1403static int __init parse_lapic(char *arg)
1435static int lapic_resume(struct sys_device *dev)
1404 1436{
1405 lapic_enable();
1437 unsigned int l, h;
1438 unsigned long flags;
1439 int maxlvt;
1440
1441 if (!apic_pm_state.active)
1442 return 0;
1443
1444 maxlvt = lapic_get_maxlvt();
1445
1446 local_irq_save(flags);
1447
1448 /*
1449 * Make sure the APICBASE points to the right address
1450 *
1451 * FIXME! This will be wrong if we ever support suspend on
1452 * SMP! We'll need to do this as part of the CPU restore!
1453 */
1454 rdmsr(MSR_IA32_APICBASE, l, h);
1455 l &= ~MSR_IA32_APICBASE_BASE;
1456 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1457 wrmsr(MSR_IA32_APICBASE, l, h);
1458
1459 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1460 apic_write(APIC_ID, apic_pm_state.apic_id);
1461 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
1462 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
1463 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
1464 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1465 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1466 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1467#ifdef CONFIG_X86_MCE_P4THERMAL
1468 if (maxlvt >= 5)
1469 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1470#endif
1471 if (maxlvt >= 4)
1472 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
1473 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
1474 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
1475 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
1476 apic_write(APIC_ESR, 0);
1477 apic_read(APIC_ESR);
1478 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1479 apic_write(APIC_ESR, 0);
1480 apic_read(APIC_ESR);
1481 local_irq_restore(flags);
1406 1482 return 0;
1407 1483}
1408early_param("lapic", parse_lapic);
1409 1484
1410static int __init parse_nolapic(char *arg)
1485/*
1486 * This device has no shutdown method - fully functioning local APICs
1487 * are needed on every CPU up until machine_halt/restart/poweroff.
1488 */
1489
1490static struct sysdev_class lapic_sysclass = {
1491 set_kset_name("lapic"),
1492 .resume = lapic_resume,
1493 .suspend = lapic_suspend,
1494};
1495
1496static struct sys_device device_lapic = {
1497 .id = 0,
1498 .cls = &lapic_sysclass,
1499};
1500
1501static void __devinit apic_pm_activate(void)
1411 1502{
1412 lapic_disable();
1503 apic_pm_state.active = 1;
1413 return 0;
1414 1504}
1415early_param("nolapic", parse_nolapic);
1416 1505
1506static int __init init_lapic_sysfs(void)
1507{
1508 int error;
1509
1510 if (!cpu_has_apic)
1511 return 0;
1512 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
1513
1514 error = sysdev_class_register(&lapic_sysclass);
1515 if (!error)
1516 error = sysdev_register(&device_lapic);
1517 return error;
1518}
1519device_initcall(init_lapic_sysfs);
1520
1521#else /* CONFIG_PM */
1522
1523static void apic_pm_activate(void) { }
1524
1525#endif /* CONFIG_PM */
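The virtual-wire setup in disconnect_bsp_APIC() above is plain read-modify-write arithmetic on a 32-bit LVT register image. A minimal standalone sketch of the LVT0 ExtINT programming, with the mask constants redefined locally (values mirror the i386 apicdef.h definitions, but take them as illustrative; only kernel code on the boot CPU would write the real register):

#include <stdio.h>
#include <stdint.h>

/* Local stand-ins for the kernel's APIC LVT bit masks (illustrative). */
#define APIC_LVT_MASKED		(1u << 16)
#define APIC_LVT_LEVEL_TRIGGER	(1u << 15)
#define APIC_LVT_REMOTE_IRR	(1u << 14)
#define APIC_INPUT_POLARITY	(1u << 13)
#define APIC_SEND_PENDING	(1u << 12)
#define APIC_MODE_MASK		0x700u
#define APIC_MODE_EXTINT	0x7u
#define SET_APIC_DELIVERY_MODE(x, m)	(((x) & ~APIC_MODE_MASK) | ((m) << 8))

int main(void)
{
	uint32_t value = 0x0001f000;	/* pretend this was read from LVT0 */

	/* Clear mode, polarity, trigger and mask bits... */
	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY |
		   APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
	/* ...then select edge-triggered, active-high ExtINT delivery. */
	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);

	printf("LVT0 = %#010x\n", value);
	return 0;
}

Run outside the kernel this only exercises the bit math, but it is the same sequence the patch applies to LVT0 and, with APIC_MODE_NMI, to LVT1.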
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 199016927541..064bbf2861f4 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -211,6 +211,7 @@
211 211#include <linux/slab.h>
212 212#include <linux/stat.h>
213 213#include <linux/proc_fs.h>
214#include <linux/seq_file.h>
214 215#include <linux/miscdevice.h>
215 216#include <linux/apm_bios.h>
216 217#include <linux/init.h>
@@ -235,7 +236,6 @@
235 236
236#include "io_ports.h" 237#include "io_ports.h"
237 238
238extern unsigned long get_cmos_time(void);
239extern void machine_real_restart(unsigned char *, int); 239extern void machine_real_restart(unsigned char *, int);
240 240
241#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) 241#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
@@ -1175,28 +1175,6 @@ out:
1175 spin_unlock(&user_list_lock); 1175 spin_unlock(&user_list_lock);
1176} 1176}
1177 1177
1178static void set_time(void)
1179{
1180 struct timespec ts;
1181 if (got_clock_diff) { /* Must know time zone in order to set clock */
1182 ts.tv_sec = get_cmos_time() + clock_cmos_diff;
1183 ts.tv_nsec = 0;
1184 do_settimeofday(&ts);
1185 }
1186}
1187
1188static void get_time_diff(void)
1189{
1190#ifndef CONFIG_APM_RTC_IS_GMT
1191 /*
1192 * Estimate time zone so that set_time can update the clock
1193 */
1194 clock_cmos_diff = -get_cmos_time();
1195 clock_cmos_diff += get_seconds();
1196 got_clock_diff = 1;
1197#endif
1198}
1199
1200static void reinit_timer(void) 1178static void reinit_timer(void)
1201{ 1179{
1202#ifdef INIT_TIMER_AFTER_SUSPEND 1180#ifdef INIT_TIMER_AFTER_SUSPEND
@@ -1236,19 +1214,6 @@ static int suspend(int vetoable)
1236 local_irq_disable(); 1214 local_irq_disable();
1237 device_power_down(PMSG_SUSPEND); 1215 device_power_down(PMSG_SUSPEND);
1238 1216
1239 /* serialize with the timer interrupt */
1240 write_seqlock(&xtime_lock);
1241
1242 /* protect against access to timer chip registers */
1243 spin_lock(&i8253_lock);
1244
1245 get_time_diff();
1246 /*
1247 * Irq spinlock must be dropped around set_system_power_state.
1248 * We'll undo any timer changes due to interrupts below.
1249 */
1250 spin_unlock(&i8253_lock);
1251 write_sequnlock(&xtime_lock);
1252 local_irq_enable(); 1217 local_irq_enable();
1253 1218
1254 save_processor_state(); 1219 save_processor_state();
@@ -1257,7 +1222,6 @@ static int suspend(int vetoable)
1257 restore_processor_state(); 1222 restore_processor_state();
1258 1223
1259 local_irq_disable(); 1224 local_irq_disable();
1260 set_time();
1261 reinit_timer(); 1225 reinit_timer();
1262 1226
1263 if (err == APM_NO_ERROR) 1227 if (err == APM_NO_ERROR)
@@ -1287,11 +1251,6 @@ static void standby(void)
1287 1251
1288 local_irq_disable(); 1252 local_irq_disable();
1289 device_power_down(PMSG_SUSPEND); 1253 device_power_down(PMSG_SUSPEND);
1290 /* serialize with the timer interrupt */
1291 write_seqlock(&xtime_lock);
1292 /* If needed, notify drivers here */
1293 get_time_diff();
1294 write_sequnlock(&xtime_lock);
1295 local_irq_enable(); 1254 local_irq_enable();
1296 1255
1297 err = set_system_power_state(APM_STATE_STANDBY); 1256 err = set_system_power_state(APM_STATE_STANDBY);
@@ -1385,7 +1344,6 @@ static void check_events(void)
1385 ignore_bounce = 1; 1344 ignore_bounce = 1;
1386 if ((event != APM_NORMAL_RESUME) 1345 if ((event != APM_NORMAL_RESUME)
1387 || (ignore_normal_resume == 0)) { 1346 || (ignore_normal_resume == 0)) {
1388 set_time();
1389 device_resume(); 1347 device_resume();
1390 pm_send_all(PM_RESUME, (void *)0); 1348 pm_send_all(PM_RESUME, (void *)0);
1391 queue_event(event, NULL); 1349 queue_event(event, NULL);
@@ -1401,7 +1359,6 @@ static void check_events(void)
1401 break; 1359 break;
1402 1360
1403 case APM_UPDATE_TIME: 1361 case APM_UPDATE_TIME:
1404 set_time();
1405 break; 1362 break;
1406 1363
1407 case APM_CRITICAL_SUSPEND: 1364 case APM_CRITICAL_SUSPEND:
@@ -1636,9 +1593,8 @@ static int do_open(struct inode * inode, struct file * filp)
1636 return 0; 1593 return 0;
1637} 1594}
1638 1595
1639static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1596static int proc_apm_show(struct seq_file *m, void *v)
1640 1597{
1641 char * p;
1642 unsigned short bx; 1598 unsigned short bx;
1643 unsigned short cx; 1599 unsigned short cx;
1644 unsigned short dx; 1600 unsigned short dx;
@@ -1650,8 +1606,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1650 int time_units = -1; 1606 int time_units = -1;
1651 char *units = "?"; 1607 char *units = "?";
1652 1608
1653 p = buf;
1654
1655 if ((num_online_cpus() == 1) && 1609 if ((num_online_cpus() == 1) &&
1656 !(error = apm_get_power_status(&bx, &cx, &dx))) { 1610 !(error = apm_get_power_status(&bx, &cx, &dx))) {
1657 ac_line_status = (bx >> 8) & 0xff; 1611 ac_line_status = (bx >> 8) & 0xff;
@@ -1705,7 +1659,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1705 -1: Unknown 1659 -1: Unknown
1706 8) min = minutes; sec = seconds */ 1660 8) min = minutes; sec = seconds */
1707 1661
1708 p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
1662 seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
1709 1663 driver_version,
1710 1664 (apm_info.bios.version >> 8) & 0xff,
1711 1665 apm_info.bios.version & 0xff,
@@ -1716,10 +1670,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1716 1670 percentage,
1717 1671 time_units,
1718 1672 units);
1673 return 0;
1674}
1719 1675
1720 return p - buf;
1676static int proc_apm_open(struct inode *inode, struct file *file)
1677{
1678 return single_open(file, proc_apm_show, NULL);
1721 1679}
1722 1680
1681static const struct file_operations apm_file_ops = {
1682 .owner = THIS_MODULE,
1683 .open = proc_apm_open,
1684 .read = seq_read,
1685 .llseek = seq_lseek,
1686 .release = single_release,
1687};
1688
1723static int apm(void *unused) 1689static int apm(void *unused)
1724{ 1690{
1725 unsigned short bx; 1691 unsigned short bx;
@@ -1894,7 +1860,7 @@ static int __init apm_setup(char *str)
1894__setup("apm=", apm_setup); 1860__setup("apm=", apm_setup);
1895#endif 1861#endif
1896 1862
1897static struct file_operations apm_bios_fops = {
1863static const struct file_operations apm_bios_fops = {
1898 .owner = THIS_MODULE, 1864 .owner = THIS_MODULE,
1899 .read = do_read, 1865 .read = do_read,
1900 .poll = do_poll, 1866 .poll = do_poll,
@@ -2341,9 +2307,9 @@ static int __init apm_init(void)
2341 set_base(gdt[APM_DS >> 3], 2307 set_base(gdt[APM_DS >> 3],
2342 __va((unsigned long)apm_info.bios.dseg << 4)); 2308 __va((unsigned long)apm_info.bios.dseg << 4));
2343 2309
2344 apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
2310 apm_proc = create_proc_entry("apm", 0, NULL);
2345 2311 if (apm_proc)
2346 apm_proc->owner = THIS_MODULE;
2312 apm_proc->proc_fops = &apm_file_ops;
2347 2313
2348 kapmd_task = kthread_create(apm, NULL, "kapmd"); 2314 kapmd_task = kthread_create(apm, NULL, "kapmd");
2349 if (IS_ERR(kapmd_task)) { 2315 if (IS_ERR(kapmd_task)) {
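The apm conversion above is the stock seq_file recipe: a show() callback that prints into a seq_file, single_open() wiring, and a file_operations that delegates to seq_read/seq_lseek/single_release. A minimal sketch of the same pattern as a stand-alone module against the 2.6.2x-era procfs API (the "demo" names are placeholders, not part of the patch):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *m, void *v)
{
	/* seq_printf() replaces the old sprintf-into-page-buffer dance. */
	seq_printf(m, "driver version %d.%d\n", 1, 16);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init demo_init(void)
{
	struct proc_dir_entry *pde = create_proc_entry("demo", 0, NULL);

	if (!pde)
		return -ENOMEM;
	pde->proc_fops = &demo_fops;
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The win over the old create_proc_info_entry() interface is that seq_file owns buffer sizing and partial reads, so the show() callback never manages a page buffer by hand.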
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1b2f3cd33270..c37535163bfc 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -72,7 +72,7 @@ void foo(void)
72 OFFSET(PT_EAX, pt_regs, eax); 72 OFFSET(PT_EAX, pt_regs, eax);
73 OFFSET(PT_DS, pt_regs, xds); 73 OFFSET(PT_DS, pt_regs, xds);
74 OFFSET(PT_ES, pt_regs, xes); 74 OFFSET(PT_ES, pt_regs, xes);
75 OFFSET(PT_GS, pt_regs, xgs);
75 OFFSET(PT_FS, pt_regs, xfs);
76 OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); 76 OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
77 OFFSET(PT_EIP, pt_regs, eip); 77 OFFSET(PT_EIP, pt_regs, eip);
78 OFFSET(PT_CS, pt_regs, xcs); 78 OFFSET(PT_CS, pt_regs, xcs);
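PT_GS becoming PT_FS is mechanical because entry.S never hard-codes offsets; asm-offsets.c derives them from the C struct with offsetof(). The real file emits the constants through asm markers that the build post-processes into asm-offsets.h; a printf-based stand-in shows the same derivation (struct trimmed, names hypothetical):

#include <stdio.h>
#include <stddef.h>

/* Trimmed stand-in for the real pt_regs layout. */
struct pt_regs_demo {
	long ebx, ecx, edx, esi, edi, ebp, eax;
	int xds, xes, xfs;
};

/* Mirror the asm-offsets.c macros, but print instead of emitting asm. */
#define DEFINE(sym, val) printf("#define %s %ld\n", #sym, (long)(val))
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(struct str, mem))

int main(void)
{
	OFFSET(PT_EAX, pt_regs_demo, eax);
	OFFSET(PT_FS, pt_regs_demo, xfs);
	return 0;
}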
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 8a8bbdaaf38a..dcbbd0a8bfc2 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -605,7 +605,7 @@ void __init early_cpu_init(void)
605struct pt_regs * __devinit idle_regs(struct pt_regs *regs) 605struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
606{ 606{
607 memset(regs, 0, sizeof(struct pt_regs)); 607 memset(regs, 0, sizeof(struct pt_regs));
608 regs->xgs = __KERNEL_PDA;
608 regs->xfs = __KERNEL_PDA;
609 return regs; 609 return regs;
610} 610}
611 611
@@ -662,12 +662,12 @@ struct i386_pda boot_pda = {
662 .pcurrent = &init_task, 662 .pcurrent = &init_task,
663}; 663};
664 664
665static inline void set_kernel_gs(void)
665static inline void set_kernel_fs(void)
666 666{
667 /* Set %gs for this CPU's PDA. Memory clobber is to create a
667 /* Set %fs for this CPU's PDA. Memory clobber is to create a
668 668 barrier with respect to any PDA operations, so the compiler
669 669 doesn't move any before here. */
670 asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
670 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
671} 671}
672 672
673/* Initialize the CPU's GDT and PDA. The boot CPU does this for 673/* Initialize the CPU's GDT and PDA. The boot CPU does this for
@@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu)
718 the boot CPU, this will transition from the boot gdt+pda to 718 the boot CPU, this will transition from the boot gdt+pda to
719 the real ones). */ 719 the real ones). */
720 load_gdt(cpu_gdt_descr); 720 load_gdt(cpu_gdt_descr);
721 set_kernel_gs();
721 set_kernel_fs();
722} 722}
723 723
724/* Common CPU init for both boot and secondary CPUs */ 724/* Common CPU init for both boot and secondary CPUs */
@@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
764 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 764 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
765#endif 765#endif
766 766
767 /* Clear %fs. */
768 asm volatile ("mov %0, %%fs" : : "r" (0));
767 /* Clear %gs. */
768 asm volatile ("mov %0, %%gs" : : "r" (0));
769 769
770 /* Clear all 6 debug registers: */ 770 /* Clear all 6 debug registers: */
771 set_debugreg(0, 0); 771 set_debugreg(0, 0);
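The common.c hunks swap which segment register anchors the per-CPU PDA: the kernel now loads %fs with __KERNEL_PDA and clears %gs instead of the reverse. Reading selectors is unprivileged, so the mov forms involved can be shown in a tiny x86-only user-space sketch (loading a selector, the other half of the trick, stays kernel-only):

#include <stdio.h>

int main(void)
{
	unsigned short fs_sel, gs_sel;

	/* Same instruction shape as set_kernel_fs(), but reading rather
	 * than writing, since writing selectors needs a valid GDT entry. */
	asm volatile ("mov %%fs, %0" : "=r" (fs_sel));
	asm volatile ("mov %%gs, %0" : "=r" (gs_sel));
	printf("fs=%#06x gs=%#06x\n", fs_sel, gs_sel);
	return 0;
}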
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index 5299c5bf4454..6c52182ca323 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -217,6 +217,15 @@ config X86_LONGHAUL
217 217
218 If in doubt, say N. 218 If in doubt, say N.
219 219
220config X86_E_POWERSAVER
221 tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
222 select CPU_FREQ_TABLE
223 depends on EXPERIMENTAL
224 help
225 This adds the CPUFreq driver for VIA C7 processors.
226
227 If in doubt, say N.
228
220comment "shared options" 229comment "shared options"
221 230
222config X86_ACPI_CPUFREQ_PROC_INTF 231config X86_ACPI_CPUFREQ_PROC_INTF
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
index 8de3abe322a9..560f7760dae5 100644
--- a/arch/i386/kernel/cpu/cpufreq/Makefile
+++ b/arch/i386/kernel/cpu/cpufreq/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
5obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
5obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o 6obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
6obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o 7obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o
7obj-$(CONFIG_X86_LONGRUN) += longrun.o 8obj-$(CONFIG_X86_LONGRUN) += longrun.o
diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c
new file mode 100644
index 000000000000..f43d98e11cc7
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c
@@ -0,0 +1,334 @@
1/*
2 * Based on documentation provided by Dave Jones. Thanks!
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/cpufreq.h>
13#include <linux/ioport.h>
14#include <linux/slab.h>
15
16#include <asm/msr.h>
17#include <asm/tsc.h>
18#include <asm/timex.h>
19#include <asm/io.h>
20#include <asm/delay.h>
21
22#define EPS_BRAND_C7M 0
23#define EPS_BRAND_C7 1
24#define EPS_BRAND_EDEN 2
25#define EPS_BRAND_C3 3
26
27struct eps_cpu_data {
28 u32 fsb;
29 struct cpufreq_frequency_table freq_table[];
30};
31
32static struct eps_cpu_data *eps_cpu[NR_CPUS];
33
34
35static unsigned int eps_get(unsigned int cpu)
36{
37 struct eps_cpu_data *centaur;
38 u32 lo, hi;
39
40 if (cpu)
41 return 0;
42 centaur = eps_cpu[cpu];
43 if (centaur == NULL)
44 return 0;
45
46 /* Return current frequency */
47 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
48 return centaur->fsb * ((lo >> 8) & 0xff);
49}
50
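eps_get() turns the PERF_STATUS low word into a frequency by multiplying the FSB by the 8-bit multiplier in bits 8-15; elsewhere the driver renders voltage as VID*16+700 mV. The same slicing on an invented MSR value, assuming a hypothetical 100 MHz FSB expressed in kHz:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t lo = 0x00000a08;	/* hypothetical MSR low word */
	unsigned int fsb = 100000;	/* hypothetical 100 MHz FSB, in kHz */
	unsigned int mult = (lo >> 8) & 0xff;	/* bits 8-15: multiplier */
	unsigned int vid = lo & 0xff;		/* bits 0-7: voltage ID */

	printf("multiplier %u, voltage %u mV, frequency %u kHz\n",
	       mult, vid * 16 + 700, fsb * mult);
	return 0;
}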
51static int eps_set_state(struct eps_cpu_data *centaur,
52 unsigned int cpu,
53 u32 dest_state)
54{
55 struct cpufreq_freqs freqs;
56 u32 lo, hi;
57 int err = 0;
58 int i;
59
60 freqs.old = eps_get(cpu);
61 freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
62 freqs.cpu = cpu;
63 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
64
65 /* Wait while CPU is busy */
66 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
67 i = 0;
68 while (lo & ((1 << 16) | (1 << 17))) {
69 udelay(16);
70 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
71 i++;
72 if (unlikely(i > 64)) {
73 err = -ENODEV;
74 goto postchange;
75 }
76 }
77 /* Set new multiplier and voltage */
78 wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
79 /* Wait until transition end */
80 i = 0;
81 do {
82 udelay(16);
83 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
84 i++;
85 if (unlikely(i > 64)) {
86 err = -ENODEV;
87 goto postchange;
88 }
89 } while (lo & ((1 << 16) | (1 << 17)));
90
91 /* Return current frequency */
92postchange:
93 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
94 freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
95
96 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
97 return err;
98}
99
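Both waits in eps_set_state() are bounded: at most 64 polls of the busy bits, roughly a millisecond of udelay(16), before giving up with -ENODEV. The shape of that loop in miniature, with a mock status read standing in for rdmsr():

#include <stdio.h>

#define BUSY_BITS ((1u << 16) | (1u << 17))

/* Mock "hardware": reports busy for the first few polls. */
static unsigned int read_status(void)
{
	static int polls;
	return (polls++ < 3) ? BUSY_BITS : 0;
}

static int wait_while_busy(void)
{
	int i = 0;

	while (read_status() & BUSY_BITS) {
		/* udelay(16) would sit here in the driver */
		if (++i > 64)
			return -1;	/* give up: -ENODEV in the driver */
	}
	return 0;
}

int main(void)
{
	printf("wait_while_busy() = %d\n", wait_while_busy());
	return 0;
}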
100static int eps_target(struct cpufreq_policy *policy,
101 unsigned int target_freq,
102 unsigned int relation)
103{
104 struct eps_cpu_data *centaur;
105 unsigned int newstate = 0;
106 unsigned int cpu = policy->cpu;
107 unsigned int dest_state;
108 int ret;
109
110 if (unlikely(eps_cpu[cpu] == NULL))
111 return -ENODEV;
112 centaur = eps_cpu[cpu];
113
114 if (unlikely(cpufreq_frequency_table_target(policy,
115 &eps_cpu[cpu]->freq_table[0],
116 target_freq,
117 relation,
118 &newstate))) {
119 return -EINVAL;
120 }
121
122 /* Make frequency transition */
123 dest_state = centaur->freq_table[newstate].index & 0xffff;
124 ret = eps_set_state(centaur, cpu, dest_state);
125 if (ret)
126 printk(KERN_ERR "eps: Timeout!\n");
127 return ret;
128}
129
130static int eps_verify(struct cpufreq_policy *policy)
131{
132 return cpufreq_frequency_table_verify(policy,
133 &eps_cpu[policy->cpu]->freq_table[0]);
134}
135
136static int eps_cpu_init(struct cpufreq_policy *policy)
137{
138 unsigned int i;
139 u32 lo, hi;
140 u64 val;
141 u8 current_multiplier, current_voltage;
142 u8 max_multiplier, max_voltage;
143 u8 min_multiplier, min_voltage;
144 u8 brand;
145 u32 fsb;
146 struct eps_cpu_data *centaur;
147 struct cpufreq_frequency_table *f_table;
148 int k, step, voltage;
149 int ret;
150 int states;
151
152 if (policy->cpu != 0)
153 return -ENODEV;
154
155 /* Check brand */
156 printk("eps: Detected VIA ");
157 rdmsr(0x1153, lo, hi);
158 brand = (((lo >> 2) ^ lo) >> 18) & 3;
159 switch(brand) {
160 case EPS_BRAND_C7M:
161 printk("C7-M\n");
162 break;
163 case EPS_BRAND_C7:
164 printk("C7\n");
165 break;
166 case EPS_BRAND_EDEN:
167 printk("Eden\n");
168 break;
169 case EPS_BRAND_C3:
170 printk("C3\n");
171 return -ENODEV;
172 break;
173 }
174 /* Enable Enhanced PowerSaver */
175 rdmsrl(MSR_IA32_MISC_ENABLE, val);
176 if (!(val & 1 << 16)) {
177 val |= 1 << 16;
178 wrmsrl(MSR_IA32_MISC_ENABLE, val);
179 /* Can be locked at 0 */
180 rdmsrl(MSR_IA32_MISC_ENABLE, val);
181 if (!(val & 1 << 16)) {
182 printk("eps: Can't enable Enhanced PowerSaver\n");
183 return -ENODEV;
184 }
185 }
186
187 /* Print voltage and multiplier */
188 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
189 current_voltage = lo & 0xff;
190 printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
191 current_multiplier = (lo >> 8) & 0xff;
192 printk("eps: Current multiplier = %d\n", current_multiplier);
193
194 /* Print limits */
195 max_voltage = hi & 0xff;
196 printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
197 max_multiplier = (hi >> 8) & 0xff;
198 printk("eps: Highest multiplier = %d\n", max_multiplier);
199 min_voltage = (hi >> 16) & 0xff;
200 printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
201 min_multiplier = (hi >> 24) & 0xff;
202 printk("eps: Lowest multiplier = %d\n", min_multiplier);
203
204 /* Sanity checks */
205 if (current_multiplier == 0 || max_multiplier == 0
206 || min_multiplier == 0)
207 return -EINVAL;
208 if (current_multiplier > max_multiplier
209 || max_multiplier <= min_multiplier)
210 return -EINVAL;
211 if (current_voltage > 0x1c || max_voltage > 0x1c)
212 return -EINVAL;
213 if (max_voltage < min_voltage)
214 return -EINVAL;
215
216 /* Calc FSB speed */
217 fsb = cpu_khz / current_multiplier;
218 /* Calc number of p-states supported */
219 if (brand == EPS_BRAND_C7M)
220 states = max_multiplier - min_multiplier + 1;
221 else
222 states = 2;
223
224 /* Allocate private data and frequency table for current cpu */
225 centaur = kzalloc(sizeof(struct eps_cpu_data)
226 + (states + 1) * sizeof(struct cpufreq_frequency_table),
227 GFP_KERNEL);
228 if (!centaur)
229 return -ENOMEM;
230 eps_cpu[0] = centaur;
231
232 /* Copy basic values */
233 centaur->fsb = fsb;
234
235 /* Fill frequency and MSR value table */
236 f_table = &centaur->freq_table[0];
237 if (brand != EPS_BRAND_C7M) {
238 f_table[0].frequency = fsb * min_multiplier;
239 f_table[0].index = (min_multiplier << 8) | min_voltage;
240 f_table[1].frequency = fsb * max_multiplier;
241 f_table[1].index = (max_multiplier << 8) | max_voltage;
242 f_table[2].frequency = CPUFREQ_TABLE_END;
243 } else {
244 k = 0;
245 step = ((max_voltage - min_voltage) * 256)
246 / (max_multiplier - min_multiplier);
247 for (i = min_multiplier; i <= max_multiplier; i++) {
248 voltage = (k * step) / 256 + min_voltage;
249 f_table[k].frequency = fsb * i;
250 f_table[k].index = (i << 8) | voltage;
251 k++;
252 }
253 f_table[k].frequency = CPUFREQ_TABLE_END;
254 }
255
256 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
257 policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
258 policy->cur = fsb * current_multiplier;
259
260 ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
261 if (ret) {
262 kfree(centaur);
263 return ret;
264 }
265
266 cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
267 return 0;
268}
269
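For C7-M parts, the table built above interpolates the VID linearly between the minimum and maximum multiplier in fixed-point steps of 1/256. The same arithmetic stands alone, with invented limits in place of the MSR-reported ones:

#include <stdio.h>

int main(void)
{
	unsigned int min_mult = 4, max_mult = 10;	/* hypothetical */
	unsigned int min_vid = 8, max_vid = 28;		/* hypothetical */
	unsigned int fsb = 100000;			/* kHz, hypothetical */
	int step = ((max_vid - min_vid) * 256) / (max_mult - min_mult);
	unsigned int i, k = 0;

	/* One p-state per multiplier, voltage interpolated as the driver
	 * does with (k * step) / 256 + min_voltage. */
	for (i = min_mult; i <= max_mult; i++) {
		unsigned int vid = (k * step) / 256 + min_vid;
		printf("state %u: %u kHz, VID %u (%u mV)\n",
		       k, fsb * i, vid, vid * 16 + 700);
		k++;
	}
	return 0;
}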
270static int eps_cpu_exit(struct cpufreq_policy *policy)
271{
272 unsigned int cpu = policy->cpu;
273 struct eps_cpu_data *centaur;
274 u32 lo, hi;
275
276 if (eps_cpu[cpu] == NULL)
277 return -ENODEV;
278 centaur = eps_cpu[cpu];
279
280 /* Get max frequency */
281 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
282 /* Set max frequency */
283 eps_set_state(centaur, cpu, hi & 0xffff);
284 /* Bye */
285 cpufreq_frequency_table_put_attr(policy->cpu);
286 kfree(eps_cpu[cpu]);
287 eps_cpu[cpu] = NULL;
288 return 0;
289}
290
291static struct freq_attr* eps_attr[] = {
292 &cpufreq_freq_attr_scaling_available_freqs,
293 NULL,
294};
295
296static struct cpufreq_driver eps_driver = {
297 .verify = eps_verify,
298 .target = eps_target,
299 .init = eps_cpu_init,
300 .exit = eps_cpu_exit,
301 .get = eps_get,
302 .name = "e_powersaver",
303 .owner = THIS_MODULE,
304 .attr = eps_attr,
305};
306
307static int __init eps_init(void)
308{
309 struct cpuinfo_x86 *c = cpu_data;
310
311 /* This driver will work only on Centaur C7 processors with
312 * Enhanced SpeedStep/PowerSaver registers */
313 if (c->x86_vendor != X86_VENDOR_CENTAUR
314 || c->x86 != 6 || c->x86_model != 10)
315 return -ENODEV;
316 if (!cpu_has(c, X86_FEATURE_EST))
317 return -ENODEV;
318
319 if (cpufreq_register_driver(&eps_driver))
320 return -EINVAL;
321 return 0;
322}
323
324static void __exit eps_exit(void)
325{
326 cpufreq_unregister_driver(&eps_driver);
327}
328
329MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>");
330MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPUs.");
331MODULE_LICENSE("GPL");
332
333module_init(eps_init);
334module_exit(eps_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index e940e00b96c9..b59878a0d9b3 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -8,12 +8,11 @@
8 * VIA have currently 3 different versions of Longhaul. 8 * VIA have currently 3 different versions of Longhaul.
9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. 9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
10 * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. 10 * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
11 * Version 2 of longhaul is the same as v1, but adds voltage scaling.
12 * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C)
13 * voltage scaling support has currently been disabled in this driver
14 * until we have code that gets it right.
15 * Version 3 of longhaul got renamed to Powersaver and redesigned
16 * to use the POWERSAVER MSR at 0x110a.
11 * Version 2 of longhaul is backward compatible with v1, but adds
12 * LONGHAUL MSR for purpose of both frequency and voltage scaling.
13 * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
14 * Version 3 of longhaul got renamed to Powersaver and redesigned
15 * to use only the POWERSAVER MSR at 0x110a.
17 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. 16 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
18 * It's pretty much the same feature wise to longhaul v2, though 17 * It's pretty much the same feature wise to longhaul v2, though
19 * there is provision for scaling FSB too, but this doesn't work 18 * there is provision for scaling FSB too, but this doesn't work
@@ -51,10 +50,12 @@
51#define CPU_EZRA 3 50#define CPU_EZRA 3
52#define CPU_EZRA_T 4 51#define CPU_EZRA_T 4
53#define CPU_NEHEMIAH 5 52#define CPU_NEHEMIAH 5
53#define CPU_NEHEMIAH_C 6
54 54
55/* Flags */ 55/* Flags */
56#define USE_ACPI_C3 (1 << 1) 56#define USE_ACPI_C3 (1 << 1)
57#define USE_NORTHBRIDGE (1 << 2) 57#define USE_NORTHBRIDGE (1 << 2)
58#define USE_VT8235 (1 << 3)
58 59
59static int cpu_model; 60static int cpu_model;
60static unsigned int numscales=16; 61static unsigned int numscales=16;
@@ -63,7 +64,8 @@ static unsigned int fsb;
63static struct mV_pos *vrm_mV_table; 64static struct mV_pos *vrm_mV_table;
64static unsigned char *mV_vrm_table; 65static unsigned char *mV_vrm_table;
65 66struct f_msr {
66 unsigned char vrm;
67 u8 vrm;
68 u8 pos;
67}; 69};
68static struct f_msr f_msr_table[32]; 70static struct f_msr f_msr_table[32];
69 71
@@ -73,10 +75,10 @@ static int can_scale_voltage;
73static struct acpi_processor *pr = NULL; 75static struct acpi_processor *pr = NULL;
74static struct acpi_processor_cx *cx = NULL; 76static struct acpi_processor_cx *cx = NULL;
75static u8 longhaul_flags; 77static u8 longhaul_flags;
78static u8 longhaul_pos;
76 79
77/* Module parameters */ 80/* Module parameters */
78static int scale_voltage; 81static int scale_voltage;
79static int ignore_latency;
80 82
81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 83#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
82 84
@@ -164,41 +166,79 @@ static void do_longhaul1(unsigned int clock_ratio_index)
164static void do_powersaver(int cx_address, unsigned int clock_ratio_index) 166static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
165{ 167{
166 union msr_longhaul longhaul; 168 union msr_longhaul longhaul;
169 u8 dest_pos;
167 u32 t; 170 u32 t;
168 171
172 dest_pos = f_msr_table[clock_ratio_index].pos;
173
169 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); 174 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
175 /* Setup new frequency */
170 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; 176 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
171 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; 177 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf;
172 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; 178 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
173 longhaul.bits.EnableSoftBusRatio = 1;
174
175 if (can_scale_voltage) {
176 longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
179 /* Setup new voltage */
180 if (can_scale_voltage)
181 longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
182 /* Sync to timer tick */
183 safe_halt();
184 /* Raise voltage if necessary */
185 if (can_scale_voltage && longhaul_pos < dest_pos) {
177 longhaul.bits.EnableSoftVID = 1; 186 longhaul.bits.EnableSoftVID = 1;
187 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
188 /* Change voltage */
189 if (!cx_address) {
190 ACPI_FLUSH_CPU_CACHE();
191 halt();
192 } else {
193 ACPI_FLUSH_CPU_CACHE();
194 /* Invoke C3 */
195 inb(cx_address);
196 /* Dummy op - must do something useless after P_LVL3
197 * read */
198 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
199 }
200 longhaul.bits.EnableSoftVID = 0;
201 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
202 longhaul_pos = dest_pos;
178 } 203 }
179 204
180 /* Sync to timer tick */
181 safe_halt();
182 205 /* Change frequency on next halt or sleep */
206 longhaul.bits.EnableSoftBusRatio = 1;
183 207 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
184 if (!cx_address) { 208 if (!cx_address) {
185 ACPI_FLUSH_CPU_CACHE(); 209 ACPI_FLUSH_CPU_CACHE();
186 /* Invoke C1 */
187 halt(); 210 halt();
188 } else { 211 } else {
189 ACPI_FLUSH_CPU_CACHE(); 212 ACPI_FLUSH_CPU_CACHE();
190 /* Invoke C3 */ 213 /* Invoke C3 */
191 inb(cx_address); 214 inb(cx_address);
192 215 /* Dummy op - must do something useless after P_LVL3 read */
193 t = inl(acpi_fadt.xpm_tmr_blk.address);
216 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
194 217 }
195 218 /* Disable bus ratio bit */
196 local_irq_disable();
197 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
198 219 longhaul.bits.EnableSoftBusRatio = 0;
199 longhaul.bits.EnableSoftBSEL = 0;
200 longhaul.bits.EnableSoftVID = 0;
201 220 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
221
222 /* Reduce voltage if necessary */
223 if (can_scale_voltage && longhaul_pos > dest_pos) {
224 longhaul.bits.EnableSoftVID = 1;
225 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
226 /* Change voltage */
227 if (!cx_address) {
228 ACPI_FLUSH_CPU_CACHE();
229 halt();
230 } else {
231 ACPI_FLUSH_CPU_CACHE();
232 /* Invoke C3 */
233 inb(cx_address);
234 /* Dummy op - must do something useless after P_LVL3
235 * read */
236 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
237 }
238 longhaul.bits.EnableSoftVID = 0;
239 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
240 longhaul_pos = dest_pos;
241 }
202} 242}
203 243
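The restructured do_powersaver() sequences voltage against frequency: VID rises before the multiplier does and falls only afterwards, so the core is never clocked faster than its current supply supports. The ordering in outline, with mock transition helpers in place of the MSR writes and halts:

#include <stdio.h>

static int cur_pos;	/* current voltage position; higher = more volts */

static void set_voltage(int pos) { cur_pos = pos; printf("VID -> %d\n", pos); }
static void set_frequency(int mult) { printf("mult -> %d\n", mult); }

/* Mirror of the ordering in do_powersaver(): voltage first on the way
 * up, voltage last on the way down. */
static void transition(int dest_mult, int dest_pos)
{
	if (cur_pos < dest_pos)
		set_voltage(dest_pos);	/* raise voltage if necessary */
	set_frequency(dest_mult);	/* change frequency on next halt */
	if (cur_pos > dest_pos)
		set_voltage(dest_pos);	/* reduce voltage if necessary */
}

int main(void)
{
	transition(16, 12);	/* speed up: volts rise first */
	transition(5, 3);	/* slow down: volts fall last */
	return 0;
}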
204/** 244/**
@@ -250,39 +290,30 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
250 outb(3, 0x22); 290 outb(3, 0x22);
251 } else if ((pr != NULL) && pr->flags.bm_control) { 291 } else if ((pr != NULL) && pr->flags.bm_control) {
252 /* Disable bus master arbitration */ 292 /* Disable bus master arbitration */
253 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
254 ACPI_MTX_DO_NOT_LOCK);
293 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
255 } 294 }
256 switch (longhaul_version) { 295 switch (longhaul_version) {
257 296
258 /* 297 /*
259 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) 298 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
260 * Software controlled multipliers only. 299 * Software controlled multipliers only.
261 *
262 * *NB* Until we get voltage scaling working v1 & v2 are the same code.
263 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C]
264 */ 300 */
265 case TYPE_LONGHAUL_V1: 301 case TYPE_LONGHAUL_V1:
266 case TYPE_LONGHAUL_V2:
267 do_longhaul1(clock_ratio_index); 302 do_longhaul1(clock_ratio_index);
268 break; 303 break;
269 304
270 /* 305 /*
306 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
307 *
271 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) 308 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
272 * We can scale voltage with this too, but that's currently
273 * disabled until we come up with a decent 'match freq to voltage'
274 * algorithm.
275 * When we add voltage scaling, we will also need to do the
276 * voltage/freq setting in order depending on the direction
277 * of scaling (like we do in powernow-k7.c)
278 * Nehemiah can do FSB scaling too, but this has never been proven 309 * Nehemiah can do FSB scaling too, but this has never been proven
279 * to work in practice. 310 * to work in practice.
280 */ 311 */
312 case TYPE_LONGHAUL_V2:
281 case TYPE_POWERSAVER: 313 case TYPE_POWERSAVER:
282 if (longhaul_flags & USE_ACPI_C3) { 314 if (longhaul_flags & USE_ACPI_C3) {
283 /* Don't allow wakeup */ 315 /* Don't allow wakeup */
284 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
285 ACPI_MTX_DO_NOT_LOCK);
316 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
286 do_powersaver(cx->address, clock_ratio_index); 317 do_powersaver(cx->address, clock_ratio_index);
287 } else { 318 } else {
288 do_powersaver(0, clock_ratio_index); 319 do_powersaver(0, clock_ratio_index);
@@ -295,8 +326,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
295 outb(0, 0x22); 326 outb(0, 0x22);
296 } else if ((pr != NULL) && pr->flags.bm_control) { 327 } else if ((pr != NULL) && pr->flags.bm_control) {
297 /* Enable bus master arbitration */ 328 /* Enable bus master arbitration */
298 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
299 ACPI_MTX_DO_NOT_LOCK);
329 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
300 } 330 }
301 outb(pic2_mask,0xA1); /* restore mask */ 331 outb(pic2_mask,0xA1); /* restore mask */
302 outb(pic1_mask,0x21); 332 outb(pic1_mask,0x21);
@@ -304,6 +334,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
304 local_irq_restore(flags); 334 local_irq_restore(flags);
305 preempt_enable(); 335 preempt_enable();
306 336
337 freqs.new = calc_speed(longhaul_get_cpu_mult());
307 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 338 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
308} 339}
309 340
@@ -318,31 +349,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
318 349
319#define ROUNDING 0xf 350#define ROUNDING 0xf
320 351
321static int _guess(int guess, int mult)
322{
323 int target;
324
325 target = ((mult/10)*guess);
326 if (mult%10 != 0)
327 target += (guess/2);
328 target += ROUNDING/2;
329 target &= ~ROUNDING;
330 return target;
331}
332
333
334static int guess_fsb(int mult) 352static int guess_fsb(int mult)
335{ 353{
336 int speed = (cpu_khz/1000);
337 int i;
338 int speeds[] = { 66, 100, 133, 200 };
339
340 speed += ROUNDING/2;
341 speed &= ~ROUNDING;
342
343 for (i=0; i<4; i++) {
344 if (_guess(speeds[i], mult) == speed)
345 return speeds[i];
346 }
347 return 0;
348}
354 int speed = cpu_khz / 1000;
355 int i;
356 int speeds[] = { 666, 1000, 1333, 2000 };
357 int f_max, f_min;
358
359 for (i = 0; i < 4; i++) {
360 f_max = ((speeds[i] * mult) + 50) / 100;
361 f_max += (ROUNDING / 2);
362 f_min = f_max - ROUNDING;
363 if ((speed <= f_max) && (speed >= f_min))
364 return speeds[i] / 10;
365 }
366 return 0;
367}
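The rewritten guess_fsb() stores candidate bus speeds multiplied by ten (so 133.33 MHz survives integer math) and accepts any measured speed within a ROUNDING-wide window of multiplier times FSB, instead of demanding an exact rounded match. A standalone check of the arithmetic, with an invented CPU speed:

#include <stdio.h>

#define ROUNDING 0xf

/* Same logic as the new guess_fsb(); mult is the x10 multiplier,
 * speed_mhz is cpu_khz / 1000. */
static int guess_fsb(int speed_mhz, int mult)
{
	int speeds[] = { 666, 1000, 1333, 2000 };
	int i, f_max, f_min;

	for (i = 0; i < 4; i++) {
		f_max = ((speeds[i] * mult) + 50) / 100;
		f_max += (ROUNDING / 2);
		f_min = f_max - ROUNDING;
		if ((speed_mhz <= f_max) && (speed_mhz >= f_min))
			return speeds[i] / 10;
	}
	return 0;
}

int main(void)
{
	/* e.g. a 1.2 GHz part at 9.0x should resolve to a 133 MHz FSB */
	printf("fsb = %d MHz\n", guess_fsb(1200, 90));
	return 0;
}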
@@ -350,71 +369,44 @@ static int guess_fsb(int mult)
350 369
351static int __init longhaul_get_ranges(void) 370static int __init longhaul_get_ranges(void)
352{ 371{
353 unsigned long invalue;
354 unsigned int ezra_t_multipliers[32]= {
355 90, 30, 40, 100, 55, 35, 45, 95,
356 50, 70, 80, 60, 120, 75, 85, 65,
357 -1, 110, 120, -1, 135, 115, 125, 105,
358 130, 150, 160, 140, -1, 155, -1, 145 };
359 unsigned int j, k = 0; 372 unsigned int j, k = 0;
360 union msr_longhaul longhaul; 373 int mult;
361 int mult = 0;
362 374
363 switch (longhaul_version) {
364 case TYPE_LONGHAUL_V1:
365 case TYPE_LONGHAUL_V2:
366 /* Ugh, Longhaul v1 didn't have the min/max MSRs.
367 Assume min=3.0x & max = whatever we booted at. */
375 /* Get current frequency */
376 mult = longhaul_get_cpu_mult();
377 if (mult == -1) {
378 printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
379 return -EINVAL;
380 }
381 fsb = guess_fsb(mult);
382 if (fsb == 0) {
383 printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
384 return -EINVAL;
385 }
386 /* Get max multiplier - as we always did.
387 * Longhaul MSR is useful only when voltage scaling is enabled.
388 * C3 is booting at max anyway. */
389 maxmult = mult;
390 /* Get min multiplier */
391 switch (cpu_model) {
392 case CPU_NEHEMIAH:
393 minmult = 50;
394 break;
395 case CPU_NEHEMIAH_C:
396 minmult = 40;
397 break;
398 default:
368 minmult = 30;
369 maxmult = mult = longhaul_get_cpu_mult();
370 break;
399 minmult = 30;
400 break;
371
372 case TYPE_POWERSAVER:
373 /* Ezra-T */
374 if (cpu_model==CPU_EZRA_T) {
375 minmult = 30;
376 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
377 invalue = longhaul.bits.MaxMHzBR;
378 if (longhaul.bits.MaxMHzBR4)
379 invalue += 16;
380 maxmult = mult = ezra_t_multipliers[invalue];
381 break;
382 }
383
384 /* Nehemiah */
385 if (cpu_model==CPU_NEHEMIAH) {
386 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
387
388 /*
389 * TODO: This code works, but raises a lot of questions.
390 * - Some Nehemiah's seem to have broken Min/MaxMHzBR's.
391 * We get around this by using a hardcoded multiplier of 4.0x
392 * for the minimimum speed, and the speed we booted up at for the max.
393 * This is done in longhaul_get_cpu_mult() by reading the EBLCR register.
394 * - According to some VIA documentation EBLCR is only
395 * in pre-Nehemiah C3s. How this still works is a mystery.
396 * We're possibly using something undocumented and unsupported,
397 * But it works, so we don't grumble.
398 */
399 minmult=40;
400 maxmult = mult = longhaul_get_cpu_mult();
401 break;
402 }
403 } 401 }
404 fsb = guess_fsb(mult);
405 402
406 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", 403 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
407 minmult/10, minmult%10, maxmult/10, maxmult%10); 404 minmult/10, minmult%10, maxmult/10, maxmult%10);
408 405
409 if (fsb == 0) {
410 printk (KERN_INFO PFX "Invalid (reserved) FSB!\n");
411 return -EINVAL;
412 }
413
414 highest_speed = calc_speed(maxmult); 406 highest_speed = calc_speed(maxmult);
415 lowest_speed = calc_speed(minmult); 407 lowest_speed = calc_speed(minmult);
416 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, 408 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
417 print_speed(lowest_speed/1000), 409 print_speed(lowest_speed/1000),
418 print_speed(highest_speed/1000)); 410 print_speed(highest_speed/1000));
419 411
420 if (lowest_speed == highest_speed) { 412 if (lowest_speed == highest_speed) {
@@ -458,6 +450,7 @@ static void __init longhaul_setup_voltagescaling(void)
458 union msr_longhaul longhaul; 450 union msr_longhaul longhaul;
459 struct mV_pos minvid, maxvid; 451 struct mV_pos minvid, maxvid;
460 unsigned int j, speed, pos, kHz_step, numvscales; 452 unsigned int j, speed, pos, kHz_step, numvscales;
453 int min_vid_speed;
461 454
462 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); 455 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
463 if (!(longhaul.bits.RevisionID & 1)) { 456 if (!(longhaul.bits.RevisionID & 1)) {
@@ -471,14 +464,14 @@ static void __init longhaul_setup_voltagescaling(void)
471 mV_vrm_table = &mV_vrm85[0]; 464 mV_vrm_table = &mV_vrm85[0];
472 } else { 465 } else {
473 printk (KERN_INFO PFX "Mobile VRM\n"); 466 printk (KERN_INFO PFX "Mobile VRM\n");
467 if (cpu_model < CPU_NEHEMIAH)
468 return;
474 vrm_mV_table = &mobilevrm_mV[0]; 469 vrm_mV_table = &mobilevrm_mV[0];
475 mV_vrm_table = &mV_mobilevrm[0]; 470 mV_vrm_table = &mV_mobilevrm[0];
476 } 471 }
477 472
478 minvid = vrm_mV_table[longhaul.bits.MinimumVID]; 473 minvid = vrm_mV_table[longhaul.bits.MinimumVID];
479 maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; 474 maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
480 numvscales = maxvid.pos - minvid.pos + 1;
481 kHz_step = (highest_speed - lowest_speed) / numvscales;
482 475
483 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { 476 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
484 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " 477 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
@@ -494,20 +487,59 @@ static void __init longhaul_setup_voltagescaling(void)
494 return; 487 return;
495 } 488 }
496 489
497 printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n",
490 /* How many voltage steps */
491 numvscales = maxvid.pos - minvid.pos + 1;
492 printk(KERN_INFO PFX
493 "Max VID=%d.%03d "
494 "Min VID=%d.%03d, "
495 "%d possible voltage scales\n",
498 maxvid.mV/1000, maxvid.mV%1000, 496 maxvid.mV/1000, maxvid.mV%1000,
499 minvid.mV/1000, minvid.mV%1000, 497 minvid.mV/1000, minvid.mV%1000,
500 numvscales); 498 numvscales);
501 499
500 /* Calculate max frequency at min voltage */
501 j = longhaul.bits.MinMHzBR;
502 if (longhaul.bits.MinMHzBR4)
503 j += 16;
504 min_vid_speed = eblcr_table[j];
505 if (min_vid_speed == -1)
506 return;
507 switch (longhaul.bits.MinMHzFSB) {
508 case 0:
509 min_vid_speed *= 13333;
510 break;
511 case 1:
512 min_vid_speed *= 10000;
513 break;
514 case 3:
515 min_vid_speed *= 6666;
516 break;
517 default:
518 return;
519 break;
520 }
521 if (min_vid_speed >= highest_speed)
522 return;
523 /* Calculate kHz for one voltage step */
524 kHz_step = (highest_speed - min_vid_speed) / numvscales;
525
526
502 j = 0; 527 j = 0;
503 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { 528 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
504 speed = longhaul_table[j].frequency; 529 speed = longhaul_table[j].frequency;
505 pos = (speed - lowest_speed) / kHz_step + minvid.pos;
530 if (speed > min_vid_speed)
531 pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
532 else
533 pos = minvid.pos;
506 f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; 534 f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
535 f_msr_table[longhaul_table[j].index].pos = pos;
507 j++; 536 j++;
508 } 537 }
509 538
539 longhaul_pos = maxvid.pos;
510 can_scale_voltage = 1; 540 can_scale_voltage = 1;
541 printk(KERN_INFO PFX "Voltage scaling enabled. "
542 "Use of \"conservative\" governor is highly recommended.\n");
511} 543}
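min_vid_speed above converts an EBLCR multiplier (stored x10) into kHz by picking the bus clock that the MinMHzFSB field encodes: 13333, 10000 or 6666 per multiplier step for a 133, 100 or 66 MHz FSB. The decode in isolation, with invented register values:

#include <stdio.h>

int main(void)
{
	int mult_x10 = 50;	/* 5.0x, hypothetical eblcr_table value */
	int min_mhz_fsb = 0;	/* hypothetical MinMHzFSB field */
	int min_vid_speed = mult_x10;

	switch (min_mhz_fsb) {
	case 0: min_vid_speed *= 13333; break;	/* 133 MHz bus */
	case 1: min_vid_speed *= 10000; break;	/* 100 MHz bus */
	case 3: min_vid_speed *= 6666; break;	/*  66 MHz bus */
	default: return 1;			/* reserved encoding */
	}
	printf("min VID holds up to %d kHz\n", min_vid_speed);
	return 0;
}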
512 544
513 545
@@ -576,20 +608,51 @@ static int enable_arbiter_disable(void)
576 if (dev != NULL) { 608 if (dev != NULL) {
577 /* Enable access to port 0x22 */ 609 /* Enable access to port 0x22 */
578 pci_read_config_byte(dev, reg, &pci_cmd); 610 pci_read_config_byte(dev, reg, &pci_cmd);
579 if ( !(pci_cmd & 1<<7) ) {
611 if (!(pci_cmd & 1<<7)) {
580 pci_cmd |= 1<<7; 612 pci_cmd |= 1<<7;
581 pci_write_config_byte(dev, reg, pci_cmd); 613 pci_write_config_byte(dev, reg, pci_cmd);
614 pci_read_config_byte(dev, reg, &pci_cmd);
615 if (!(pci_cmd & 1<<7)) {
616 printk(KERN_ERR PFX
617 "Can't enable access to port 0x22.\n");
618 return 0;
619 }
582 } 620 }
583 return 1; 621 return 1;
584 } 622 }
585 return 0; 623 return 0;
586} 624}
587 625
626static int longhaul_setup_vt8235(void)
627{
628 struct pci_dev *dev;
629 u8 pci_cmd;
630
631 /* Find VT8235 southbridge */
632 dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
633 if (dev != NULL) {
634 /* Set transition time to max */
635 pci_read_config_byte(dev, 0xec, &pci_cmd);
636 pci_cmd &= ~(1 << 2);
637 pci_write_config_byte(dev, 0xec, pci_cmd);
638 pci_read_config_byte(dev, 0xe4, &pci_cmd);
639 pci_cmd &= ~(1 << 7);
640 pci_write_config_byte(dev, 0xe4, pci_cmd);
641 pci_read_config_byte(dev, 0xe5, &pci_cmd);
642 pci_cmd |= 1 << 7;
643 pci_write_config_byte(dev, 0xe5, pci_cmd);
644 return 1;
645 }
646 return 0;
647}
648
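longhaul_setup_vt8235() is three read-modify-write cycles on southbridge config-space bytes (0xec, 0xe4, 0xe5), per the driver's reading of the VT8235 datasheet. The pattern with the config space mocked as an array and hypothetical starting values:

#include <stdio.h>
#include <stdint.h>

/* Array standing in for the device's configuration space. */
static uint8_t cfg[256] = { [0xe4] = 0x80, [0xe5] = 0x00, [0xec] = 0x04 };

static uint8_t read_cfg(int off) { return cfg[off]; }
static void write_cfg(int off, uint8_t v) { cfg[off] = v; }

int main(void)
{
	uint8_t b;

	b = read_cfg(0xec); write_cfg(0xec, b & ~(1 << 2)); /* max transition time */
	b = read_cfg(0xe4); write_cfg(0xe4, b & ~(1 << 7));
	b = read_cfg(0xe5); write_cfg(0xe5, b | (1 << 7));

	printf("0xec=%#04x 0xe4=%#04x 0xe5=%#04x\n",
	       cfg[0xec], cfg[0xe4], cfg[0xe5]);
	return 0;
}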
588static int __init longhaul_cpu_init(struct cpufreq_policy *policy) 649static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
589{ 650{
590 struct cpuinfo_x86 *c = cpu_data; 651 struct cpuinfo_x86 *c = cpu_data;
591 char *cpuname=NULL; 652 char *cpuname=NULL;
592 int ret; 653 int ret;
654 u32 lo, hi;
655 int vt8235_present;
593 656
594 /* Check what we have on this motherboard */ 657 /* Check what we have on this motherboard */
595 switch (c->x86_model) { 658 switch (c->x86_model) {
@@ -602,16 +665,20 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
602 break; 665 break;
603 666
604 case 7: 667 case 7:
605 longhaul_version = TYPE_LONGHAUL_V1;
606 switch (c->x86_mask) { 668 switch (c->x86_mask) {
607 case 0: 669 case 0:
670 longhaul_version = TYPE_LONGHAUL_V1;
608 cpu_model = CPU_SAMUEL2; 671 cpu_model = CPU_SAMUEL2;
609 cpuname = "C3 'Samuel 2' [C5B]"; 672 cpuname = "C3 'Samuel 2' [C5B]";
610 /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */
611 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio));
612 memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr));
673 /* Note, this is not a typo, early Samuel2's had
674 * Samuel1 ratios. */
675 memcpy(clock_ratio, samuel1_clock_ratio,
676 sizeof(samuel1_clock_ratio));
677 memcpy(eblcr_table, samuel2_eblcr,
678 sizeof(samuel2_eblcr));
613 break; 679 break;
614 case 1 ... 15: 680 case 1 ... 15:
681 longhaul_version = TYPE_LONGHAUL_V2;
615 if (c->x86_mask < 8) { 682 if (c->x86_mask < 8) {
616 cpu_model = CPU_SAMUEL2; 683 cpu_model = CPU_SAMUEL2;
617 cpuname = "C3 'Samuel 2' [C5B]"; 684 cpuname = "C3 'Samuel 2' [C5B]";
@@ -619,8 +686,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
619 cpu_model = CPU_EZRA; 686 cpu_model = CPU_EZRA;
620 cpuname = "C3 'Ezra' [C5C]"; 687 cpuname = "C3 'Ezra' [C5C]";
621 } 688 }
622 memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio));
623 memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr));
689 memcpy(clock_ratio, ezra_clock_ratio,
690 sizeof(ezra_clock_ratio));
691 memcpy(eblcr_table, ezra_eblcr,
692 sizeof(ezra_eblcr));
624 break; 693 break;
625 } 694 }
626 break; 695 break;
@@ -635,24 +704,24 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
635 break; 704 break;
636 705
637 case 9: 706 case 9:
638 cpu_model = CPU_NEHEMIAH;
639 longhaul_version = TYPE_POWERSAVER;
640 numscales=32;
641 switch (c->x86_mask) {
642 case 0 ... 1:
643 cpuname = "C3 'Nehemiah A' [C5N]";
644 memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio));
645 memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr));
646 break;
647 case 2 ... 4:
648 cpuname = "C3 'Nehemiah B' [C5N]";
649 memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio));
650 memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr));
651 break;
652 case 5 ... 15:
653 cpuname = "C3 'Nehemiah C' [C5N]";
654 memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio));
655 memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr));
656 break;
657 }
707 longhaul_version = TYPE_POWERSAVER;
708 numscales = 32;
709 memcpy(clock_ratio,
710 nehemiah_clock_ratio,
711 sizeof(nehemiah_clock_ratio));
712 memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
713 switch (c->x86_mask) {
714 case 0 ... 1:
715 cpu_model = CPU_NEHEMIAH;
716 cpuname = "C3 'Nehemiah A' [C5XLOE]";
717 break;
718 case 2 ... 4:
719 cpu_model = CPU_NEHEMIAH;
720 cpuname = "C3 'Nehemiah B' [C5XLOH]";
721 break;
722 case 5 ... 15:
723 cpu_model = CPU_NEHEMIAH_C;
724 cpuname = "C3 'Nehemiah C' [C5P]";
725 break;
726 }
657 } 726 }
658 break; 727 break;
@@ -661,6 +730,13 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
661 cpuname = "Unknown"; 730 cpuname = "Unknown";
662 break; 731 break;
663 } 732 }
733 /* Check Longhaul ver. 2 */
734 if (longhaul_version == TYPE_LONGHAUL_V2) {
735 rdmsr(MSR_VIA_LONGHAUL, lo, hi);
736 if (lo == 0 && hi == 0)
737 /* Looks like MSR isn't present */
738 longhaul_version = TYPE_LONGHAUL_V1;
739 }
664 740
665 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); 741 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
666 switch (longhaul_version) { 742 switch (longhaul_version) {
@@ -673,15 +749,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
673 break; 749 break;
674 }; 750 };
675 751
752 /* Doesn't hurt */
753 vt8235_present = longhaul_setup_vt8235();
754
676 /* Find ACPI data for processor */ 755 /* Find ACPI data for processor */
677 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
678 &longhaul_walk_callback, NULL, (void *)&pr);
756 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
757 ACPI_UINT32_MAX, &longhaul_walk_callback,
758 NULL, (void *)&pr);
679 759
680 /* Check ACPI support for C3 state */ 760 /* Check ACPI support for C3 state */
681 if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) {
761 if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) {
682 cx = &pr->power.states[ACPI_STATE_C3]; 762 cx = &pr->power.states[ACPI_STATE_C3];
683 if (cx->address > 0 &&
684 (cx->latency <= 1000 || ignore_latency != 0) ) {
763 if (cx->address > 0 && cx->latency <= 1000) {
685 longhaul_flags |= USE_ACPI_C3; 764 longhaul_flags |= USE_ACPI_C3;
686 goto print_support_type; 765 goto print_support_type;
687 } 766 }
@@ -691,8 +770,11 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
691 longhaul_flags |= USE_NORTHBRIDGE; 770 longhaul_flags |= USE_NORTHBRIDGE;
692 goto print_support_type; 771 goto print_support_type;
693 } 772 }
694
695 /* No ACPI C3 or we can't use it */
773 /* Use VT8235 southbridge if present */
774 if (longhaul_version == TYPE_POWERSAVER && vt8235_present) {
775 longhaul_flags |= USE_VT8235;
776 goto print_support_type;
777 }
696 /* Check ACPI support for bus master arbiter disable */ 778 /* Check ACPI support for bus master arbiter disable */
697 if ((pr == NULL) || !(pr->flags.bm_control)) { 779 if ((pr == NULL) || !(pr->flags.bm_control)) {
698 printk(KERN_ERR PFX 780 printk(KERN_ERR PFX
@@ -701,18 +783,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
701 } 783 }
702 784
703print_support_type: 785print_support_type:
704 if (!(longhaul_flags & USE_NORTHBRIDGE)) {
705 printk (KERN_INFO PFX "Using ACPI support.\n");
706 } else {
707 printk (KERN_INFO PFX "Using northbridge support.\n");
708 }
786 if (longhaul_flags & USE_NORTHBRIDGE)
787 printk (KERN_INFO PFX "Using northbridge support.\n");
788 else if (longhaul_flags & USE_VT8235)
789 printk (KERN_INFO PFX "Using VT8235 support.\n");
790 else
791 printk (KERN_INFO PFX "Using ACPI support.\n");
709 792
710 ret = longhaul_get_ranges(); 793 ret = longhaul_get_ranges();
711 if (ret != 0) 794 if (ret != 0)
712 return ret; 795 return ret;
713 796
714 if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
715 (scale_voltage != 0))
797 if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
716 longhaul_setup_voltagescaling(); 798 longhaul_setup_voltagescaling();
717 799
718 policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 800 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -800,8 +882,6 @@ static void __exit longhaul_exit(void)
800 882
801module_param (scale_voltage, int, 0644); 883module_param (scale_voltage, int, 0644);
802MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 884MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
803module_param(ignore_latency, int, 0644);
804MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
805 885
806MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 886MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
807MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 887MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index bc4682aad69b..bb0a04b1d1ab 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -235,84 +235,14 @@ static int __initdata ezrat_eblcr[32] = {
235/* 235/*
236 * VIA C3 Nehemiah */ 236 * VIA C3 Nehemiah */
237 237
238static int __initdata nehemiah_a_clock_ratio[32] = {
238static int __initdata nehemiah_clock_ratio[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 160, /* 0001 -> 16.0x */ 240 160, /* 0001 -> 16.0x */
241 -1, /* 0010 -> RESERVED */
241 40, /* 0010 -> 4.0x */
242 90, /* 0011 -> 9.0x */
243 95, /* 0100 -> 9.5x */
244 -1, /* 0101 -> RESERVED */
245 -1, /* 0110 -> RESERVED */
246 55, /* 0111 -> 5.5x */
247 60, /* 1000 -> 6.0x */
248 70, /* 1001 -> 7.0x */
249 80, /* 1010 -> 8.0x */
250 50, /* 1011 -> 5.0x */
251 65, /* 1100 -> 6.5x */
252 75, /* 1101 -> 7.5x */
253 85, /* 1110 -> 8.5x */
254 120, /* 1111 -> 12.0x */
255 100, /* 0000 -> 10.0x */
256 -1, /* 0001 -> RESERVED */
257 120, /* 0010 -> 12.0x */
258 90, /* 0011 -> 9.0x */
259 105, /* 0100 -> 10.5x */
260 115, /* 0101 -> 11.5x */
261 125, /* 0110 -> 12.5x */
262 135, /* 0111 -> 13.5x */
263 140, /* 1000 -> 14.0x */
264 150, /* 1001 -> 15.0x */
265 160, /* 1010 -> 16.0x */
266 130, /* 1011 -> 13.0x */
267 145, /* 1100 -> 14.5x */
268 155, /* 1101 -> 15.5x */
269 -1, /* 1110 -> RESERVED (13.0x) */
270 120, /* 1111 -> 12.0x */
271};
272
273static int __initdata nehemiah_b_clock_ratio[32] = {
274 100, /* 0000 -> 10.0x */
275 160, /* 0001 -> 16.0x */
276 -1, /* 0010 -> RESERVED */
277 90, /* 0011 -> 9.0x */
278 95, /* 0100 -> 9.5x */
279 -1, /* 0101 -> RESERVED */
280 -1, /* 0110 -> RESERVED */
281 55, /* 0111 -> 5.5x */
282 60, /* 1000 -> 6.0x */
283 70, /* 1001 -> 7.0x */
284 80, /* 1010 -> 8.0x */
285 50, /* 1011 -> 5.0x */
286 65, /* 1100 -> 6.5x */
287 75, /* 1101 -> 7.5x */
288 85, /* 1110 -> 8.5x */
289 120, /* 1111 -> 12.0x */
290 100, /* 0000 -> 10.0x */
291 110, /* 0001 -> 11.0x */
292 120, /* 0010 -> 12.0x */
293 90, /* 0011 -> 9.0x */
294 105, /* 0100 -> 10.5x */
295 115, /* 0101 -> 11.5x */
296 125, /* 0110 -> 12.5x */
297 135, /* 0111 -> 13.5x */
298 140, /* 1000 -> 14.0x */
299 150, /* 1001 -> 15.0x */
300 160, /* 1010 -> 16.0x */
301 130, /* 1011 -> 13.0x */
302 145, /* 1100 -> 14.5x */
303 155, /* 1101 -> 15.5x */
304 -1, /* 1110 -> RESERVED (13.0x) */
305 120, /* 1111 -> 12.0x */
306};
307
308static int __initdata nehemiah_c_clock_ratio[32] = {
309 100, /* 0000 -> 10.0x */
310 160, /* 0001 -> 16.0x */
311 40, /* 0010 -> RESERVED */
312 90, /* 0011 -> 9.0x */ 242 90, /* 0011 -> 9.0x */
313 95, /* 0100 -> 9.5x */ 243 95, /* 0100 -> 9.5x */
314 -1, /* 0101 -> RESERVED */ 244 -1, /* 0101 -> RESERVED */
315 45, /* 0110 -> RESERVED */
245 45, /* 0110 -> 4.5x */
316 55, /* 0111 -> 5.5x */ 246 55, /* 0111 -> 5.5x */
317 60, /* 1000 -> 6.0x */ 247 60, /* 1000 -> 6.0x */
318 70, /* 1001 -> 7.0x */ 248 70, /* 1001 -> 7.0x */
@@ -340,84 +270,14 @@ static int __initdata nehemiah_c_clock_ratio[32] = {
340 120, /* 1111 -> 12.0x */ 270 120, /* 1111 -> 12.0x */
341}; 271};
342 272
343static int __initdata nehemiah_a_eblcr[32] = {
273static int __initdata nehemiah_eblcr[32] = {
344 50, /* 0000 -> 5.0x */
345 160, /* 0001 -> 16.0x */
346 -1, /* 0010 -> RESERVED */
347 100, /* 0011 -> 10.0x */
348 55, /* 0100 -> 5.5x */
349 -1, /* 0101 -> RESERVED */
350 -1, /* 0110 -> RESERVED */
351 95, /* 0111 -> 9.5x */
352 90, /* 1000 -> 9.0x */
353 70, /* 1001 -> 7.0x */
354 80, /* 1010 -> 8.0x */
355 60, /* 1011 -> 6.0x */
356 120, /* 1100 -> 12.0x */
357 75, /* 1101 -> 7.5x */
358 85, /* 1110 -> 8.5x */
359 65, /* 1111 -> 6.5x */
360 90, /* 0000 -> 9.0x */
361 -1, /* 0001 -> RESERVED */
362 120, /* 0010 -> 12.0x */
363 100, /* 0011 -> 10.0x */
364 135, /* 0100 -> 13.5x */
365 115, /* 0101 -> 11.5x */
366 125, /* 0110 -> 12.5x */
367 105, /* 0111 -> 10.5x */
368 130, /* 1000 -> 13.0x */
369 150, /* 1001 -> 15.0x */
370 160, /* 1010 -> 16.0x */
371 140, /* 1011 -> 14.0x */
372 120, /* 1100 -> 12.0x */
373 155, /* 1101 -> 15.5x */
374 -1, /* 1110 -> RESERVED (13.0x) */
375 145 /* 1111 -> 14.5x */
376 /* end of table */
377};
378static int __initdata nehemiah_b_eblcr[32] = {
379 50, /* 0000 -> 5.0x */
380 160, /* 0001 -> 16.0x */
381 -1, /* 0010 -> RESERVED */
382 100, /* 0011 -> 10.0x */
383 55, /* 0100 -> 5.5x */
384 -1, /* 0101 -> RESERVED */
385 -1, /* 0110 -> RESERVED */
386 95, /* 0111 -> 9.5x */
387 90, /* 1000 -> 9.0x */
388 70, /* 1001 -> 7.0x */
389 80, /* 1010 -> 8.0x */
390 60, /* 1011 -> 6.0x */
391 120, /* 1100 -> 12.0x */
392 75, /* 1101 -> 7.5x */
393 85, /* 1110 -> 8.5x */
394 65, /* 1111 -> 6.5x */
395 90, /* 0000 -> 9.0x */
396 110, /* 0001 -> 11.0x */
397 120, /* 0010 -> 12.0x */
398 100, /* 0011 -> 10.0x */
399 135, /* 0100 -> 13.5x */
400 115, /* 0101 -> 11.5x */
401 125, /* 0110 -> 12.5x */
402 105, /* 0111 -> 10.5x */
403 130, /* 1000 -> 13.0x */
404 150, /* 1001 -> 15.0x */
405 160, /* 1010 -> 16.0x */
406 140, /* 1011 -> 14.0x */
407 120, /* 1100 -> 12.0x */
408 155, /* 1101 -> 15.5x */
409 -1, /* 1110 -> RESERVED (13.0x) */
410 145 /* 1111 -> 14.5x */
411 /* end of table */
412};
413static int __initdata nehemiah_c_eblcr[32] = {
414 50, /* 0000 -> 5.0x */ 274 50, /* 0000 -> 5.0x */
415 160, /* 0001 -> 16.0x */ 275 160, /* 0001 -> 16.0x */
416 40, /* 0010 -> RESERVED */ 276 40, /* 0010 -> 4.0x */
417 100, /* 0011 -> 10.0x */ 277 100, /* 0011 -> 10.0x */
418 55, /* 0100 -> 5.5x */ 278 55, /* 0100 -> 5.5x */
419 -1, /* 0101 -> RESERVED */ 279 -1, /* 0101 -> RESERVED */
420 45, /* 0110 -> RESERVED */ 280 45, /* 0110 -> 4.5x */
421 95, /* 0111 -> 9.5x */ 281 95, /* 0111 -> 9.5x */
422 90, /* 1000 -> 9.0x */ 282 90, /* 1000 -> 9.0x */
423 70, /* 1001 -> 7.0x */ 283 70, /* 1001 -> 7.0x */
@@ -443,7 +303,6 @@ static int __initdata nehemiah_c_eblcr[32] = {
443 155, /* 1101 -> 15.5x */ 303 155, /* 1101 -> 15.5x */
444 -1, /* 1110 -> RESERVED (13.0x) */ 304 -1, /* 1110 -> RESERVED (13.0x) */
445 145 /* 1111 -> 14.5x */ 305 145 /* 1111 -> 14.5x */
446 /* end of table */
447}; 306};
448 307
449/* 308/*
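
The ratio tables above map a 5-bit multiplier field to the bus-to-core multiplier times ten, with -1 marking reserved encodings. A minimal sketch of how such a table is consumed; khz_from_ratio_field() is a hypothetical helper for illustration, not longhaul.c's actual lookup code:

    #include <stdio.h>

    /* A few entries of a x10 multiplier table in the style above; the
     * unlisted encodings default to 0 here and are rejected like the
     * reserved -1 ones. */
    static const int clock_ratio[32] = {
            [0x00] = 100,   /* 10.0x */
            [0x01] = 160,   /* 16.0x */
            [0x02] = -1,    /* reserved */
            [0x03] = 90,    /*  9.0x */
    };

    static int khz_from_ratio_field(unsigned int field, unsigned int fsb_khz)
    {
            int mult_x10 = clock_ratio[field & 0x1f];

            if (mult_x10 <= 0)              /* reserved or unknown encoding */
                    return -1;
            return fsb_khz / 10 * mult_x10; /* table stores multiplier * 10 */
    }

    int main(void)
    {
            /* 16.0x on a 133 MHz bus -> 2128000 kHz */
            printf("%d kHz\n", khz_from_ratio_field(0x01, 133000));
            return 0;
    }
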
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 2d6491672559..fe3b67005ebb 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1289,7 +1289,11 @@ static unsigned int powernowk8_get (unsigned int cpu)
1289 if (query_current_values_with_pending_wait(data)) 1289 if (query_current_values_with_pending_wait(data))
1290 goto out; 1290 goto out;
1291 1291
1292 khz = find_khz_freq_from_fid(data->currfid); 1292 if (cpu_family == CPU_HW_PSTATE)
1293 khz = find_khz_freq_from_fiddid(data->currfid, data->currdid);
1294 else
1295 khz = find_khz_freq_from_fid(data->currfid);
1296
1293 1297
1294out: 1298out:
1295 set_cpus_allowed(current, oldmask); 1299 set_cpus_allowed(current, oldmask);
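
The fix above makes powernowk8_get() honour hardware P-state CPUs, which report their operating point as a fid/did pair instead of a bare fid. A hedged sketch of the two decodings; the formulas are one reading of the K8 frequency encoding, not code copied from the driver:

    #include <stdio.h>

    /* Classic K8: core clock = 800 MHz + fid * 100 MHz (assumed encoding) */
    static unsigned int khz_from_fid(unsigned int fid)
    {
            return (800 + fid * 100) * 1000;
    }

    /* Hardware P-state: (fid + 8) * 100 MHz, divided down by 2^did
     * (assumed encoding) */
    static unsigned int khz_from_fiddid(unsigned int fid, unsigned int did)
    {
            return (100000 * (fid + 0x8)) >> did;
    }

    int main(void)
    {
            printf("fid 0x0a       -> %u kHz\n", khz_from_fid(0x0a));       /* 1800000 */
            printf("fid 0x0a did 1 -> %u kHz\n", khz_from_fiddid(0x0a, 1)); /*  900000 */
            return 0;
    }
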
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index c0c3b59de32c..de27bd07bc9c 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -6,6 +6,7 @@
6#include <asm/io.h> 6#include <asm/io.h>
7#include <asm/processor.h> 7#include <asm/processor.h>
8#include <asm/timer.h> 8#include <asm/timer.h>
9#include <asm/pci-direct.h>
9 10
10#include "cpu.h" 11#include "cpu.h"
11 12
@@ -161,19 +162,19 @@ static void __cpuinit set_cx86_inc(void)
161static void __cpuinit geode_configure(void) 162static void __cpuinit geode_configure(void)
162{ 163{
163 unsigned long flags; 164 unsigned long flags;
164 u8 ccr3, ccr4; 165 u8 ccr3;
165 local_irq_save(flags); 166 local_irq_save(flags);
166 167
167 /* Suspend on halt power saving and enable #SUSP pin */ 168 /* Suspend on halt power saving and enable #SUSP pin */
168 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); 169 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
169 170
170 ccr3 = getCx86(CX86_CCR3); 171 ccr3 = getCx86(CX86_CCR3);
171 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ 172 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
172
173 ccr4 = getCx86(CX86_CCR4);
174 ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */
175 173
176 setCx86(CX86_CCR3, ccr3); 174
175 /* FPU fast, DTE cache, Mem bypass */
176 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
177 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
177 178
178 set_cx86_memwb(); 179 set_cx86_memwb();
179 set_cx86_reorder(); 180 set_cx86_reorder();
@@ -183,14 +184,6 @@ static void __cpuinit geode_configure(void)
183} 184}
184 185
185 186
186#ifdef CONFIG_PCI
187static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
190 { },
191};
192#endif
193
194static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) 187static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
195{ 188{
196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; 189 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
@@ -258,6 +251,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
258 251
259 case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ 252 case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
260#ifdef CONFIG_PCI 253#ifdef CONFIG_PCI
254 {
255 u32 vendor, device;
261 /* It isn't really a PCI quirk directly, but the cure is the 256 /* It isn't really a PCI quirk directly, but the cure is the
262 same. The MediaGX has deep magic SMM stuff that handles the 257 same. The MediaGX has deep magic SMM stuff that handles the
 263 SB emulation. It throws away the fifo on disable_dma() which 258 SB emulation. It throws away the fifo on disable_dma() which
@@ -273,22 +268,34 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
273 printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); 268 printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
274 isa_dma_bridge_buggy = 2; 269 isa_dma_bridge_buggy = 2;
275 270
 271 /* We do this before the PCI layer is running. However, we
272 are safe here as we know the bridge must be a Cyrix
273 companion and must be present */
274 vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID);
275 device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID);
276 276
277 /* 277 /*
278 * The 5510/5520 companion chips have a funky PIT. 278 * The 5510/5520 companion chips have a funky PIT.
279 */ 279 */
280 if (pci_dev_present(cyrix_55x0)) 280 if (vendor == PCI_VENDOR_ID_CYRIX &&
281 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
281 pit_latch_buggy = 1; 282 pit_latch_buggy = 1;
283 }
282#endif 284#endif
 283 c->x86_cache_size=16; /* Yep 16K integrated cache that's it */ 285 c->x86_cache_size=16; /* Yep 16K integrated cache that's it */
284 286
285 /* GXm supports extended cpuid levels 'ala' AMD */ 287 /* GXm supports extended cpuid levels 'ala' AMD */
286 if (c->cpuid_level == 2) { 288 if (c->cpuid_level == 2) {
287 /* Enable cxMMX extensions (GX1 Datasheet 54) */ 289 /* Enable cxMMX extensions (GX1 Datasheet 54) */
288 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); 290 setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
289 291
290 /* GXlv/GXm/GX1 */ 292 /*
291 if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) 293 * GXm : 0x30 ... 0x5f GXm datasheet 51
294 * GXlv: 0x6x GXlv datasheet 54
295 * ? : 0x7x
296 * GX1 : 0x8x GX1 datasheet 56
297 */
298 if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f))
292 geode_configure(); 299 geode_configure();
293 get_model_name(c); /* get CPU marketing name */ 300 get_model_name(c); /* get CPU marketing name */
294 return; 301 return;
@@ -415,15 +422,14 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
415 422
416 if (dir0 == 5 || dir0 == 3) 423 if (dir0 == 5 || dir0 == 3)
417 { 424 {
418 unsigned char ccr3, ccr4; 425 unsigned char ccr3;
419 unsigned long flags; 426 unsigned long flags;
420 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); 427 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
421 local_irq_save(flags); 428 local_irq_save(flags);
422 ccr3 = getCx86(CX86_CCR3); 429 ccr3 = getCx86(CX86_CCR3);
423 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
424 ccr4 = getCx86(CX86_CCR4); 431 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */
425 setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ 432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
426 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
427 local_irq_restore(flags); 433 local_irq_restore(flags);
428 } 434 }
429 } 435 }
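
Both hunks in this file rely on the same access bracket: the Cyrix configuration registers sit behind index/data ports 0x22/0x23, and CCR4-CCR7 are only visible while MAPEN (bit 4 of CCR3) is set. A sketch of the enable/modify/restore sequence; the register indices follow the usual Cyrix documentation, and <sys/io.h> merely stands in for the kernel's port accessors (running this for real would need ioperm() and matching hardware):

    #include <sys/io.h>

    #define CX86_CCR3 0xc3
    #define CX86_CCR4 0xe8

    unsigned char getCx86(unsigned char reg)
    {
            outb(reg, 0x22);        /* select a config register */
            return inb(0x23);       /* read it back */
    }

    void setCx86(unsigned char reg, unsigned char data)
    {
            outb(reg, 0x22);
            outb(data, 0x23);
    }

    void cyrix_set_ccr4_bits(unsigned char bits)
    {
            unsigned char ccr3 = getCx86(CX86_CCR3);

            setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);      /* enable MAPEN */
            setCx86(CX86_CCR4, getCx86(CX86_CCR4) | bits); /* CCR4 now visible */
            setCx86(CX86_CCR3, ccr3);                      /* disable MAPEN */
    }
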
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index d555bec0db99..4f10c62d180c 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -12,6 +12,7 @@
12 12
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <asm/system.h> 14#include <asm/system.h>
15#include <asm/mce.h>
15 16
16#include "mce.h" 17#include "mce.h"
17 18
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h
index 84fd4cf7d0fb..81fb6e2d35f3 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.h
+++ b/arch/i386/kernel/cpu/mcheck/mce.h
@@ -1,4 +1,5 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <asm/mce.h>
2 3
3void amd_mcheck_init(struct cpuinfo_x86 *c); 4void amd_mcheck_init(struct cpuinfo_x86 *c);
4void intel_p4_mcheck_init(struct cpuinfo_x86 *c); 5void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
@@ -9,6 +10,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c);
9/* Call the installed machine check handler for this CPU setup. */ 10/* Call the installed machine check handler for this CPU setup. */
10extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); 11extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
11 12
12extern int mce_disabled;
13extern int nr_mce_banks; 13extern int nr_mce_banks;
14 14
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 504434a46011..8359c19d3a23 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -12,6 +12,7 @@
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/idle.h>
15 16
16#include <asm/therm_throt.h> 17#include <asm/therm_throt.h>
17 18
@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm
59 60
60fastcall void smp_thermal_interrupt(struct pt_regs *regs) 61fastcall void smp_thermal_interrupt(struct pt_regs *regs)
61{ 62{
63 exit_idle();
62 irq_enter(); 64 irq_enter();
63 vendor_thermal_interrupt(regs); 65 vendor_thermal_interrupt(regs);
64 irq_exit(); 66 irq_exit();
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index 5ae1705eafa6..c7d8f1756745 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -211,6 +211,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
211 default: 211 default:
212 return -ENOTTY; 212 return -ENOTTY;
213 case MTRRIOC_ADD_ENTRY: 213 case MTRRIOC_ADD_ENTRY:
214#ifdef CONFIG_COMPAT
215 case MTRRIOC32_ADD_ENTRY:
216#endif
214 if (!capable(CAP_SYS_ADMIN)) 217 if (!capable(CAP_SYS_ADMIN))
215 return -EPERM; 218 return -EPERM;
216 err = 219 err =
@@ -218,21 +221,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
218 file, 0); 221 file, 0);
219 break; 222 break;
220 case MTRRIOC_SET_ENTRY: 223 case MTRRIOC_SET_ENTRY:
224#ifdef CONFIG_COMPAT
225 case MTRRIOC32_SET_ENTRY:
226#endif
221 if (!capable(CAP_SYS_ADMIN)) 227 if (!capable(CAP_SYS_ADMIN))
222 return -EPERM; 228 return -EPERM;
223 err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); 229 err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
224 break; 230 break;
225 case MTRRIOC_DEL_ENTRY: 231 case MTRRIOC_DEL_ENTRY:
232#ifdef CONFIG_COMPAT
233 case MTRRIOC32_DEL_ENTRY:
234#endif
226 if (!capable(CAP_SYS_ADMIN)) 235 if (!capable(CAP_SYS_ADMIN))
227 return -EPERM; 236 return -EPERM;
228 err = mtrr_file_del(sentry.base, sentry.size, file, 0); 237 err = mtrr_file_del(sentry.base, sentry.size, file, 0);
229 break; 238 break;
230 case MTRRIOC_KILL_ENTRY: 239 case MTRRIOC_KILL_ENTRY:
240#ifdef CONFIG_COMPAT
241 case MTRRIOC32_KILL_ENTRY:
242#endif
231 if (!capable(CAP_SYS_ADMIN)) 243 if (!capable(CAP_SYS_ADMIN))
232 return -EPERM; 244 return -EPERM;
233 err = mtrr_del(-1, sentry.base, sentry.size); 245 err = mtrr_del(-1, sentry.base, sentry.size);
234 break; 246 break;
235 case MTRRIOC_GET_ENTRY: 247 case MTRRIOC_GET_ENTRY:
248#ifdef CONFIG_COMPAT
249 case MTRRIOC32_GET_ENTRY:
250#endif
236 if (gentry.regnum >= num_var_ranges) 251 if (gentry.regnum >= num_var_ranges)
237 return -EINVAL; 252 return -EINVAL;
238 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 253 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
@@ -249,6 +264,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
249 264
250 break; 265 break;
251 case MTRRIOC_ADD_PAGE_ENTRY: 266 case MTRRIOC_ADD_PAGE_ENTRY:
267#ifdef CONFIG_COMPAT
268 case MTRRIOC32_ADD_PAGE_ENTRY:
269#endif
252 if (!capable(CAP_SYS_ADMIN)) 270 if (!capable(CAP_SYS_ADMIN))
253 return -EPERM; 271 return -EPERM;
254 err = 272 err =
@@ -256,21 +274,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
256 file, 1); 274 file, 1);
257 break; 275 break;
258 case MTRRIOC_SET_PAGE_ENTRY: 276 case MTRRIOC_SET_PAGE_ENTRY:
277#ifdef CONFIG_COMPAT
278 case MTRRIOC32_SET_PAGE_ENTRY:
279#endif
259 if (!capable(CAP_SYS_ADMIN)) 280 if (!capable(CAP_SYS_ADMIN))
260 return -EPERM; 281 return -EPERM;
261 err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); 282 err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
262 break; 283 break;
263 case MTRRIOC_DEL_PAGE_ENTRY: 284 case MTRRIOC_DEL_PAGE_ENTRY:
285#ifdef CONFIG_COMPAT
286 case MTRRIOC32_DEL_PAGE_ENTRY:
287#endif
264 if (!capable(CAP_SYS_ADMIN)) 288 if (!capable(CAP_SYS_ADMIN))
265 return -EPERM; 289 return -EPERM;
266 err = mtrr_file_del(sentry.base, sentry.size, file, 1); 290 err = mtrr_file_del(sentry.base, sentry.size, file, 1);
267 break; 291 break;
268 case MTRRIOC_KILL_PAGE_ENTRY: 292 case MTRRIOC_KILL_PAGE_ENTRY:
293#ifdef CONFIG_COMPAT
294 case MTRRIOC32_KILL_PAGE_ENTRY:
295#endif
269 if (!capable(CAP_SYS_ADMIN)) 296 if (!capable(CAP_SYS_ADMIN))
270 return -EPERM; 297 return -EPERM;
271 err = mtrr_del_page(-1, sentry.base, sentry.size); 298 err = mtrr_del_page(-1, sentry.base, sentry.size);
272 break; 299 break;
273 case MTRRIOC_GET_PAGE_ENTRY: 300 case MTRRIOC_GET_PAGE_ENTRY:
301#ifdef CONFIG_COMPAT
302 case MTRRIOC32_GET_PAGE_ENTRY:
303#endif
274 if (gentry.regnum >= num_var_ranges) 304 if (gentry.regnum >= num_var_ranges)
275 return -EINVAL; 305 return -EINVAL;
276 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 306 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
@@ -339,7 +369,7 @@ static int mtrr_open(struct inode *inode, struct file *file)
339 return single_open(file, mtrr_seq_show, NULL); 369 return single_open(file, mtrr_seq_show, NULL);
340} 370}
341 371
342static struct file_operations mtrr_fops = { 372static const struct file_operations mtrr_fops = {
343 .owner = THIS_MODULE, 373 .owner = THIS_MODULE,
344 .open = mtrr_open, 374 .open = mtrr_open,
345 .read = seq_read, 375 .read = seq_read,
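
The pattern repeated through this hunk is worth isolating: each 32-bit ioctl number becomes an extra case label that falls through to the native handler, compiled in only under CONFIG_COMPAT. A stripped-down sketch with made-up command numbers:

    #include <errno.h>
    #include <stdio.h>

    #define CONFIG_COMPAT 1                 /* assume a compat-enabled build */
    #define EXAMPLE_NATIVE_CMD 0x100        /* hypothetical ioctl numbers */
    #define EXAMPLE_COMPAT_CMD 0x200

    static long do_example(void) { return 0; }

    static long example_ioctl(unsigned int cmd)
    {
            switch (cmd) {
            case EXAMPLE_NATIVE_CMD:
    #ifdef CONFIG_COMPAT
            case EXAMPLE_COMPAT_CMD:        /* same argument layout, new number */
    #endif
                    return do_example();
            default:
                    return -ENOTTY;
            }
    }

    int main(void)
    {
            printf("native=%ld compat=%ld bogus=%ld\n",
                   example_ioctl(EXAMPLE_NATIVE_CMD),
                   example_ioctl(EXAMPLE_COMPAT_CMD),
                   example_ioctl(0x300));
            return 0;
    }
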
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea87145..0acfb6a5a220 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
50unsigned int *usage_table; 50unsigned int *usage_table;
51static DEFINE_MUTEX(mtrr_mutex); 51static DEFINE_MUTEX(mtrr_mutex);
52 52
53u32 size_or_mask, size_and_mask; 53u64 size_or_mask, size_and_mask;
54 54
55static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; 55static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
56 56
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
662 boot_cpu_data.x86_mask == 0x4)) 662 boot_cpu_data.x86_mask == 0x4))
663 phys_addr = 36; 663 phys_addr = 36;
664 664
665 size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); 665 size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
666 size_and_mask = ~size_or_mask & 0xfff00000; 666 size_and_mask = ~size_or_mask & 0xfffff00000ULL;
667 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && 667 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
668 boot_cpu_data.x86 == 6) { 668 boot_cpu_data.x86 == 6) {
669 /* VIA C* family have Intel style MTRRs, but 669 /* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9db6cfe..289dfe6030e3 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
84 84
85extern void set_mtrr_ops(struct mtrr_ops * ops); 85extern void set_mtrr_ops(struct mtrr_ops * ops);
86 86
87extern u32 size_or_mask, size_and_mask; 87extern u64 size_or_mask, size_and_mask;
88extern struct mtrr_ops * mtrr_if; 88extern struct mtrr_ops * mtrr_if;
89 89
90#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 90#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
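
The widening to u64 matters because both masks are derived from the physical address width, and the 32-bit computation overflows once a CPU reports 44 or more physical address bits (32 mask bits plus the 12-bit page shift). A worked example for the 36-bit case handled in mtrr_bp_init() above:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
            unsigned int phys_addr = 36;    /* e.g. a PSE36-capable CPU */
            uint64_t size_or_mask, size_and_mask;

            size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
            size_and_mask = ~size_or_mask & 0xfffff00000ULL;

            /* prints size_or_mask  = 0xffffffffff000000
             *        size_and_mask = 0x0000000000f00000 */
            printf("size_or_mask  = 0x%016llx\n", (unsigned long long)size_or_mask);
            printf("size_and_mask = 0x%016llx\n", (unsigned long long)size_and_mask);
            return 0;
    }
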
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 6624d8583c42..47e3ebbfb28d 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, 31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
32 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", 32 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow",
33 33
34 /* Transmeta-defined */ 34 /* Transmeta-defined */
35 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 35 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -47,7 +47,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
47 /* Intel-defined (#2) */ 47 /* Intel-defined (#2) */
48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, 50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
52 52
53 /* VIA/Cyrix/Centaur-defined */ 53 /* VIA/Cyrix/Centaur-defined */
@@ -57,8 +57,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
57 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 57 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
58 58
59 /* AMD-defined (#2) */ 59 /* AMD-defined (#2) */
60 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, 60 "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm",
61 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 61 "sse4a", "misalignsse",
62 "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL,
62 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 }; 65 };
@@ -69,8 +70,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
69 "ttp", /* thermal trip */ 70 "ttp", /* thermal trip */
70 "tm", 71 "tm",
71 "stc", 72 "stc",
73 "100mhzsteps",
74 "hwpstate",
72 NULL, 75 NULL,
73 /* nothing */ /* constant_tsc - moved to flags */ 76 NULL, /* constant_tsc - moved to flags */
77 /* nothing */
74 }; 78 };
75 struct cpuinfo_x86 *c = v; 79 struct cpuinfo_x86 *c = v;
76 int i, n = c - cpu_data; 80 int i, n = c - cpu_data;
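
The string arrays above are indexed by capability bit number, with NULL marking bits that have no user-visible name. A small sketch of the lookup, reusing the first eight AMD (#2) names from this hunk:

    #include <stdio.h>

    static const char * const flag_names[8] = {
            "lahf_lm", "cmp_legacy", "svm", "extapic",
            "cr8legacy", "abm", "sse4a", "misalignsse",
    };

    static void print_flags(unsigned int cap_word)
    {
            int i;

            for (i = 0; i < 8; i++)
                    if ((cap_word & (1u << i)) && flag_names[i])
                            printf(" %s", flag_names[i]);
            printf("\n");
    }

    int main(void)
    {
            print_flags(0x45);      /* bits 0, 2, 6: lahf_lm svm sse4a */
            return 0;
    }
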
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 4056fb7d2cdf..5678d46863c6 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -9,7 +9,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 9{
10 unsigned int cap_mask, uk, max, dummy; 10 unsigned int cap_mask, uk, max, dummy;
11 unsigned int cms_rev1, cms_rev2; 11 unsigned int cms_rev1, cms_rev2;
12 unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; 12 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
13 char cpu_info[65]; 13 char cpu_info[65];
14 14
15 get_model_name(c); /* Same as AMD/Cyrix */ 15 get_model_name(c); /* Same as AMD/Cyrix */
@@ -72,6 +72,9 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
72 wrmsr(0x80860004, ~0, uk); 72 wrmsr(0x80860004, ~0, uk);
73 c->x86_capability[0] = cpuid_edx(0x00000001); 73 c->x86_capability[0] = cpuid_edx(0x00000001);
74 wrmsr(0x80860004, cap_mask, uk); 74 wrmsr(0x80860004, cap_mask, uk);
75
76 /* All Transmeta CPUs have a constant TSC */
77 set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
75 78
76 /* If we can run i686 user-space code, call us an i686 */ 79 /* If we can run i686 user-space code, call us an i686 */
77#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) 80#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index 51130b39cd2e..eeae0d992337 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -48,7 +48,6 @@ static struct class *cpuid_class;
48#ifdef CONFIG_SMP 48#ifdef CONFIG_SMP
49 49
50struct cpuid_command { 50struct cpuid_command {
51 int cpu;
52 u32 reg; 51 u32 reg;
53 u32 *data; 52 u32 *data;
54}; 53};
@@ -57,8 +56,7 @@ static void cpuid_smp_cpuid(void *cmd_block)
57{ 56{
58 struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; 57 struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
59 58
60 if (cmd->cpu == smp_processor_id()) 59 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
61 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
62 &cmd->data[3]); 60 &cmd->data[3]);
63} 61}
64 62
@@ -70,11 +68,10 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data)
70 if (cpu == smp_processor_id()) { 68 if (cpu == smp_processor_id()) {
71 cpuid(reg, &data[0], &data[1], &data[2], &data[3]); 69 cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
72 } else { 70 } else {
73 cmd.cpu = cpu;
74 cmd.reg = reg; 71 cmd.reg = reg;
75 cmd.data = data; 72 cmd.data = data;
76 73
77 smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); 74 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
78 } 75 }
79 preempt_enable(); 76 preempt_enable();
80} 77}
@@ -148,7 +145,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
148/* 145/*
149 * File operations we support 146 * File operations we support
150 */ 147 */
151static struct file_operations cpuid_fops = { 148static const struct file_operations cpuid_fops = {
152 .owner = THIS_MODULE, 149 .owner = THIS_MODULE,
153 .llseek = cpuid_seek, 150 .llseek = cpuid_seek,
154 .read = cpuid_read, 151 .read = cpuid_read,
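
The cpu field could be dropped because smp_call_function_single() already guarantees the callback runs only on the requested CPU; the old broadcast forced every CPU to filter on cmd->cpu itself. A user-space analogue of the new shape, where run_on() merely stands in for the kernel dispatcher:

    #include <stdio.h>

    struct cpuid_command {
            unsigned int reg;       /* no cpu field needed any more */
            unsigned int *data;
    };

    static void run_on(int target, void (*fn)(void *), void *arg)
    {
            /* stand-in for smp_call_function_single(target, fn, arg, ...) */
            printf("running on cpu %d\n", target);
            fn(arg);
    }

    static void cpuid_smp_cpuid(void *cmd_block)
    {
            struct cpuid_command *cmd = cmd_block;

            cmd->data[0] = cmd->reg;        /* pretend to execute CPUID */
    }

    int main(void)
    {
            unsigned int data[4] = { 0 };
            struct cpuid_command cmd = { .reg = 1, .data = data };

            run_on(2, cpuid_smp_cpuid, &cmd);
            printf("eax=%u\n", data[0]);
            return 0;
    }
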
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index f391abcf7da9..70f39560846a 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -14,6 +14,7 @@
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/e820.h> 16#include <asm/e820.h>
17#include <asm/setup.h>
17 18
18#ifdef CONFIG_EFI 19#ifdef CONFIG_EFI
19int efi_enabled = 0; 20int efi_enabled = 0;
@@ -156,21 +157,22 @@ static struct resource standard_io_resources[] = { {
156 .flags = IORESOURCE_BUSY | IORESOURCE_IO 157 .flags = IORESOURCE_BUSY | IORESOURCE_IO
157} }; 158} };
158 159
159static int romsignature(const unsigned char *x) 160#define ROMSIGNATURE 0xaa55
161
162static int __init romsignature(const unsigned char *rom)
160{ 163{
161 unsigned short sig; 164 unsigned short sig;
162 int ret = 0; 165
163 if (probe_kernel_address((const unsigned short *)x, sig) == 0) 166 return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
164 ret = (sig == 0xaa55); 167 sig == ROMSIGNATURE;
165 return ret;
166} 168}
167 169
168static int __init romchecksum(unsigned char *rom, unsigned long length) 170static int __init romchecksum(unsigned char *rom, unsigned long length)
169{ 171{
170 unsigned char *p, sum = 0; 172 unsigned char sum;
171 173
172 for (p = rom; p < rom + length; p++) 174 for (sum = 0; length; length--)
173 sum += *p; 175 sum += *rom++;
174 return sum == 0; 176 return sum == 0;
175} 177}
176 178
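
The two rewritten helpers implement the classic expansion-ROM probe: a candidate image must start with the 0xAA55 signature and all of its bytes must sum to zero modulo 256. A stand-alone version with fabricated sample data; memcpy() replaces probe_kernel_address(), and the signature compare assumes little-endian x86 byte order:

    #include <stdio.h>
    #include <string.h>

    #define ROMSIGNATURE 0xaa55

    static int romsignature(const unsigned char *rom)
    {
            unsigned short sig;

            memcpy(&sig, rom, sizeof(sig));
            return sig == ROMSIGNATURE;
    }

    static int romchecksum(const unsigned char *rom, unsigned long length)
    {
            unsigned char sum;

            for (sum = 0; length; length--)
                    sum += *rom++;
            return sum == 0;
    }

    int main(void)
    {
            unsigned char rom[4] = { 0x55, 0xaa, 0x00, 0x00 };

            rom[3] = (unsigned char)-(0x55 + 0xaa); /* force checksum to zero */
            printf("signature ok: %d, checksum ok: %d\n",
                   romsignature(rom), romchecksum(rom, sizeof(rom)));
            return 0;
    }
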
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683fc63a..18bddcb8e9e8 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,7 +30,7 @@
30 * 18(%esp) - %eax 30 * 18(%esp) - %eax
31 * 1C(%esp) - %ds 31 * 1C(%esp) - %ds
32 * 20(%esp) - %es 32 * 20(%esp) - %es
33 * 24(%esp) - %gs 33 * 24(%esp) - %fs
34 * 28(%esp) - orig_eax 34 * 28(%esp) - orig_eax
35 * 2C(%esp) - %eip 35 * 2C(%esp) - %eip
36 * 30(%esp) - %cs 36 * 30(%esp) - %cs
@@ -99,9 +99,9 @@ VM_MASK = 0x00020000
99 99
100#define SAVE_ALL \ 100#define SAVE_ALL \
101 cld; \ 101 cld; \
102 pushl %gs; \ 102 pushl %fs; \
103 CFI_ADJUST_CFA_OFFSET 4;\ 103 CFI_ADJUST_CFA_OFFSET 4;\
104 /*CFI_REL_OFFSET gs, 0;*/\ 104 /*CFI_REL_OFFSET fs, 0;*/\
105 pushl %es; \ 105 pushl %es; \
106 CFI_ADJUST_CFA_OFFSET 4;\ 106 CFI_ADJUST_CFA_OFFSET 4;\
107 /*CFI_REL_OFFSET es, 0;*/\ 107 /*CFI_REL_OFFSET es, 0;*/\
@@ -133,7 +133,7 @@ VM_MASK = 0x00020000
133 movl %edx, %ds; \ 133 movl %edx, %ds; \
134 movl %edx, %es; \ 134 movl %edx, %es; \
135 movl $(__KERNEL_PDA), %edx; \ 135 movl $(__KERNEL_PDA), %edx; \
136 movl %edx, %gs 136 movl %edx, %fs
137 137
138#define RESTORE_INT_REGS \ 138#define RESTORE_INT_REGS \
139 popl %ebx; \ 139 popl %ebx; \
@@ -166,9 +166,9 @@ VM_MASK = 0x00020000
1662: popl %es; \ 1662: popl %es; \
167 CFI_ADJUST_CFA_OFFSET -4;\ 167 CFI_ADJUST_CFA_OFFSET -4;\
168 /*CFI_RESTORE es;*/\ 168 /*CFI_RESTORE es;*/\
1693: popl %gs; \ 1693: popl %fs; \
170 CFI_ADJUST_CFA_OFFSET -4;\ 170 CFI_ADJUST_CFA_OFFSET -4;\
171 /*CFI_RESTORE gs;*/\ 171 /*CFI_RESTORE fs;*/\
172.pushsection .fixup,"ax"; \ 172.pushsection .fixup,"ax"; \
1734: movl $0,(%esp); \ 1734: movl $0,(%esp); \
174 jmp 1b; \ 174 jmp 1b; \
@@ -227,6 +227,7 @@ ENTRY(ret_from_fork)
227 CFI_ADJUST_CFA_OFFSET -4 227 CFI_ADJUST_CFA_OFFSET -4
228 jmp syscall_exit 228 jmp syscall_exit
229 CFI_ENDPROC 229 CFI_ENDPROC
230END(ret_from_fork)
230 231
231/* 232/*
232 * Return to user mode is not as complex as all this looks, 233 * Return to user mode is not as complex as all this looks,
@@ -258,6 +259,7 @@ ENTRY(resume_userspace)
258 # int/exception return? 259 # int/exception return?
259 jne work_pending 260 jne work_pending
260 jmp restore_all 261 jmp restore_all
262END(ret_from_exception)
261 263
262#ifdef CONFIG_PREEMPT 264#ifdef CONFIG_PREEMPT
263ENTRY(resume_kernel) 265ENTRY(resume_kernel)
@@ -272,6 +274,7 @@ need_resched:
272 jz restore_all 274 jz restore_all
273 call preempt_schedule_irq 275 call preempt_schedule_irq
274 jmp need_resched 276 jmp need_resched
277END(resume_kernel)
275#endif 278#endif
276 CFI_ENDPROC 279 CFI_ENDPROC
277 280
@@ -349,16 +352,17 @@ sysenter_past_esp:
349 movl PT_OLDESP(%esp), %ecx 352 movl PT_OLDESP(%esp), %ecx
350 xorl %ebp,%ebp 353 xorl %ebp,%ebp
351 TRACE_IRQS_ON 354 TRACE_IRQS_ON
3521: mov PT_GS(%esp), %gs 3551: mov PT_FS(%esp), %fs
353 ENABLE_INTERRUPTS_SYSEXIT 356 ENABLE_INTERRUPTS_SYSEXIT
354 CFI_ENDPROC 357 CFI_ENDPROC
355.pushsection .fixup,"ax" 358.pushsection .fixup,"ax"
3562: movl $0,PT_GS(%esp) 3592: movl $0,PT_FS(%esp)
357 jmp 1b 360 jmp 1b
358.section __ex_table,"a" 361.section __ex_table,"a"
359 .align 4 362 .align 4
360 .long 1b,2b 363 .long 1b,2b
361.popsection 364.popsection
365ENDPROC(sysenter_entry)
362 366
363 # system call handler stub 367 # system call handler stub
364ENTRY(system_call) 368ENTRY(system_call)
@@ -459,6 +463,7 @@ ldt_ss:
459 CFI_ADJUST_CFA_OFFSET -8 463 CFI_ADJUST_CFA_OFFSET -8
460 jmp restore_nocheck 464 jmp restore_nocheck
461 CFI_ENDPROC 465 CFI_ENDPROC
466ENDPROC(system_call)
462 467
463 # perform work that needs to be done immediately before resumption 468 # perform work that needs to be done immediately before resumption
464 ALIGN 469 ALIGN
@@ -504,6 +509,7 @@ work_notifysig_v86:
504 xorl %edx, %edx 509 xorl %edx, %edx
505 call do_notify_resume 510 call do_notify_resume
506 jmp resume_userspace_sig 511 jmp resume_userspace_sig
512END(work_pending)
507 513
508 # perform syscall exit tracing 514 # perform syscall exit tracing
509 ALIGN 515 ALIGN
@@ -519,6 +525,7 @@ syscall_trace_entry:
519 cmpl $(nr_syscalls), %eax 525 cmpl $(nr_syscalls), %eax
520 jnae syscall_call 526 jnae syscall_call
521 jmp syscall_exit 527 jmp syscall_exit
528END(syscall_trace_entry)
522 529
523 # perform syscall exit tracing 530 # perform syscall exit tracing
524 ALIGN 531 ALIGN
@@ -532,6 +539,7 @@ syscall_exit_work:
532 movl $1, %edx 539 movl $1, %edx
533 call do_syscall_trace 540 call do_syscall_trace
534 jmp resume_userspace 541 jmp resume_userspace
542END(syscall_exit_work)
535 CFI_ENDPROC 543 CFI_ENDPROC
536 544
537 RING0_INT_FRAME # can't unwind into user space anyway 545 RING0_INT_FRAME # can't unwind into user space anyway
@@ -542,15 +550,17 @@ syscall_fault:
542 GET_THREAD_INFO(%ebp) 550 GET_THREAD_INFO(%ebp)
543 movl $-EFAULT,PT_EAX(%esp) 551 movl $-EFAULT,PT_EAX(%esp)
544 jmp resume_userspace 552 jmp resume_userspace
553END(syscall_fault)
545 554
546syscall_badsys: 555syscall_badsys:
547 movl $-ENOSYS,PT_EAX(%esp) 556 movl $-ENOSYS,PT_EAX(%esp)
548 jmp resume_userspace 557 jmp resume_userspace
558END(syscall_badsys)
549 CFI_ENDPROC 559 CFI_ENDPROC
550 560
551#define FIXUP_ESPFIX_STACK \ 561#define FIXUP_ESPFIX_STACK \
552 /* since we are on a wrong stack, we cant make it a C code :( */ \ 562 /* since we are on a wrong stack, we cant make it a C code :( */ \
553 movl %gs:PDA_cpu, %ebx; \ 563 movl %fs:PDA_cpu, %ebx; \
554 PER_CPU(cpu_gdt_descr, %ebx); \ 564 PER_CPU(cpu_gdt_descr, %ebx); \
555 movl GDS_address(%ebx), %ebx; \ 565 movl GDS_address(%ebx), %ebx; \
556 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 566 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
@@ -581,9 +591,9 @@ syscall_badsys:
581ENTRY(interrupt) 591ENTRY(interrupt)
582.text 592.text
583 593
584vector=0
585ENTRY(irq_entries_start) 594ENTRY(irq_entries_start)
586 RING0_INT_FRAME 595 RING0_INT_FRAME
596vector=0
587.rept NR_IRQS 597.rept NR_IRQS
588 ALIGN 598 ALIGN
589 .if vector 599 .if vector
@@ -592,11 +602,16 @@ ENTRY(irq_entries_start)
5921: pushl $~(vector) 6021: pushl $~(vector)
593 CFI_ADJUST_CFA_OFFSET 4 603 CFI_ADJUST_CFA_OFFSET 4
594 jmp common_interrupt 604 jmp common_interrupt
595.data 605 .previous
596 .long 1b 606 .long 1b
597.text 607 .text
598vector=vector+1 608vector=vector+1
599.endr 609.endr
610END(irq_entries_start)
611
612.previous
613END(interrupt)
614.previous
600 615
601/* 616/*
602 * the CPU automatically disables interrupts when executing an IRQ vector, 617 * the CPU automatically disables interrupts when executing an IRQ vector,
@@ -609,6 +624,7 @@ common_interrupt:
609 movl %esp,%eax 624 movl %esp,%eax
610 call do_IRQ 625 call do_IRQ
611 jmp ret_from_intr 626 jmp ret_from_intr
627ENDPROC(common_interrupt)
612 CFI_ENDPROC 628 CFI_ENDPROC
613 629
614#define BUILD_INTERRUPT(name, nr) \ 630#define BUILD_INTERRUPT(name, nr) \
@@ -621,18 +637,24 @@ ENTRY(name) \
621 movl %esp,%eax; \ 637 movl %esp,%eax; \
622 call smp_/**/name; \ 638 call smp_/**/name; \
623 jmp ret_from_intr; \ 639 jmp ret_from_intr; \
624 CFI_ENDPROC 640 CFI_ENDPROC; \
641ENDPROC(name)
625 642
626/* The include is where all of the SMP etc. interrupts come from */ 643/* The include is where all of the SMP etc. interrupts come from */
627#include "entry_arch.h" 644#include "entry_arch.h"
628 645
646/* This alternate entry is needed because we hijack the apic LVTT */
647#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
648BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
649#endif
650
629KPROBE_ENTRY(page_fault) 651KPROBE_ENTRY(page_fault)
630 RING0_EC_FRAME 652 RING0_EC_FRAME
631 pushl $do_page_fault 653 pushl $do_page_fault
632 CFI_ADJUST_CFA_OFFSET 4 654 CFI_ADJUST_CFA_OFFSET 4
633 ALIGN 655 ALIGN
634error_code: 656error_code:
635 /* the function address is in %gs's slot on the stack */ 657 /* the function address is in %fs's slot on the stack */
636 pushl %es 658 pushl %es
637 CFI_ADJUST_CFA_OFFSET 4 659 CFI_ADJUST_CFA_OFFSET 4
638 /*CFI_REL_OFFSET es, 0*/ 660 /*CFI_REL_OFFSET es, 0*/
@@ -661,20 +683,20 @@ error_code:
661 CFI_ADJUST_CFA_OFFSET 4 683 CFI_ADJUST_CFA_OFFSET 4
662 CFI_REL_OFFSET ebx, 0 684 CFI_REL_OFFSET ebx, 0
663 cld 685 cld
664 pushl %gs 686 pushl %fs
665 CFI_ADJUST_CFA_OFFSET 4 687 CFI_ADJUST_CFA_OFFSET 4
666 /*CFI_REL_OFFSET gs, 0*/ 688 /*CFI_REL_OFFSET fs, 0*/
667 movl $(__KERNEL_PDA), %ecx 689 movl $(__KERNEL_PDA), %ecx
668 movl %ecx, %gs 690 movl %ecx, %fs
669 UNWIND_ESPFIX_STACK 691 UNWIND_ESPFIX_STACK
670 popl %ecx 692 popl %ecx
671 CFI_ADJUST_CFA_OFFSET -4 693 CFI_ADJUST_CFA_OFFSET -4
672 /*CFI_REGISTER es, ecx*/ 694 /*CFI_REGISTER es, ecx*/
673 movl PT_GS(%esp), %edi # get the function address 695 movl PT_FS(%esp), %edi # get the function address
674 movl PT_ORIG_EAX(%esp), %edx # get the error code 696 movl PT_ORIG_EAX(%esp), %edx # get the error code
675 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 697 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
676 mov %ecx, PT_GS(%esp) 698 mov %ecx, PT_FS(%esp)
677 /*CFI_REL_OFFSET gs, ES*/ 699 /*CFI_REL_OFFSET fs, ES*/
678 movl $(__USER_DS), %ecx 700 movl $(__USER_DS), %ecx
679 movl %ecx, %ds 701 movl %ecx, %ds
680 movl %ecx, %es 702 movl %ecx, %es
@@ -692,6 +714,7 @@ ENTRY(coprocessor_error)
692 CFI_ADJUST_CFA_OFFSET 4 714 CFI_ADJUST_CFA_OFFSET 4
693 jmp error_code 715 jmp error_code
694 CFI_ENDPROC 716 CFI_ENDPROC
717END(coprocessor_error)
695 718
696ENTRY(simd_coprocessor_error) 719ENTRY(simd_coprocessor_error)
697 RING0_INT_FRAME 720 RING0_INT_FRAME
@@ -701,6 +724,7 @@ ENTRY(simd_coprocessor_error)
701 CFI_ADJUST_CFA_OFFSET 4 724 CFI_ADJUST_CFA_OFFSET 4
702 jmp error_code 725 jmp error_code
703 CFI_ENDPROC 726 CFI_ENDPROC
727END(simd_coprocessor_error)
704 728
705ENTRY(device_not_available) 729ENTRY(device_not_available)
706 RING0_INT_FRAME 730 RING0_INT_FRAME
@@ -721,6 +745,7 @@ device_not_available_emulate:
721 CFI_ADJUST_CFA_OFFSET -4 745 CFI_ADJUST_CFA_OFFSET -4
722 jmp ret_from_exception 746 jmp ret_from_exception
723 CFI_ENDPROC 747 CFI_ENDPROC
748END(device_not_available)
724 749
725/* 750/*
726 * Debug traps and NMI can happen at the one SYSENTER instruction 751 * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -864,10 +889,12 @@ ENTRY(native_iret)
864 .align 4 889 .align 4
865 .long 1b,iret_exc 890 .long 1b,iret_exc
866.previous 891.previous
892END(native_iret)
867 893
868ENTRY(native_irq_enable_sysexit) 894ENTRY(native_irq_enable_sysexit)
869 sti 895 sti
870 sysexit 896 sysexit
897END(native_irq_enable_sysexit)
871#endif 898#endif
872 899
873KPROBE_ENTRY(int3) 900KPROBE_ENTRY(int3)
@@ -890,6 +917,7 @@ ENTRY(overflow)
890 CFI_ADJUST_CFA_OFFSET 4 917 CFI_ADJUST_CFA_OFFSET 4
891 jmp error_code 918 jmp error_code
892 CFI_ENDPROC 919 CFI_ENDPROC
920END(overflow)
893 921
894ENTRY(bounds) 922ENTRY(bounds)
895 RING0_INT_FRAME 923 RING0_INT_FRAME
@@ -899,6 +927,7 @@ ENTRY(bounds)
899 CFI_ADJUST_CFA_OFFSET 4 927 CFI_ADJUST_CFA_OFFSET 4
900 jmp error_code 928 jmp error_code
901 CFI_ENDPROC 929 CFI_ENDPROC
930END(bounds)
902 931
903ENTRY(invalid_op) 932ENTRY(invalid_op)
904 RING0_INT_FRAME 933 RING0_INT_FRAME
@@ -908,6 +937,7 @@ ENTRY(invalid_op)
908 CFI_ADJUST_CFA_OFFSET 4 937 CFI_ADJUST_CFA_OFFSET 4
909 jmp error_code 938 jmp error_code
910 CFI_ENDPROC 939 CFI_ENDPROC
940END(invalid_op)
911 941
912ENTRY(coprocessor_segment_overrun) 942ENTRY(coprocessor_segment_overrun)
913 RING0_INT_FRAME 943 RING0_INT_FRAME
@@ -917,6 +947,7 @@ ENTRY(coprocessor_segment_overrun)
917 CFI_ADJUST_CFA_OFFSET 4 947 CFI_ADJUST_CFA_OFFSET 4
918 jmp error_code 948 jmp error_code
919 CFI_ENDPROC 949 CFI_ENDPROC
950END(coprocessor_segment_overrun)
920 951
921ENTRY(invalid_TSS) 952ENTRY(invalid_TSS)
922 RING0_EC_FRAME 953 RING0_EC_FRAME
@@ -924,6 +955,7 @@ ENTRY(invalid_TSS)
924 CFI_ADJUST_CFA_OFFSET 4 955 CFI_ADJUST_CFA_OFFSET 4
925 jmp error_code 956 jmp error_code
926 CFI_ENDPROC 957 CFI_ENDPROC
958END(invalid_TSS)
927 959
928ENTRY(segment_not_present) 960ENTRY(segment_not_present)
929 RING0_EC_FRAME 961 RING0_EC_FRAME
@@ -931,6 +963,7 @@ ENTRY(segment_not_present)
931 CFI_ADJUST_CFA_OFFSET 4 963 CFI_ADJUST_CFA_OFFSET 4
932 jmp error_code 964 jmp error_code
933 CFI_ENDPROC 965 CFI_ENDPROC
966END(segment_not_present)
934 967
935ENTRY(stack_segment) 968ENTRY(stack_segment)
936 RING0_EC_FRAME 969 RING0_EC_FRAME
@@ -938,6 +971,7 @@ ENTRY(stack_segment)
938 CFI_ADJUST_CFA_OFFSET 4 971 CFI_ADJUST_CFA_OFFSET 4
939 jmp error_code 972 jmp error_code
940 CFI_ENDPROC 973 CFI_ENDPROC
974END(stack_segment)
941 975
942KPROBE_ENTRY(general_protection) 976KPROBE_ENTRY(general_protection)
943 RING0_EC_FRAME 977 RING0_EC_FRAME
@@ -953,6 +987,7 @@ ENTRY(alignment_check)
953 CFI_ADJUST_CFA_OFFSET 4 987 CFI_ADJUST_CFA_OFFSET 4
954 jmp error_code 988 jmp error_code
955 CFI_ENDPROC 989 CFI_ENDPROC
990END(alignment_check)
956 991
957ENTRY(divide_error) 992ENTRY(divide_error)
958 RING0_INT_FRAME 993 RING0_INT_FRAME
@@ -962,6 +997,7 @@ ENTRY(divide_error)
962 CFI_ADJUST_CFA_OFFSET 4 997 CFI_ADJUST_CFA_OFFSET 4
963 jmp error_code 998 jmp error_code
964 CFI_ENDPROC 999 CFI_ENDPROC
1000END(divide_error)
965 1001
966#ifdef CONFIG_X86_MCE 1002#ifdef CONFIG_X86_MCE
967ENTRY(machine_check) 1003ENTRY(machine_check)
@@ -972,6 +1008,7 @@ ENTRY(machine_check)
972 CFI_ADJUST_CFA_OFFSET 4 1008 CFI_ADJUST_CFA_OFFSET 4
973 jmp error_code 1009 jmp error_code
974 CFI_ENDPROC 1010 CFI_ENDPROC
1011END(machine_check)
975#endif 1012#endif
976 1013
977ENTRY(spurious_interrupt_bug) 1014ENTRY(spurious_interrupt_bug)
@@ -982,6 +1019,7 @@ ENTRY(spurious_interrupt_bug)
982 CFI_ADJUST_CFA_OFFSET 4 1019 CFI_ADJUST_CFA_OFFSET 4
983 jmp error_code 1020 jmp error_code
984 CFI_ENDPROC 1021 CFI_ENDPROC
1022END(spurious_interrupt_bug)
985 1023
986ENTRY(kernel_thread_helper) 1024ENTRY(kernel_thread_helper)
987 pushl $0 # fake return address for unwinder 1025 pushl $0 # fake return address for unwinder
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index edef5084ce17..3fa7f9389afe 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -53,6 +53,7 @@
53 * any particular GDT layout, because we load our own as soon as we 53 * any particular GDT layout, because we load our own as soon as we
54 * can. 54 * can.
55 */ 55 */
56.section .text.head,"ax",@progbits
56ENTRY(startup_32) 57ENTRY(startup_32)
57 58
58#ifdef CONFIG_PARAVIRT 59#ifdef CONFIG_PARAVIRT
@@ -103,7 +104,7 @@ ENTRY(startup_32)
103 movzwl OLD_CL_OFFSET,%esi 104 movzwl OLD_CL_OFFSET,%esi
104 addl $(OLD_CL_BASE_ADDR),%esi 105 addl $(OLD_CL_BASE_ADDR),%esi
1052: 1062:
106 movl $(saved_command_line - __PAGE_OFFSET),%edi 107 movl $(boot_command_line - __PAGE_OFFSET),%edi
107 movl $(COMMAND_LINE_SIZE/4),%ecx 108 movl $(COMMAND_LINE_SIZE/4),%ecx
108 rep 109 rep
109 movsl 110 movsl
@@ -141,16 +142,25 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
141 jb 10b 142 jb 10b
142 movl %edi,(init_pg_tables_end - __PAGE_OFFSET) 143 movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
143 144
144#ifdef CONFIG_SMP
145 xorl %ebx,%ebx /* This is the boot CPU (BSP) */ 145 xorl %ebx,%ebx /* This is the boot CPU (BSP) */
146 jmp 3f 146 jmp 3f
147
148/* 147/*
149 * Non-boot CPU entry point; entered from trampoline.S 148 * Non-boot CPU entry point; entered from trampoline.S
150 * We can't lgdt here, because lgdt itself uses a data segment, but 149 * We can't lgdt here, because lgdt itself uses a data segment, but
151 * we know the trampoline has already loaded the boot_gdt_table GDT 150 * we know the trampoline has already loaded the boot_gdt_table GDT
152 * for us. 151 * for us.
152 *
153 * If cpu hotplug is not supported then this code can go in init section
154 * which will be freed later
153 */ 155 */
156
157#ifdef CONFIG_HOTPLUG_CPU
158.section .text,"ax",@progbits
159#else
160.section .init.text,"ax",@progbits
161#endif
162
163#ifdef CONFIG_SMP
154ENTRY(startup_32_smp) 164ENTRY(startup_32_smp)
155 cld 165 cld
156 movl $(__BOOT_DS),%eax 166 movl $(__BOOT_DS),%eax
@@ -208,8 +218,8 @@ ENTRY(startup_32_smp)
208 xorl %ebx,%ebx 218 xorl %ebx,%ebx
209 incl %ebx 219 incl %ebx
210 220
2113:
212#endif /* CONFIG_SMP */ 221#endif /* CONFIG_SMP */
2223:
213 223
214/* 224/*
215 * Enable paging 225 * Enable paging
@@ -309,7 +319,7 @@ is386: movl $2,%ecx # set MP
309 319
310 call check_x87 320 call check_x87
311 call setup_pda 321 call setup_pda
312 lgdt cpu_gdt_descr 322 lgdt early_gdt_descr
313 lidt idt_descr 323 lidt idt_descr
314 ljmp $(__KERNEL_CS),$1f 324 ljmp $(__KERNEL_CS),$1f
3151: movl $(__KERNEL_DS),%eax # reload all the segment registers 3251: movl $(__KERNEL_DS),%eax # reload all the segment registers
@@ -319,12 +329,12 @@ is386: movl $2,%ecx # set MP
319 movl %eax,%ds 329 movl %eax,%ds
320 movl %eax,%es 330 movl %eax,%es
321 331
322 xorl %eax,%eax # Clear FS and LDT 332 xorl %eax,%eax # Clear GS and LDT
323 movl %eax,%fs 333 movl %eax,%gs
324 lldt %ax 334 lldt %ax
325 335
326 movl $(__KERNEL_PDA),%eax 336 movl $(__KERNEL_PDA),%eax
327 mov %eax,%gs 337 mov %eax,%fs
328 338
329 cld # gcc2 wants the direction flag cleared at all times 339 cld # gcc2 wants the direction flag cleared at all times
330 pushl $0 # fake return address for unwinder 340 pushl $0 # fake return address for unwinder
@@ -360,12 +370,12 @@ check_x87:
360 * cpu_gdt_table and boot_pda; for secondary CPUs, these will be 370 * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
361 * that CPU's GDT and PDA. 371 * that CPU's GDT and PDA.
362 */ 372 */
363setup_pda: 373ENTRY(setup_pda)
364 /* get the PDA pointer */ 374 /* get the PDA pointer */
365 movl start_pda, %eax 375 movl start_pda, %eax
366 376
367 /* slot the PDA address into the GDT */ 377 /* slot the PDA address into the GDT */
368 mov cpu_gdt_descr+2, %ecx 378 mov early_gdt_descr+2, %ecx
369 mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ 379 mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
370 shr $16, %eax 380 shr $16, %eax
371 mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ 381 mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
@@ -492,6 +502,7 @@ ignore_int:
492#endif 502#endif
493 iret 503 iret
494 504
505.section .text
495#ifdef CONFIG_PARAVIRT 506#ifdef CONFIG_PARAVIRT
496startup_paravirt: 507startup_paravirt:
497 cld 508 cld
@@ -502,10 +513,11 @@ startup_paravirt:
502 pushl %ecx 513 pushl %ecx
503 pushl %eax 514 pushl %eax
504 515
505 /* paravirt.o is last in link, and that probe fn never returns */
506 pushl $__start_paravirtprobe 516 pushl $__start_paravirtprobe
5071: 5171:
508 movl 0(%esp), %eax 518 movl 0(%esp), %eax
519 cmpl $__stop_paravirtprobe, %eax
520 je unhandled_paravirt
509 pushl (%eax) 521 pushl (%eax)
510 movl 8(%esp), %eax 522 movl 8(%esp), %eax
511 call *(%esp) 523 call *(%esp)
@@ -517,6 +529,10 @@ startup_paravirt:
517 529
518 addl $4, (%esp) 530 addl $4, (%esp)
519 jmp 1b 531 jmp 1b
532
533unhandled_paravirt:
534 /* Nothing wanted us: we're screwed. */
535 ud2
520#endif 536#endif
521 537
522/* 538/*
@@ -581,7 +597,7 @@ idt_descr:
581 597
582# boot GDT descriptor (later on used by CPU#0): 598# boot GDT descriptor (later on used by CPU#0):
583 .word 0 # 32 bit align gdt_desc.address 599 .word 0 # 32 bit align gdt_desc.address
584ENTRY(cpu_gdt_descr) 600ENTRY(early_gdt_descr)
585 .word GDT_ENTRIES*8-1 601 .word GDT_ENTRIES*8-1
586 .long cpu_gdt_table 602 .long cpu_gdt_table
587 603
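
setup_pda above patches the PDA base into a GDT entry byte by byte, because a 32-bit segment base is scattered across bytes 2-4 and 7 of the 8-byte descriptor. The same operation written out in C (illustrative only, not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    static void set_desc_base(uint8_t *desc, uint32_t base)
    {
            desc[2] = base & 0xff;          /* base bits  0-7  */
            desc[3] = (base >> 8) & 0xff;   /* base bits  8-15 */
            desc[4] = (base >> 16) & 0xff;  /* base bits 16-23 */
            desc[7] = (base >> 24) & 0xff;  /* base bits 24-31 */
    }

    int main(void)
    {
            uint8_t gdt_entry[8] = { 0 };

            set_desc_base(gdt_entry, 0xc0123456);
            printf("%02x %02x %02x .. %02x\n",       /* 56 34 12 .. c0 */
                   gdt_entry[2], gdt_entry[3], gdt_entry[4], gdt_entry[7]);
            return 0;
    }
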
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 45a8685bb60b..e1006b7acc9e 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -1,4 +1,5 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h>
2#include <linux/errno.h> 3#include <linux/errno.h>
3#include <linux/hpet.h> 4#include <linux/hpet.h>
4#include <linux/init.h> 5#include <linux/init.h>
@@ -6,17 +7,278 @@
6#include <asm/hpet.h> 7#include <asm/hpet.h>
7#include <asm/io.h> 8#include <asm/io.h>
8 9
10extern struct clock_event_device *global_clock_event;
11
9#define HPET_MASK CLOCKSOURCE_MASK(32) 12#define HPET_MASK CLOCKSOURCE_MASK(32)
10#define HPET_SHIFT 22 13#define HPET_SHIFT 22
11 14
12/* FSEC = 10^-15 NSEC = 10^-9 */ 15/* FSEC = 10^-15 NSEC = 10^-9 */
13#define FSEC_PER_NSEC 1000000 16#define FSEC_PER_NSEC 1000000
14 17
15static void *hpet_ptr; 18/*
 19 * HPET address is set in acpi/boot.c when an ACPI entry exists
20 */
21unsigned long hpet_address;
22static void __iomem * hpet_virt_address;
23
24static inline unsigned long hpet_readl(unsigned long a)
25{
26 return readl(hpet_virt_address + a);
27}
28
29static inline void hpet_writel(unsigned long d, unsigned long a)
30{
31 writel(d, hpet_virt_address + a);
32}
33
34/*
35 * HPET command line enable / disable
36 */
37static int boot_hpet_disable;
38
39static int __init hpet_setup(char* str)
40{
41 if (str) {
42 if (!strncmp("disable", str, 7))
43 boot_hpet_disable = 1;
44 }
45 return 1;
46}
47__setup("hpet=", hpet_setup);
48
49static inline int is_hpet_capable(void)
50{
51 return (!boot_hpet_disable && hpet_address);
52}
53
54/*
55 * HPET timer interrupt enable / disable
56 */
57static int hpet_legacy_int_enabled;
58
59/**
60 * is_hpet_enabled - check whether the hpet timer interrupt is enabled
61 */
62int is_hpet_enabled(void)
63{
64 return is_hpet_capable() && hpet_legacy_int_enabled;
65}
66
67/*
68 * When the hpet driver (/dev/hpet) is enabled, we need to reserve
69 * timer 0 and timer 1 in case of RTC emulation.
70 */
71#ifdef CONFIG_HPET
72static void hpet_reserve_platform_timers(unsigned long id)
73{
74 struct hpet __iomem *hpet = hpet_virt_address;
75 struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
76 unsigned int nrtimers, i;
77 struct hpet_data hd;
78
79 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
80
81 memset(&hd, 0, sizeof (hd));
82 hd.hd_phys_address = hpet_address;
83 hd.hd_address = hpet_virt_address;
84 hd.hd_nirqs = nrtimers;
85 hd.hd_flags = HPET_DATA_PLATFORM;
86 hpet_reserve_timer(&hd, 0);
87
88#ifdef CONFIG_HPET_EMULATE_RTC
89 hpet_reserve_timer(&hd, 1);
90#endif
91
92 hd.hd_irq[0] = HPET_LEGACY_8254;
93 hd.hd_irq[1] = HPET_LEGACY_RTC;
94
95 for (i = 2; i < nrtimers; timer++, i++)
96 hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
97 Tn_INT_ROUTE_CNF_SHIFT;
98
99 hpet_alloc(&hd);
100
101}
102#else
103static void hpet_reserve_platform_timers(unsigned long id) { }
104#endif
105
106/*
107 * Common hpet info
108 */
109static unsigned long hpet_period;
110
111static void hpet_set_mode(enum clock_event_mode mode,
112 struct clock_event_device *evt);
113static int hpet_next_event(unsigned long delta,
114 struct clock_event_device *evt);
115
116/*
117 * The hpet clock event device
118 */
119static struct clock_event_device hpet_clockevent = {
120 .name = "hpet",
121 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
122 .set_mode = hpet_set_mode,
123 .set_next_event = hpet_next_event,
124 .shift = 32,
125 .irq = 0,
126};
127
128static void hpet_start_counter(void)
129{
130 unsigned long cfg = hpet_readl(HPET_CFG);
131
132 cfg &= ~HPET_CFG_ENABLE;
133 hpet_writel(cfg, HPET_CFG);
134 hpet_writel(0, HPET_COUNTER);
135 hpet_writel(0, HPET_COUNTER + 4);
136 cfg |= HPET_CFG_ENABLE;
137 hpet_writel(cfg, HPET_CFG);
138}
139
140static void hpet_enable_int(void)
141{
142 unsigned long cfg = hpet_readl(HPET_CFG);
143
144 cfg |= HPET_CFG_LEGACY;
145 hpet_writel(cfg, HPET_CFG);
146 hpet_legacy_int_enabled = 1;
147}
148
149static void hpet_set_mode(enum clock_event_mode mode,
150 struct clock_event_device *evt)
151{
152 unsigned long cfg, cmp, now;
153 uint64_t delta;
154
155 switch(mode) {
156 case CLOCK_EVT_MODE_PERIODIC:
157 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
158 delta >>= hpet_clockevent.shift;
159 now = hpet_readl(HPET_COUNTER);
160 cmp = now + (unsigned long) delta;
161 cfg = hpet_readl(HPET_T0_CFG);
162 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
163 HPET_TN_SETVAL | HPET_TN_32BIT;
164 hpet_writel(cfg, HPET_T0_CFG);
165 /*
166 * The first write after writing TN_SETVAL to the
167 * config register sets the counter value, the second
168 * write sets the period.
169 */
170 hpet_writel(cmp, HPET_T0_CMP);
171 udelay(1);
172 hpet_writel((unsigned long) delta, HPET_T0_CMP);
173 break;
174
175 case CLOCK_EVT_MODE_ONESHOT:
176 cfg = hpet_readl(HPET_T0_CFG);
177 cfg &= ~HPET_TN_PERIODIC;
178 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
179 hpet_writel(cfg, HPET_T0_CFG);
180 break;
181
182 case CLOCK_EVT_MODE_UNUSED:
183 case CLOCK_EVT_MODE_SHUTDOWN:
184 cfg = hpet_readl(HPET_T0_CFG);
185 cfg &= ~HPET_TN_ENABLE;
186 hpet_writel(cfg, HPET_T0_CFG);
187 break;
188 }
189}
190
191static int hpet_next_event(unsigned long delta,
192 struct clock_event_device *evt)
193{
194 unsigned long cnt;
195
196 cnt = hpet_readl(HPET_COUNTER);
197 cnt += delta;
198 hpet_writel(cnt, HPET_T0_CMP);
199
 200 return ((long)(hpet_readl(HPET_COUNTER) - cnt) > 0);
201}
202
203/*
204 * Try to setup the HPET timer
205 */
206int __init hpet_enable(void)
207{
208 unsigned long id;
209 uint64_t hpet_freq;
210
211 if (!is_hpet_capable())
212 return 0;
213
214 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
215
216 /*
217 * Read the period and check for a sane value:
218 */
219 hpet_period = hpet_readl(HPET_PERIOD);
220 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
221 goto out_nohpet;
222
223 /*
 224 * The period is a femtoseconds value. We need to calculate the
225 * scaled math multiplication factor for nanosecond to hpet tick
226 * conversion.
227 */
228 hpet_freq = 1000000000000000ULL;
229 do_div(hpet_freq, hpet_period);
230 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
231 NSEC_PER_SEC, 32);
232 /* Calculate the min / max delta */
233 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
234 &hpet_clockevent);
235 hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
236 &hpet_clockevent);
237
238 /*
239 * Read the HPET ID register to retrieve the IRQ routing
240 * information and the number of channels
241 */
242 id = hpet_readl(HPET_ID);
243
244#ifdef CONFIG_HPET_EMULATE_RTC
245 /*
246 * The legacy routing mode needs at least two channels, tick timer
247 * and the rtc emulation channel.
248 */
249 if (!(id & HPET_ID_NUMBER))
250 goto out_nohpet;
251#endif
252
253 /* Start the counter */
254 hpet_start_counter();
255
256 if (id & HPET_ID_LEGSUP) {
257 hpet_enable_int();
258 hpet_reserve_platform_timers(id);
259 /*
260 * Start hpet with the boot cpu mask and make it
261 * global after the IO_APIC has been initialized.
262 */
 263 hpet_clockevent.cpumask = cpumask_of_cpu(0);
264 clockevents_register_device(&hpet_clockevent);
265 global_clock_event = &hpet_clockevent;
266 return 1;
267 }
268 return 0;
16 269
270out_nohpet:
271 iounmap(hpet_virt_address);
272 hpet_virt_address = NULL;
273 return 0;
274}
275
276/*
277 * Clock source related code
278 */
17static cycle_t read_hpet(void) 279static cycle_t read_hpet(void)
18{ 280{
19 return (cycle_t)readl(hpet_ptr); 281 return (cycle_t)hpet_readl(HPET_COUNTER);
20} 282}
21 283
22static struct clocksource clocksource_hpet = { 284static struct clocksource clocksource_hpet = {
@@ -24,29 +286,17 @@ static struct clocksource clocksource_hpet = {
24 .rating = 250, 286 .rating = 250,
25 .read = read_hpet, 287 .read = read_hpet,
26 .mask = HPET_MASK, 288 .mask = HPET_MASK,
27 .mult = 0, /* set below */
28 .shift = HPET_SHIFT, 289 .shift = HPET_SHIFT,
29 .is_continuous = 1, 290 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
30}; 291};
31 292
32static int __init init_hpet_clocksource(void) 293static int __init init_hpet_clocksource(void)
33{ 294{
34 unsigned long hpet_period;
35 void __iomem* hpet_base;
36 u64 tmp; 295 u64 tmp;
37 int err;
38 296
39 if (!is_hpet_enabled()) 297 if (!hpet_virt_address)
40 return -ENODEV; 298 return -ENODEV;
41 299
42 /* calculate the hpet address: */
43 hpet_base =
44 (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
45 hpet_ptr = hpet_base + HPET_COUNTER;
46
47 /* calculate the frequency: */
48 hpet_period = readl(hpet_base + HPET_PERIOD);
49
50 /* 300 /*
 51 * hpet period is in femtoseconds per cycle 301 * hpet period is in femtoseconds per cycle
52 * so we need to convert this to ns/cyc units 302 * so we need to convert this to ns/cyc units
@@ -62,11 +312,218 @@ static int __init init_hpet_clocksource(void)
62 do_div(tmp, FSEC_PER_NSEC); 312 do_div(tmp, FSEC_PER_NSEC);
63 clocksource_hpet.mult = (u32)tmp; 313 clocksource_hpet.mult = (u32)tmp;
64 314
65 err = clocksource_register(&clocksource_hpet); 315 return clocksource_register(&clocksource_hpet);
66 if (err)
67 iounmap(hpet_base);
68
69 return err;
70} 316}
71 317
72module_init(init_hpet_clocksource); 318module_init(init_hpet_clocksource);
319
320#ifdef CONFIG_HPET_EMULATE_RTC
321
 322/* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET
323 * is enabled, we support RTC interrupt functionality in software.
324 * RTC has 3 kinds of interrupts:
325 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
326 * is updated
327 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
328 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
329 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
330 * (1) and (2) above are implemented using polling at a frequency of
331 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
332 * overhead. (DEFAULT_RTC_INT_FREQ)
333 * For (3), we use interrupts at 64Hz or user specified periodic
334 * frequency, whichever is higher.
335 */
336#include <linux/mc146818rtc.h>
337#include <linux/rtc.h>
338
339#define DEFAULT_RTC_INT_FREQ 64
340#define DEFAULT_RTC_SHIFT 6
341#define RTC_NUM_INTS 1
342
343static unsigned long hpet_rtc_flags;
344static unsigned long hpet_prev_update_sec;
345static struct rtc_time hpet_alarm_time;
346static unsigned long hpet_pie_count;
347static unsigned long hpet_t1_cmp;
348static unsigned long hpet_default_delta;
349static unsigned long hpet_pie_delta;
350static unsigned long hpet_pie_limit;
351
352/*
353 * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
354 * is not supported by all HPET implementations for timer 1.
355 *
356 * hpet_rtc_timer_init() is called when the rtc is initialized.
357 */
358int hpet_rtc_timer_init(void)
359{
360 unsigned long cfg, cnt, delta, flags;
361
362 if (!is_hpet_enabled())
363 return 0;
364
365 if (!hpet_default_delta) {
366 uint64_t clc;
367
368 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
369 clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
370 hpet_default_delta = (unsigned long) clc;
371 }
372
373 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
374 delta = hpet_default_delta;
375 else
376 delta = hpet_pie_delta;
377
378 local_irq_save(flags);
379
380 cnt = delta + hpet_readl(HPET_COUNTER);
381 hpet_writel(cnt, HPET_T1_CMP);
382 hpet_t1_cmp = cnt;
383
384 cfg = hpet_readl(HPET_T1_CFG);
385 cfg &= ~HPET_TN_PERIODIC;
386 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
387 hpet_writel(cfg, HPET_T1_CFG);
388
389 local_irq_restore(flags);
390
391 return 1;
392}
393
394/*
395 * The functions below are called from rtc driver.
396 * Return 0 if HPET is not being used.
397 * Otherwise do the necessary changes and return 1.
398 */
399int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
400{
401 if (!is_hpet_enabled())
402 return 0;
403
404 hpet_rtc_flags &= ~bit_mask;
405 return 1;
406}
407
408int hpet_set_rtc_irq_bit(unsigned long bit_mask)
409{
410 unsigned long oldbits = hpet_rtc_flags;
411
412 if (!is_hpet_enabled())
413 return 0;
414
415 hpet_rtc_flags |= bit_mask;
416
417 if (!oldbits)
418 hpet_rtc_timer_init();
419
420 return 1;
421}
422
423int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
424 unsigned char sec)
425{
426 if (!is_hpet_enabled())
427 return 0;
428
429 hpet_alarm_time.tm_hour = hrs;
430 hpet_alarm_time.tm_min = min;
431 hpet_alarm_time.tm_sec = sec;
432
433 return 1;
434}
435
436int hpet_set_periodic_freq(unsigned long freq)
437{
438 uint64_t clc;
439
440 if (!is_hpet_enabled())
441 return 0;
442
443 if (freq <= DEFAULT_RTC_INT_FREQ)
444 hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
445 else {
446 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
447 do_div(clc, freq);
448 clc >>= hpet_clockevent.shift;
449 hpet_pie_delta = (unsigned long) clc;
450 }
451 return 1;
452}
453
454int hpet_rtc_dropped_irq(void)
455{
456 return is_hpet_enabled();
457}
458
459static void hpet_rtc_timer_reinit(void)
460{
461 unsigned long cfg, delta;
462 int lost_ints = -1;
463
464 if (unlikely(!hpet_rtc_flags)) {
465 cfg = hpet_readl(HPET_T1_CFG);
466 cfg &= ~HPET_TN_ENABLE;
467 hpet_writel(cfg, HPET_T1_CFG);
468 return;
469 }
470
471 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
472 delta = hpet_default_delta;
473 else
474 delta = hpet_pie_delta;
475
476 /*
477 * Increment the comparator value until we are ahead of the
478 * current count.
479 */
480 do {
481 hpet_t1_cmp += delta;
482 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
483 lost_ints++;
484 } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
485
486 if (lost_ints) {
487 if (hpet_rtc_flags & RTC_PIE)
488 hpet_pie_count += lost_ints;
489 if (printk_ratelimit())
490 printk(KERN_WARNING "rtc: lost %d interrupts\n",
491 lost_ints);
492 }
493}
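The signed cast in the loop condition is what makes the catch-up safe across 32-bit counter wraparound: the unsigned difference is interpreted as a signed distance. A minimal standalone sketch of the idiom (plain C, assuming the usual two's-complement behaviour of the cast):

    #include <stdio.h>

    /* Nonzero when 'counter' has moved past 'cmp', wraparound-safe. */
    static int counter_ahead(unsigned int counter, unsigned int cmp)
    {
            return (int)(counter - cmp) > 0;
    }

    int main(void)
    {
            /* counter wrapped past a comparator parked near the top */
            printf("%d\n", counter_ahead(0x00000010, 0xfffffff0)); /* 1 */
            /* comparator still ahead of the counter */
            printf("%d\n", counter_ahead(0xfffffff0, 0x00000010)); /* 0 */
            return 0;
    }

Each extra pass through the loop means one comparator period elapsed without an interrupt being delivered, which is why lost_ints starts at -1 and only the additional iterations are reported.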
494
495irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
496{
497 struct rtc_time curr_time;
498 unsigned long rtc_int_flag = 0;
499
500 hpet_rtc_timer_reinit();
501
502 if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
503 rtc_get_rtc_time(&curr_time);
504
505 if (hpet_rtc_flags & RTC_UIE &&
506 curr_time.tm_sec != hpet_prev_update_sec) {
507 rtc_int_flag = RTC_UF;
508 hpet_prev_update_sec = curr_time.tm_sec;
509 }
510
511 if (hpet_rtc_flags & RTC_PIE &&
512 ++hpet_pie_count >= hpet_pie_limit) {
513 rtc_int_flag |= RTC_PF;
514 hpet_pie_count = 0;
515 }
516
517 if (hpet_rtc_flags & RTC_PIE &&
518 (curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
519 (curr_time.tm_min == hpet_alarm_time.tm_min) &&
520 (curr_time.tm_hour == hpet_alarm_time.tm_hour))
521 rtc_int_flag |= RTC_AF;
522
523 if (rtc_int_flag) {
524 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
525 rtc_interrupt(rtc_int_flag, dev_id);
526 }
527 return IRQ_HANDLED;
528}
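The value handed to rtc_interrupt() follows the RTC driver's reporting layout: the cause bits (RTC_UF, RTC_AF, RTC_PF) plus RTC_IRQF occupy the low byte, while the number of interrupts being reported (RTC_NUM_INTS, i.e. 1) is packed into the bits above via the << 8.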
529#endif
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index 9a0060b92e32..a6bc7bb38834 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -2,7 +2,7 @@
2 * i8253.c 8253/PIT functions 2 * i8253.c 8253/PIT functions
3 * 3 *
4 */ 4 */
5#include <linux/clocksource.h> 5#include <linux/clockchips.h>
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/jiffies.h> 7#include <linux/jiffies.h>
8#include <linux/sysdev.h> 8#include <linux/sysdev.h>
@@ -19,17 +19,97 @@
19DEFINE_SPINLOCK(i8253_lock); 19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock); 20EXPORT_SYMBOL(i8253_lock);
21 21
22void setup_pit_timer(void) 22/*
23 * HPET replaces the PIT when enabled, so we need to know which of
24 * the two timers is in use
25 */
26struct clock_event_device *global_clock_event;
27
28/*
29 * Initialize the PIT timer.
30 *
31 * This is also called after resume to bring the PIT into operation again.
32 */
33static void init_pit_timer(enum clock_event_mode mode,
34 struct clock_event_device *evt)
35{
36 unsigned long flags;
37
38 spin_lock_irqsave(&i8253_lock, flags);
39
40 switch(mode) {
41 case CLOCK_EVT_MODE_PERIODIC:
42 /* binary, mode 2, LSB/MSB, ch 0 */
43 outb_p(0x34, PIT_MODE);
44 udelay(10);
45 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
46 udelay(10);
47 outb(LATCH >> 8 , PIT_CH0); /* MSB */
48 break;
49
50 case CLOCK_EVT_MODE_ONESHOT:
51 case CLOCK_EVT_MODE_SHUTDOWN:
52 case CLOCK_EVT_MODE_UNUSED:
53 /* One shot setup */
54 outb_p(0x38, PIT_MODE);
55 udelay(10);
56 break;
57 }
58 spin_unlock_irqrestore(&i8253_lock, flags);
59}
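The command bytes written to PIT_MODE decode as: bits 7-6 select channel 0, bits 5-4 = 11 select LSB-then-MSB access, and bits 3-1 pick the mode. 0x34 is therefore mode 2 (rate generator) for the periodic case, while 0x38 is mode 4 (software-triggered strobe), so a single LSB/MSB reload in pit_next_event() below arms exactly one event.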
60
61/*
62 * Program the next event in oneshot mode
63 *
64 * Delta is given in PIT ticks
65 */
66static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
23{ 67{
24 unsigned long flags; 68 unsigned long flags;
25 69
26 spin_lock_irqsave(&i8253_lock, flags); 70 spin_lock_irqsave(&i8253_lock, flags);
27 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ 71 outb_p(delta & 0xff , PIT_CH0); /* LSB */
28 udelay(10); 72 outb(delta >> 8 , PIT_CH0); /* MSB */
29 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
30 udelay(10);
31 outb(LATCH >> 8 , PIT_CH0); /* MSB */
32 spin_unlock_irqrestore(&i8253_lock, flags); 73 spin_unlock_irqrestore(&i8253_lock, flags);
74
75 return 0;
76}
77
78/*
79 * On UP the PIT can serve all of the possible timer functions. On SMP systems
80 * it can only be used for the global tick.
81 *
82 * The profiling and update capabilities are switched off once the local apic is
83 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
84 * !using_apic_timer decisions in do_timer_interrupt_hook()
85 */
86struct clock_event_device pit_clockevent = {
87 .name = "pit",
88 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
89 .set_mode = init_pit_timer,
90 .set_next_event = pit_next_event,
91 .shift = 32,
92 .irq = 0,
93};
94
95/*
96 * Initialize the conversion factor and the min/max deltas of the clock event
97 * structure and register the clock event source with the framework.
98 */
99void __init setup_pit_timer(void)
100{
101 /*
102 * Start pit with the boot cpu mask and make it global after the
103 * IO_APIC has been initialized.
104 */
105 pit_clockevent.cpumask = cpumask_of_cpu(0);
106 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
107 pit_clockevent.max_delta_ns =
108 clockevent_delta2ns(0x7FFF, &pit_clockevent);
109 pit_clockevent.min_delta_ns =
110 clockevent_delta2ns(0xF, &pit_clockevent);
111 clockevents_register_device(&pit_clockevent);
112 global_clock_event = &pit_clockevent;
33} 113}
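div_sc() produces the same cycles-per-nanosecond fixed-point factor discussed for the HPET, here from the PIT's 1193182 Hz input clock (CLOCK_TICK_RATE on PCs), and clockevent_delta2ns() inverts it to bound the programmable range. A standalone sketch of roughly what the framework computes (the rounding details of the real helpers differ slightly):

    #include <stdint.h>
    #include <stdio.h>

    /* Roughly the kernel's div_sc(): (ticks << shift) / nsec */
    static uint64_t demo_div_sc(uint32_t ticks, uint32_t nsec, int shift)
    {
            return ((uint64_t)ticks << shift) / nsec;
    }

    int main(void)
    {
            uint64_t mult = demo_div_sc(1193182, 1000000000, 32);
            /* invert for the largest 15-bit delta, ~clockevent_delta2ns() */
            uint64_t max_ns = (0x7FFFULL << 32) / mult;

            printf("mult=%llu max_delta_ns=%llu\n", /* ~5124677, ~27.5 ms */
                   (unsigned long long)mult, (unsigned long long)max_ns);
            return 0;
    }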
34 114
35/* 115/*
@@ -46,7 +126,7 @@ static cycle_t pit_read(void)
46 static u32 old_jifs; 126 static u32 old_jifs;
47 127
48 spin_lock_irqsave(&i8253_lock, flags); 128 spin_lock_irqsave(&i8253_lock, flags);
49 /* 129 /*
50 * Although our caller may have the read side of xtime_lock, 130 * Although our caller may have the read side of xtime_lock,
51 * this is now a seqlock, and we are cheating in this routine 131 * this is now a seqlock, and we are cheating in this routine
52 * by having side effects on state that we cannot undo if 132 * by having side effects on state that we cannot undo if
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index c8d45821c788..03abfdb1a6e4 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -41,6 +41,7 @@ static void mask_and_ack_8259A(unsigned int);
41static struct irq_chip i8259A_chip = { 41static struct irq_chip i8259A_chip = {
42 .name = "XT-PIC", 42 .name = "XT-PIC",
43 .mask = disable_8259A_irq, 43 .mask = disable_8259A_irq,
44 .disable = disable_8259A_irq,
44 .unmask = enable_8259A_irq, 45 .unmask = enable_8259A_irq,
45 .mask_ack = mask_and_ack_8259A, 46 .mask_ack = mask_and_ack_8259A,
46}; 47};
@@ -410,12 +411,6 @@ void __init native_init_IRQ(void)
410 intr_init_hook(); 411 intr_init_hook();
411 412
412 /* 413 /*
413 * Set the clock to HZ Hz, we already have a valid
414 * vector now:
415 */
416 setup_pit_timer();
417
418 /*
419 * External FPU? Set up irq13 if so, for 414 * External FPU? Set up irq13 if so, for
420 * original braindamaged IBM FERR coupling. 415 * original braindamaged IBM FERR coupling.
421 */ 416 */
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 6a3875f81a0a..4ccebd454e25 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -126,7 +126,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
126 */ 126 */
127static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) 127static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
128{ 128{
129 volatile struct io_apic *io_apic = io_apic_base(apic); 129 volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
130 if (sis_apic_bug) 130 if (sis_apic_bug)
131 writel(reg, &io_apic->index); 131 writel(reg, &io_apic->index);
132 writel(value, &io_apic->data); 132 writel(value, &io_apic->data);
@@ -482,8 +482,8 @@ static void do_irq_balance(void)
482 package_index = CPU_TO_PACKAGEINDEX(i); 482 package_index = CPU_TO_PACKAGEINDEX(i);
483 for (j = 0; j < NR_IRQS; j++) { 483 for (j = 0; j < NR_IRQS; j++) {
484 unsigned long value_now, delta; 484 unsigned long value_now, delta;
485 /* Is this an active IRQ? */ 485 /* Is this an active IRQ or balancing disabled? */
486 if (!irq_desc[j].action) 486 if (!irq_desc[j].action || irq_balancing_disabled(j))
487 continue; 487 continue;
488 if ( package_index == i ) 488 if ( package_index == i )
489 IRQ_DELTA(package_index,j) = 0; 489 IRQ_DELTA(package_index,j) = 0;
@@ -1281,11 +1281,9 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1281 trigger == IOAPIC_LEVEL) 1281 trigger == IOAPIC_LEVEL)
1282 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1282 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1283 handle_fasteoi_irq, "fasteoi"); 1283 handle_fasteoi_irq, "fasteoi");
1284 else { 1284 else
1285 irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1286 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1285 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1287 handle_edge_irq, "edge"); 1286 handle_edge_irq, "edge");
1288 }
1289 set_intr_gate(vector, interrupt[irq]); 1287 set_intr_gate(vector, interrupt[irq]);
1290} 1288}
1291 1289
@@ -1588,7 +1586,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1588 v = apic_read(APIC_LVR); 1586 v = apic_read(APIC_LVR);
1589 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1587 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1590 ver = GET_APIC_VERSION(v); 1588 ver = GET_APIC_VERSION(v);
1591 maxlvt = get_maxlvt(); 1589 maxlvt = lapic_get_maxlvt();
1592 1590
1593 v = apic_read(APIC_TASKPRI); 1591 v = apic_read(APIC_TASKPRI);
1594 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1592 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
@@ -1920,7 +1918,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
1920static void __init setup_ioapic_ids_from_mpc(void) { } 1918static void __init setup_ioapic_ids_from_mpc(void) { }
1921#endif 1919#endif
1922 1920
1923static int no_timer_check __initdata; 1921int no_timer_check __initdata;
1924 1922
1925static int __init notimercheck(char *s) 1923static int __init notimercheck(char *s)
1926{ 1924{
@@ -2310,7 +2308,7 @@ static inline void __init check_timer(void)
2310 2308
2311 disable_8259A_irq(0); 2309 disable_8259A_irq(0);
2312 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, 2310 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2313 "fasteio"); 2311 "fasteoi");
2314 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 2312 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2315 enable_8259A_irq(0); 2313 enable_8259A_irq(0);
2316 2314
@@ -2606,25 +2604,32 @@ static struct irq_chip msi_chip = {
2606 .retrigger = ioapic_retrigger_irq, 2604 .retrigger = ioapic_retrigger_irq,
2607}; 2605};
2608 2606
2609int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) 2607int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2610{ 2608{
2611 struct msi_msg msg; 2609 struct msi_msg msg;
2612 int ret; 2610 int irq, ret;
2611 irq = create_irq();
2612 if (irq < 0)
2613 return irq;
2614
2615 set_irq_msi(irq, desc);
2613 ret = msi_compose_msg(dev, irq, &msg); 2616 ret = msi_compose_msg(dev, irq, &msg);
2614 if (ret < 0) 2617 if (ret < 0) {
2618 destroy_irq(irq);
2615 return ret; 2619 return ret;
2620 }
2616 2621
2617 write_msi_msg(irq, &msg); 2622 write_msi_msg(irq, &msg);
2618 2623
2619 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, 2624 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2620 "edge"); 2625 "edge");
2621 2626
2622 return 0; 2627 return irq;
2623} 2628}
2624 2629
2625void arch_teardown_msi_irq(unsigned int irq) 2630void arch_teardown_msi_irq(unsigned int irq)
2626{ 2631{
2627 return; 2632 destroy_irq(irq);
2628} 2633}
2629 2634
2630#endif /* CONFIG_PCI_MSI */ 2635#endif /* CONFIG_PCI_MSI */
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 3201d421090a..0f2ca590bf23 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -10,7 +10,6 @@
10 * io_apic.c.) 10 * io_apic.c.)
11 */ 11 */
12 12
13#include <asm/uaccess.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/seq_file.h> 14#include <linux/seq_file.h>
16#include <linux/interrupt.h> 15#include <linux/interrupt.h>
@@ -19,19 +18,36 @@
19#include <linux/cpu.h> 18#include <linux/cpu.h>
20#include <linux/delay.h> 19#include <linux/delay.h>
21 20
21#include <asm/idle.h>
22
23#include <asm/apic.h>
24#include <asm/uaccess.h>
25
22DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; 26DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
23EXPORT_PER_CPU_SYMBOL(irq_stat); 27EXPORT_PER_CPU_SYMBOL(irq_stat);
24 28
25#ifndef CONFIG_X86_LOCAL_APIC
26/* 29/*
27 * 'what should we do if we get a hw irq event on an illegal vector'. 30 * 'what should we do if we get a hw irq event on an illegal vector'.
28 * each architecture has to answer this themselves. 31 * each architecture has to answer this themselves.
29 */ 32 */
30void ack_bad_irq(unsigned int irq) 33void ack_bad_irq(unsigned int irq)
31{ 34{
32 printk("unexpected IRQ trap at vector %02x\n", irq); 35 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
33} 36
37#ifdef CONFIG_X86_LOCAL_APIC
38 /*
39 * Currently unexpected vectors happen only on SMP and APIC.
40 * We _must_ ack these because every local APIC has only N
41 * irq slots per priority level, and a 'hanging, unacked' IRQ
42 * holds up an irq slot - in excessive cases (when multiple
43 * unexpected vectors occur) that might lock up the APIC
44 * completely.
45 * But only ack when the APIC is enabled -AK
46 */
47 if (cpu_has_apic)
48 ack_APIC_irq();
34#endif 49#endif
50}
35 51
36#ifdef CONFIG_4KSTACKS 52#ifdef CONFIG_4KSTACKS
37/* 53/*
@@ -61,6 +77,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
61 union irq_ctx *curctx, *irqctx; 77 union irq_ctx *curctx, *irqctx;
62 u32 *isp; 78 u32 *isp;
63#endif 79#endif
80 exit_idle();
64 81
65 if (unlikely((unsigned)irq >= NR_IRQS)) { 82 if (unlikely((unsigned)irq >= NR_IRQS)) {
66 printk(KERN_EMERG "%s: cannot handle IRQ %d\n", 83 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index af1d53344993..b545bc746fce 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -363,7 +363,7 @@ no_kprobe:
363 " pushf\n" 363 " pushf\n"
364 /* skip cs, eip, orig_eax */ 364 /* skip cs, eip, orig_eax */
365 " subl $12, %esp\n" 365 " subl $12, %esp\n"
366 " pushl %gs\n" 366 " pushl %fs\n"
367 " pushl %ds\n" 367 " pushl %ds\n"
368 " pushl %es\n" 368 " pushl %es\n"
369 " pushl %eax\n" 369 " pushl %eax\n"
@@ -387,7 +387,7 @@ no_kprobe:
387 " popl %edi\n" 387 " popl %edi\n"
388 " popl %ebp\n" 388 " popl %ebp\n"
389 " popl %eax\n" 389 " popl %eax\n"
390 /* skip eip, orig_eax, es, ds, gs */ 390 /* skip eip, orig_eax, es, ds, fs */
391 " addl $20, %esp\n" 391 " addl $20, %esp\n"
392 " popf\n" 392 " popf\n"
393 " ret\n"); 393 " ret\n");
@@ -408,7 +408,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
408 spin_lock_irqsave(&kretprobe_lock, flags); 408 spin_lock_irqsave(&kretprobe_lock, flags);
409 head = kretprobe_inst_table_head(current); 409 head = kretprobe_inst_table_head(current);
410 /* fixup registers */ 410 /* fixup registers */
411 regs->xcs = __KERNEL_CS; 411 regs->xcs = __KERNEL_CS | get_kernel_rpl();
412 regs->eip = trampoline_address; 412 regs->eip = trampoline_address;
413 regs->orig_eax = 0xffffffff; 413 regs->orig_eax = 0xffffffff;
414 414
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index c8fa13721bcb..b8f16633a6ec 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -384,7 +384,7 @@ static int do_microcode_update (void)
384{ 384{
385 long cursor = 0; 385 long cursor = 0;
386 int error = 0; 386 int error = 0;
387 void *new_mc; 387 void *new_mc = NULL;
388 int cpu; 388 int cpu;
389 cpumask_t old; 389 cpumask_t old;
390 390
@@ -451,7 +451,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
451 return ret; 451 return ret;
452} 452}
453 453
454static struct file_operations microcode_fops = { 454static const struct file_operations microcode_fops = {
455 .owner = THIS_MODULE, 455 .owner = THIS_MODULE,
456 .write = microcode_write, 456 .write = microcode_write,
457 .open = microcode_open, 457 .open = microcode_open,
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 49bff3596bff..4f5983c98669 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -1057,7 +1057,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1057 static int gsi_to_irq[MAX_GSI_NUM]; 1057 static int gsi_to_irq[MAX_GSI_NUM];
1058 1058
1059 /* Don't set up the ACPI SCI because it's already set up */ 1059 /* Don't set up the ACPI SCI because it's already set up */
1060 if (acpi_fadt.sci_int == gsi) 1060 if (acpi_gbl_FADT.sci_interrupt == gsi)
1061 return gsi; 1061 return gsi;
1062 1062
1063 ioapic = mp_find_ioapic(gsi); 1063 ioapic = mp_find_ioapic(gsi);
@@ -1114,7 +1114,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1114 /* 1114 /*
1115 * Don't assign IRQ used by ACPI SCI 1115 * Don't assign IRQ used by ACPI SCI
1116 */ 1116 */
1117 if (gsi == acpi_fadt.sci_int) 1117 if (gsi == acpi_gbl_FADT.sci_interrupt)
1118 gsi = pci_irq++; 1118 gsi = pci_irq++;
1119 gsi_to_irq[irq] = gsi; 1119 gsi_to_irq[irq] = gsi;
1120 } else { 1120 } else {
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 4a472a17d1c6..bcaa6e9b6197 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -68,7 +68,6 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
68#ifdef CONFIG_SMP 68#ifdef CONFIG_SMP
69 69
70struct msr_command { 70struct msr_command {
71 int cpu;
72 int err; 71 int err;
73 u32 reg; 72 u32 reg;
74 u32 data[2]; 73 u32 data[2];
@@ -78,16 +77,14 @@ static void msr_smp_wrmsr(void *cmd_block)
78{ 77{
79 struct msr_command *cmd = (struct msr_command *)cmd_block; 78 struct msr_command *cmd = (struct msr_command *)cmd_block;
80 79
81 if (cmd->cpu == smp_processor_id()) 80 cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
82 cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
83} 81}
84 82
85static void msr_smp_rdmsr(void *cmd_block) 83static void msr_smp_rdmsr(void *cmd_block)
86{ 84{
87 struct msr_command *cmd = (struct msr_command *)cmd_block; 85 struct msr_command *cmd = (struct msr_command *)cmd_block;
88 86
89 if (cmd->cpu == smp_processor_id()) 87 cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
90 cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
91} 88}
92 89
93static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) 90static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
@@ -99,12 +96,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
99 if (cpu == smp_processor_id()) { 96 if (cpu == smp_processor_id()) {
100 ret = wrmsr_eio(reg, eax, edx); 97 ret = wrmsr_eio(reg, eax, edx);
101 } else { 98 } else {
102 cmd.cpu = cpu;
103 cmd.reg = reg; 99 cmd.reg = reg;
104 cmd.data[0] = eax; 100 cmd.data[0] = eax;
105 cmd.data[1] = edx; 101 cmd.data[1] = edx;
106 102
107 smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); 103 smp_call_function_single(cpu, msr_smp_wrmsr, &cmd, 1, 1);
108 ret = cmd.err; 104 ret = cmd.err;
109 } 105 }
110 preempt_enable(); 106 preempt_enable();
@@ -120,10 +116,9 @@ static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
120 if (cpu == smp_processor_id()) { 116 if (cpu == smp_processor_id()) {
121 ret = rdmsr_eio(reg, eax, edx); 117 ret = rdmsr_eio(reg, eax, edx);
122 } else { 118 } else {
123 cmd.cpu = cpu;
124 cmd.reg = reg; 119 cmd.reg = reg;
125 120
126 smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); 121 smp_call_function_single(cpu, msr_smp_rdmsr, &cmd, 1, 1);
127 122
128 *eax = cmd.data[0]; 123 *eax = cmd.data[0];
129 *edx = cmd.data[1]; 124 *edx = cmd.data[1];
@@ -230,7 +225,7 @@ static int msr_open(struct inode *inode, struct file *file)
230/* 225/*
231 * File operations we support 226 * File operations we support
232 */ 227 */
233static struct file_operations msr_fops = { 228static const struct file_operations msr_fops = {
234 .owner = THIS_MODULE, 229 .owner = THIS_MODULE,
235 .llseek = msr_seek, 230 .llseek = msr_seek,
236 .read = msr_read, 231 .read = msr_read,
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 1a6f8bb8881c..821df34d2b3a 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -23,6 +23,7 @@
23#include <linux/dmi.h> 23#include <linux/dmi.h>
24#include <linux/kprobes.h> 24#include <linux/kprobes.h>
25#include <linux/cpumask.h> 25#include <linux/cpumask.h>
26#include <linux/kernel_stat.h>
26 27
27#include <asm/smp.h> 28#include <asm/smp.h>
28#include <asm/nmi.h> 29#include <asm/nmi.h>
@@ -185,7 +186,8 @@ static __cpuinit inline int nmi_known_cpu(void)
185{ 186{
186 switch (boot_cpu_data.x86_vendor) { 187 switch (boot_cpu_data.x86_vendor) {
187 case X86_VENDOR_AMD: 188 case X86_VENDOR_AMD:
188 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); 189 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
190 || (boot_cpu_data.x86 == 16));
189 case X86_VENDOR_INTEL: 191 case X86_VENDOR_INTEL:
190 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 192 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
191 return 1; 193 return 1;
@@ -216,6 +218,28 @@ static __init void nmi_cpu_busy(void *data)
216} 218}
217#endif 219#endif
218 220
221static unsigned int adjust_for_32bit_ctr(unsigned int hz)
222{
223 u64 counter_val;
224 unsigned int retval = hz;
225
226 /*
227 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
228 * are writable, with higher bits sign-extending from bit 31.
229 * So we can only program the counter with 31-bit values; the
230 * 32nd bit must be 1 so that bits 33 and above are 1 as well.
231 * Find the appropriate nmi_hz.
232 */
233 counter_val = (u64)cpu_khz * 1000;
234 do_div(counter_val, retval);
235 if (counter_val > 0x7fffffffULL) {
236 u64 count = (u64)cpu_khz * 1000;
237 do_div(count, 0x7fffffffUL);
238 retval = count + 1;
239 }
240 return retval;
241}
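A worked example, assuming a hypothetical 3 GHz CPU (cpu_khz = 3000000): for the post-boot nmi_hz of 1, counter_val = 3000000000, which exceeds 0x7fffffff (~2.147e9); count = 3000000000 / 0x7fffffff = 1, so retval becomes 2 and the watchdog ticks at 2 Hz instead of 1 Hz, keeping the programmed period within the writable 31 bits.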
242
219static int __init check_nmi_watchdog(void) 243static int __init check_nmi_watchdog(void)
220{ 244{
221 unsigned int *prev_nmi_count; 245 unsigned int *prev_nmi_count;
@@ -281,18 +305,10 @@ static int __init check_nmi_watchdog(void)
281 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 305 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
282 306
283 nmi_hz = 1; 307 nmi_hz = 1;
284 /* 308
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter 309 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
286 * are writable, with higher bits sign extending from bit 31. 310 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
287 * So, we can only program the counter with 31 bit values and 311 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
288 * 32nd bit should be 1, for 33.. to be 1.
289 * Find the appropriate nmi_hz
290 */
291 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
292 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
293 u64 count = (u64)cpu_khz * 1000;
294 do_div(count, 0x7fffffffUL);
295 nmi_hz = count + 1;
296 } 312 }
297 } 313 }
298 314
@@ -369,6 +385,34 @@ void enable_timer_nmi_watchdog(void)
369 } 385 }
370} 386}
371 387
388static void __acpi_nmi_disable(void *__unused)
389{
390 apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
391}
392
393/*
394 * Disable timer based NMIs on all CPUs:
395 */
396void acpi_nmi_disable(void)
397{
398 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
399 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
400}
401
402static void __acpi_nmi_enable(void *__unused)
403{
404 apic_write_around(APIC_LVT0, APIC_DM_NMI);
405}
406
407/*
408 * Enable timer based NMIs on all CPUs:
409 */
410void acpi_nmi_enable(void)
411{
412 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
413 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
414}
415
372#ifdef CONFIG_PM 416#ifdef CONFIG_PM
373 417
374static int nmi_pm_active; /* nmi_active before suspend */ 418static int nmi_pm_active; /* nmi_active before suspend */
@@ -442,6 +486,17 @@ static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
442 wrmsrl(perfctr_msr, 0 - count); 486 wrmsrl(perfctr_msr, 0 - count);
443} 487}
444 488
489static void write_watchdog_counter32(unsigned int perfctr_msr,
490 const char *descr)
491{
492 u64 count = (u64)cpu_khz * 1000;
493
494 do_div(count, nmi_hz);
495 if (descr)
496 Dprintk("setting %s to -0x%08Lx\n", descr, count);
497 wrmsr(perfctr_msr, (u32)(-count), 0);
498}
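Writing (u32)(-count) depends on the sign extension noted above: for any count below 2^31 the written value has bit 31 set, the hardware replicates it through the upper counter bits, and the counter overflows (raising the NMI) after count events, matching the effect of the full-width wrmsrl() in write_watchdog_counter().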
499
445/* Note that these events don't tick when the CPU idles. This means 500/* Note that these events don't tick when the CPU idles. This means
446 the frequency varies with CPU load. */ 501 the frequency varies with CPU load. */
447 502
@@ -531,7 +586,8 @@ static int setup_p6_watchdog(void)
531 586
532 /* setup the timer */ 587 /* setup the timer */
533 wrmsr(evntsel_msr, evntsel, 0); 588 wrmsr(evntsel_msr, evntsel, 0);
534 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); 589 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
590 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
535 apic_write(APIC_LVTPC, APIC_DM_NMI); 591 apic_write(APIC_LVTPC, APIC_DM_NMI);
536 evntsel |= P6_EVNTSEL0_ENABLE; 592 evntsel |= P6_EVNTSEL0_ENABLE;
537 wrmsr(evntsel_msr, evntsel, 0); 593 wrmsr(evntsel_msr, evntsel, 0);
@@ -704,7 +760,8 @@ static int setup_intel_arch_watchdog(void)
704 760
705 /* setup the timer */ 761 /* setup the timer */
706 wrmsr(evntsel_msr, evntsel, 0); 762 wrmsr(evntsel_msr, evntsel, 0);
707 write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); 763 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
764 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
708 apic_write(APIC_LVTPC, APIC_DM_NMI); 765 apic_write(APIC_LVTPC, APIC_DM_NMI);
709 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 766 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
710 wrmsr(evntsel_msr, evntsel, 0); 767 wrmsr(evntsel_msr, evntsel, 0);
@@ -762,7 +819,8 @@ void setup_apic_nmi_watchdog (void *unused)
762 if (nmi_watchdog == NMI_LOCAL_APIC) { 819 if (nmi_watchdog == NMI_LOCAL_APIC) {
763 switch (boot_cpu_data.x86_vendor) { 820 switch (boot_cpu_data.x86_vendor) {
764 case X86_VENDOR_AMD: 821 case X86_VENDOR_AMD:
765 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) 822 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
823 boot_cpu_data.x86 != 16)
766 return; 824 return;
767 if (!setup_k7_watchdog()) 825 if (!setup_k7_watchdog())
768 return; 826 return;
@@ -916,9 +974,13 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
916 cpu_clear(cpu, backtrace_mask); 974 cpu_clear(cpu, backtrace_mask);
917 } 975 }
918 976
919 sum = per_cpu(irq_stat, cpu).apic_timer_irqs; 977 /*
978 * Take the local apic timer and PIT/HPET into account. We don't
979 * know which one is active when highres/dyntick is on
980 */
981 sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
920 982
921 /* if the apic timer isn't firing, this cpu isn't doing much */ 983 /* if none of the timers is firing, this cpu isn't doing much */
922 if (!touched && last_irq_sums[cpu] == sum) { 984 if (!touched && last_irq_sums[cpu] == sum) {
923 /* 985 /*
924 * Ayiee, looks like this CPU is stuck ... 986 * Ayiee, looks like this CPU is stuck ...
@@ -956,6 +1018,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
956 dummy &= ~P4_CCCR_OVF; 1018 dummy &= ~P4_CCCR_OVF;
957 wrmsrl(wd->cccr_msr, dummy); 1019 wrmsrl(wd->cccr_msr, dummy);
958 apic_write(APIC_LVTPC, APIC_DM_NMI); 1020 apic_write(APIC_LVTPC, APIC_DM_NMI);
1021 /* start the cycle over again */
1022 write_watchdog_counter(wd->perfctr_msr, NULL);
959 } 1023 }
960 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || 1024 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
961 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { 1025 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
@@ -964,9 +1028,12 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
964 * other P6 variant. 1028 * other P6 variant.
965 * ArchPerfom/Core Duo also needs this */ 1029 * ArchPerfom/Core Duo also needs this */
966 apic_write(APIC_LVTPC, APIC_DM_NMI); 1030 apic_write(APIC_LVTPC, APIC_DM_NMI);
1031 /* P6/ARCH_PERFMON has 32 bit counter write */
1032 write_watchdog_counter32(wd->perfctr_msr, NULL);
1033 } else {
1034 /* start the cycle over again */
1035 write_watchdog_counter(wd->perfctr_msr, NULL);
967 } 1036 }
968 /* start the cycle over again */
969 write_watchdog_counter(wd->perfctr_msr, NULL);
970 rc = 1; 1037 rc = 1;
971 } else if (nmi_watchdog == NMI_IO_APIC) { 1038 } else if (nmi_watchdog == NMI_IO_APIC) {
972 /* don't know how to accurately check for this. 1039 /* don't know how to accurately check for this.
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index e55fd05da0f5..c156ecfa3872 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -92,7 +92,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
92 return insn_len; 92 return insn_len;
93} 93}
94 94
95static fastcall unsigned long native_get_debugreg(int regno) 95static unsigned long native_get_debugreg(int regno)
96{ 96{
97 unsigned long val = 0; /* Damn you, gcc! */ 97 unsigned long val = 0; /* Damn you, gcc! */
98 98
@@ -115,7 +115,7 @@ static fastcall unsigned long native_get_debugreg(int regno)
115 return val; 115 return val;
116} 116}
117 117
118static fastcall void native_set_debugreg(int regno, unsigned long value) 118static void native_set_debugreg(int regno, unsigned long value)
119{ 119{
120 switch (regno) { 120 switch (regno) {
121 case 0: 121 case 0:
@@ -146,55 +146,55 @@ void init_IRQ(void)
146 paravirt_ops.init_IRQ(); 146 paravirt_ops.init_IRQ();
147} 147}
148 148
149static fastcall void native_clts(void) 149static void native_clts(void)
150{ 150{
151 asm volatile ("clts"); 151 asm volatile ("clts");
152} 152}
153 153
154static fastcall unsigned long native_read_cr0(void) 154static unsigned long native_read_cr0(void)
155{ 155{
156 unsigned long val; 156 unsigned long val;
157 asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); 157 asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
158 return val; 158 return val;
159} 159}
160 160
161static fastcall void native_write_cr0(unsigned long val) 161static void native_write_cr0(unsigned long val)
162{ 162{
163 asm volatile("movl %0,%%cr0": :"r" (val)); 163 asm volatile("movl %0,%%cr0": :"r" (val));
164} 164}
165 165
166static fastcall unsigned long native_read_cr2(void) 166static unsigned long native_read_cr2(void)
167{ 167{
168 unsigned long val; 168 unsigned long val;
169 asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); 169 asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
170 return val; 170 return val;
171} 171}
172 172
173static fastcall void native_write_cr2(unsigned long val) 173static void native_write_cr2(unsigned long val)
174{ 174{
175 asm volatile("movl %0,%%cr2": :"r" (val)); 175 asm volatile("movl %0,%%cr2": :"r" (val));
176} 176}
177 177
178static fastcall unsigned long native_read_cr3(void) 178static unsigned long native_read_cr3(void)
179{ 179{
180 unsigned long val; 180 unsigned long val;
181 asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); 181 asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
182 return val; 182 return val;
183} 183}
184 184
185static fastcall void native_write_cr3(unsigned long val) 185static void native_write_cr3(unsigned long val)
186{ 186{
187 asm volatile("movl %0,%%cr3": :"r" (val)); 187 asm volatile("movl %0,%%cr3": :"r" (val));
188} 188}
189 189
190static fastcall unsigned long native_read_cr4(void) 190static unsigned long native_read_cr4(void)
191{ 191{
192 unsigned long val; 192 unsigned long val;
193 asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); 193 asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
194 return val; 194 return val;
195} 195}
196 196
197static fastcall unsigned long native_read_cr4_safe(void) 197static unsigned long native_read_cr4_safe(void)
198{ 198{
199 unsigned long val; 199 unsigned long val;
200 /* This could fault if %cr4 does not exist */ 200 /* This could fault if %cr4 does not exist */
@@ -207,51 +207,51 @@ static fastcall unsigned long native_read_cr4_safe(void)
207 return val; 207 return val;
208} 208}
209 209
210static fastcall void native_write_cr4(unsigned long val) 210static void native_write_cr4(unsigned long val)
211{ 211{
212 asm volatile("movl %0,%%cr4": :"r" (val)); 212 asm volatile("movl %0,%%cr4": :"r" (val));
213} 213}
214 214
215static fastcall unsigned long native_save_fl(void) 215static unsigned long native_save_fl(void)
216{ 216{
217 unsigned long f; 217 unsigned long f;
218 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); 218 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
219 return f; 219 return f;
220} 220}
221 221
222static fastcall void native_restore_fl(unsigned long f) 222static void native_restore_fl(unsigned long f)
223{ 223{
224 asm volatile("pushl %0 ; popfl": /* no output */ 224 asm volatile("pushl %0 ; popfl": /* no output */
225 :"g" (f) 225 :"g" (f)
226 :"memory", "cc"); 226 :"memory", "cc");
227} 227}
228 228
229static fastcall void native_irq_disable(void) 229static void native_irq_disable(void)
230{ 230{
231 asm volatile("cli": : :"memory"); 231 asm volatile("cli": : :"memory");
232} 232}
233 233
234static fastcall void native_irq_enable(void) 234static void native_irq_enable(void)
235{ 235{
236 asm volatile("sti": : :"memory"); 236 asm volatile("sti": : :"memory");
237} 237}
238 238
239static fastcall void native_safe_halt(void) 239static void native_safe_halt(void)
240{ 240{
241 asm volatile("sti; hlt": : :"memory"); 241 asm volatile("sti; hlt": : :"memory");
242} 242}
243 243
244static fastcall void native_halt(void) 244static void native_halt(void)
245{ 245{
246 asm volatile("hlt": : :"memory"); 246 asm volatile("hlt": : :"memory");
247} 247}
248 248
249static fastcall void native_wbinvd(void) 249static void native_wbinvd(void)
250{ 250{
251 asm volatile("wbinvd": : :"memory"); 251 asm volatile("wbinvd": : :"memory");
252} 252}
253 253
254static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) 254static unsigned long long native_read_msr(unsigned int msr, int *err)
255{ 255{
256 unsigned long long val; 256 unsigned long long val;
257 257
@@ -270,7 +270,7 @@ static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
270 return val; 270 return val;
271} 271}
272 272
273static fastcall int native_write_msr(unsigned int msr, unsigned long long val) 273static int native_write_msr(unsigned int msr, unsigned long long val)
274{ 274{
275 int err; 275 int err;
276 asm volatile("2: wrmsr ; xorl %0,%0\n" 276 asm volatile("2: wrmsr ; xorl %0,%0\n"
@@ -288,53 +288,53 @@ static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
288 return err; 288 return err;
289} 289}
290 290
291static fastcall unsigned long long native_read_tsc(void) 291static unsigned long long native_read_tsc(void)
292{ 292{
293 unsigned long long val; 293 unsigned long long val;
294 asm volatile("rdtsc" : "=A" (val)); 294 asm volatile("rdtsc" : "=A" (val));
295 return val; 295 return val;
296} 296}
297 297
298static fastcall unsigned long long native_read_pmc(void) 298static unsigned long long native_read_pmc(void)
299{ 299{
300 unsigned long long val; 300 unsigned long long val;
301 asm volatile("rdpmc" : "=A" (val)); 301 asm volatile("rdpmc" : "=A" (val));
302 return val; 302 return val;
303} 303}
304 304
305static fastcall void native_load_tr_desc(void) 305static void native_load_tr_desc(void)
306{ 306{
307 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); 307 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
308} 308}
309 309
310static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) 310static void native_load_gdt(const struct Xgt_desc_struct *dtr)
311{ 311{
312 asm volatile("lgdt %0"::"m" (*dtr)); 312 asm volatile("lgdt %0"::"m" (*dtr));
313} 313}
314 314
315static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) 315static void native_load_idt(const struct Xgt_desc_struct *dtr)
316{ 316{
317 asm volatile("lidt %0"::"m" (*dtr)); 317 asm volatile("lidt %0"::"m" (*dtr));
318} 318}
319 319
320static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) 320static void native_store_gdt(struct Xgt_desc_struct *dtr)
321{ 321{
322 asm ("sgdt %0":"=m" (*dtr)); 322 asm ("sgdt %0":"=m" (*dtr));
323} 323}
324 324
325static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) 325static void native_store_idt(struct Xgt_desc_struct *dtr)
326{ 326{
327 asm ("sidt %0":"=m" (*dtr)); 327 asm ("sidt %0":"=m" (*dtr));
328} 328}
329 329
330static fastcall unsigned long native_store_tr(void) 330static unsigned long native_store_tr(void)
331{ 331{
332 unsigned long tr; 332 unsigned long tr;
333 asm ("str %0":"=r" (tr)); 333 asm ("str %0":"=r" (tr));
334 return tr; 334 return tr;
335} 335}
336 336
337static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) 337static void native_load_tls(struct thread_struct *t, unsigned int cpu)
338{ 338{
339#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] 339#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
340 C(0); C(1); C(2); 340 C(0); C(1); C(2);
@@ -348,22 +348,22 @@ static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32
348 lp[1] = entry_high; 348 lp[1] = entry_high;
349} 349}
350 350
351static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) 351static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
352{ 352{
353 native_write_dt_entry(dt, entrynum, low, high); 353 native_write_dt_entry(dt, entrynum, low, high);
354} 354}
355 355
356static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) 356static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
357{ 357{
358 native_write_dt_entry(dt, entrynum, low, high); 358 native_write_dt_entry(dt, entrynum, low, high);
359} 359}
360 360
361static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) 361static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
362{ 362{
363 native_write_dt_entry(dt, entrynum, low, high); 363 native_write_dt_entry(dt, entrynum, low, high);
364} 364}
365 365
366static fastcall void native_load_esp0(struct tss_struct *tss, 366static void native_load_esp0(struct tss_struct *tss,
367 struct thread_struct *thread) 367 struct thread_struct *thread)
368{ 368{
369 tss->esp0 = thread->esp0; 369 tss->esp0 = thread->esp0;
@@ -375,12 +375,12 @@ static fastcall void native_load_esp0(struct tss_struct *tss,
375 } 375 }
376} 376}
377 377
378static fastcall void native_io_delay(void) 378static void native_io_delay(void)
379{ 379{
380 asm volatile("outb %al,$0x80"); 380 asm volatile("outb %al,$0x80");
381} 381}
382 382
383static fastcall void native_flush_tlb(void) 383static void native_flush_tlb(void)
384{ 384{
385 __native_flush_tlb(); 385 __native_flush_tlb();
386} 386}
@@ -389,49 +389,49 @@ static fastcall void native_flush_tlb(void)
389 * Global pages have to be flushed a bit differently. Not a real 389 * Global pages have to be flushed a bit differently. Not a real
390 * performance problem because this does not happen often. 390 * performance problem because this does not happen often.
391 */ 391 */
392static fastcall void native_flush_tlb_global(void) 392static void native_flush_tlb_global(void)
393{ 393{
394 __native_flush_tlb_global(); 394 __native_flush_tlb_global();
395} 395}
396 396
397static fastcall void native_flush_tlb_single(u32 addr) 397static void native_flush_tlb_single(u32 addr)
398{ 398{
399 __native_flush_tlb_single(addr); 399 __native_flush_tlb_single(addr);
400} 400}
401 401
402#ifndef CONFIG_X86_PAE 402#ifndef CONFIG_X86_PAE
403static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) 403static void native_set_pte(pte_t *ptep, pte_t pteval)
404{ 404{
405 *ptep = pteval; 405 *ptep = pteval;
406} 406}
407 407
408static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) 408static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
409{ 409{
410 *ptep = pteval; 410 *ptep = pteval;
411} 411}
412 412
413static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) 413static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
414{ 414{
415 *pmdp = pmdval; 415 *pmdp = pmdval;
416} 416}
417 417
418#else /* CONFIG_X86_PAE */ 418#else /* CONFIG_X86_PAE */
419 419
420static fastcall void native_set_pte(pte_t *ptep, pte_t pte) 420static void native_set_pte(pte_t *ptep, pte_t pte)
421{ 421{
422 ptep->pte_high = pte.pte_high; 422 ptep->pte_high = pte.pte_high;
423 smp_wmb(); 423 smp_wmb();
424 ptep->pte_low = pte.pte_low; 424 ptep->pte_low = pte.pte_low;
425} 425}
426 426
427static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) 427static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
428{ 428{
429 ptep->pte_high = pte.pte_high; 429 ptep->pte_high = pte.pte_high;
430 smp_wmb(); 430 smp_wmb();
431 ptep->pte_low = pte.pte_low; 431 ptep->pte_low = pte.pte_low;
432} 432}
433 433
434static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 434static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
435{ 435{
436 ptep->pte_low = 0; 436 ptep->pte_low = 0;
437 smp_wmb(); 437 smp_wmb();
@@ -440,29 +440,29 @@ static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long
440 ptep->pte_low = pte.pte_low; 440 ptep->pte_low = pte.pte_low;
441} 441}
442 442
443static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) 443static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
444{ 444{
445 set_64bit((unsigned long long *)ptep,pte_val(pteval)); 445 set_64bit((unsigned long long *)ptep,pte_val(pteval));
446} 446}
447 447
448static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) 448static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
449{ 449{
450 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); 450 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
451} 451}
452 452
453static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) 453static void native_set_pud(pud_t *pudp, pud_t pudval)
454{ 454{
455 *pudp = pudval; 455 *pudp = pudval;
456} 456}
457 457
458static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 458static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
459{ 459{
460 ptep->pte_low = 0; 460 ptep->pte_low = 0;
461 smp_wmb(); 461 smp_wmb();
462 ptep->pte_high = 0; 462 ptep->pte_high = 0;
463} 463}
464 464
465static fastcall void native_pmd_clear(pmd_t *pmd) 465static void native_pmd_clear(pmd_t *pmd)
466{ 466{
467 u32 *tmp = (u32 *)pmd; 467 u32 *tmp = (u32 *)pmd;
468 *tmp = 0; 468 *tmp = 0;
@@ -472,8 +472,8 @@ static fastcall void native_pmd_clear(pmd_t *pmd)
472#endif /* CONFIG_X86_PAE */ 472#endif /* CONFIG_X86_PAE */
473 473
474/* These are in entry.S */ 474/* These are in entry.S */
475extern fastcall void native_iret(void); 475extern void native_iret(void);
476extern fastcall void native_irq_enable_sysexit(void); 476extern void native_irq_enable_sysexit(void);
477 477
478static int __init print_banner(void) 478static int __init print_banner(void)
479{ 479{
@@ -482,9 +482,6 @@ static int __init print_banner(void)
482} 482}
483core_initcall(print_banner); 483core_initcall(print_banner);
484 484
485/* We simply declare start_kernel to be the paravirt probe of last resort. */
486paravirt_probe(start_kernel);
487
488struct paravirt_ops paravirt_ops = { 485struct paravirt_ops paravirt_ops = {
489 .name = "bare hardware", 486 .name = "bare hardware",
490 .paravirt_enabled = 0, 487 .paravirt_enabled = 0,
@@ -544,12 +541,21 @@ struct paravirt_ops paravirt_ops = {
544 .apic_write = native_apic_write, 541 .apic_write = native_apic_write,
545 .apic_write_atomic = native_apic_write_atomic, 542 .apic_write_atomic = native_apic_write_atomic,
546 .apic_read = native_apic_read, 543 .apic_read = native_apic_read,
544 .setup_boot_clock = setup_boot_APIC_clock,
545 .setup_secondary_clock = setup_secondary_APIC_clock,
547#endif 546#endif
547 .set_lazy_mode = (void *)native_nop,
548 548
549 .flush_tlb_user = native_flush_tlb, 549 .flush_tlb_user = native_flush_tlb,
550 .flush_tlb_kernel = native_flush_tlb_global, 550 .flush_tlb_kernel = native_flush_tlb_global,
551 .flush_tlb_single = native_flush_tlb_single, 551 .flush_tlb_single = native_flush_tlb_single,
552 552
553 .alloc_pt = (void *)native_nop,
554 .alloc_pd = (void *)native_nop,
555 .alloc_pd_clone = (void *)native_nop,
556 .release_pt = (void *)native_nop,
557 .release_pd = (void *)native_nop,
558
553 .set_pte = native_set_pte, 559 .set_pte = native_set_pte,
554 .set_pte_at = native_set_pte_at, 560 .set_pte_at = native_set_pte_at,
555 .set_pmd = native_set_pmd, 561 .set_pmd = native_set_pmd,
@@ -565,6 +571,8 @@ struct paravirt_ops paravirt_ops = {
565 571
566 .irq_enable_sysexit = native_irq_enable_sysexit, 572 .irq_enable_sysexit = native_irq_enable_sysexit,
567 .iret = native_iret, 573 .iret = native_iret,
574
575 .startup_ipi_hook = (void *)native_nop,
568}; 576};
569 577
570/* 578/*
diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c
new file mode 100644
index 000000000000..bc1f2d3ea277
--- /dev/null
+++ b/arch/i386/kernel/pcspeaker.c
@@ -0,0 +1,20 @@
1#include <linux/platform_device.h>
2#include <linux/errno.h>
3#include <linux/init.h>
4
5static __init int add_pcspkr(void)
6{
7 struct platform_device *pd;
8 int ret;
9
10 pd = platform_device_alloc("pcspkr", -1);
11 if (!pd)
12 return -ENOMEM;
13
14 ret = platform_device_add(pd);
15 if (ret)
16 platform_device_put(pd);
17
18 return ret;
19}
20device_initcall(add_pcspkr);
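platform_device_alloc()/platform_device_add() with id -1 registers a single, unnumbered "pcspkr" platform device for the speaker driver to bind to; on failure, platform_device_put() drops the reference taken by the allocation. This is the same add_pcspkr() that the setup.c hunk further below removes, now isolated in its own file.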
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index c641056233a6..bea304d48cdb 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -38,6 +38,7 @@
38#include <linux/ptrace.h> 38#include <linux/ptrace.h>
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/personality.h> 40#include <linux/personality.h>
41#include <linux/tick.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -48,6 +49,7 @@
48#include <asm/i387.h> 49#include <asm/i387.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/vm86.h> 51#include <asm/vm86.h>
52#include <asm/idle.h>
51#ifdef CONFIG_MATH_EMULATION 53#ifdef CONFIG_MATH_EMULATION
52#include <asm/math_emu.h> 54#include <asm/math_emu.h>
53#endif 55#endif
@@ -80,6 +82,42 @@ void (*pm_idle)(void);
80EXPORT_SYMBOL(pm_idle); 82EXPORT_SYMBOL(pm_idle);
81static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 83static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
82 84
85static ATOMIC_NOTIFIER_HEAD(idle_notifier);
86
87void idle_notifier_register(struct notifier_block *n)
88{
89 atomic_notifier_chain_register(&idle_notifier, n);
90}
91
92void idle_notifier_unregister(struct notifier_block *n)
93{
94 atomic_notifier_chain_unregister(&idle_notifier, n);
95}
96
97static DEFINE_PER_CPU(volatile unsigned long, idle_state);
98
99void enter_idle(void)
100{
101 /* needs to be atomic w.r.t. interrupts, not against other CPUs */
102 __set_bit(0, &__get_cpu_var(idle_state));
103 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
104}
105
106static void __exit_idle(void)
107{
108 /* needs to be atomic w.r.t. interrupts, not against other CPUs */
109 if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
110 return;
111 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
112}
113
114void exit_idle(void)
115{
116 if (current->pid)
117 return;
118 __exit_idle();
119}
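A minimal sketch of a consumer of this notifier chain (the callback, counter and initcall are hypothetical, not part of this patch; only idle_notifier_register(), IDLE_START and IDLE_END come from the code above):

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <asm/idle.h>

    static unsigned long demo_idle_entries;

    /* Hypothetical consumer: count transitions into the idle loop. */
    static int demo_idle_event(struct notifier_block *nb,
                               unsigned long action, void *data)
    {
            if (action == IDLE_START)
                    demo_idle_entries++;    /* IRQs are off at this point */
            return NOTIFY_OK;
    }

    static struct notifier_block demo_idle_nb = {
            .notifier_call = demo_idle_event,
    };

    static int __init demo_idle_init(void)
    {
            idle_notifier_register(&demo_idle_nb);
            return 0;
    }
    device_initcall(demo_idle_init);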
120
83void disable_hlt(void) 121void disable_hlt(void)
84{ 122{
85 hlt_counter++; 123 hlt_counter++;
@@ -130,6 +168,7 @@ EXPORT_SYMBOL(default_idle);
130 */ 168 */
131static void poll_idle (void) 169static void poll_idle (void)
132{ 170{
171 local_irq_enable();
133 cpu_relax(); 172 cpu_relax();
134} 173}
135 174
@@ -173,6 +212,7 @@ void cpu_idle(void)
173 212
174 /* endless idle loop with no priority at all */ 213 /* endless idle loop with no priority at all */
175 while (1) { 214 while (1) {
215 tick_nohz_stop_sched_tick();
176 while (!need_resched()) { 216 while (!need_resched()) {
177 void (*idle)(void); 217 void (*idle)(void);
178 218
@@ -189,8 +229,18 @@ void cpu_idle(void)
189 play_dead(); 229 play_dead();
190 230
191 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 231 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
232
233 /*
234 * Idle routines should keep interrupts disabled
235 * from here on, until they go to idle.
236 * Otherwise, idle callbacks can misfire.
237 */
238 local_irq_disable();
239 enter_idle();
192 idle(); 240 idle();
241 __exit_idle();
193 } 242 }
243 tick_nohz_restart_sched_tick();
194 preempt_enable_no_resched(); 244 preempt_enable_no_resched();
195 schedule(); 245 schedule();
196 preempt_disable(); 246 preempt_disable();
@@ -243,7 +293,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
243 __monitor((void *)&current_thread_info()->flags, 0, 0); 293 __monitor((void *)&current_thread_info()->flags, 0, 0);
244 smp_mb(); 294 smp_mb();
245 if (!need_resched()) 295 if (!need_resched())
246 __mwait(eax, ecx); 296 __sti_mwait(eax, ecx);
297 else
298 local_irq_enable();
299 } else {
300 local_irq_enable();
247 } 301 }
248} 302}
249 303
@@ -308,8 +362,8 @@ void show_regs(struct pt_regs * regs)
308 regs->eax,regs->ebx,regs->ecx,regs->edx); 362 regs->eax,regs->ebx,regs->ecx,regs->edx);
309 printk("ESI: %08lx EDI: %08lx EBP: %08lx", 363 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
310 regs->esi, regs->edi, regs->ebp); 364 regs->esi, regs->edi, regs->ebp);
311 printk(" DS: %04x ES: %04x GS: %04x\n", 365 printk(" DS: %04x ES: %04x FS: %04x\n",
312 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); 366 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
313 367
314 cr0 = read_cr0(); 368 cr0 = read_cr0();
315 cr2 = read_cr2(); 369 cr2 = read_cr2();
@@ -340,7 +394,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
340 394
341 regs.xds = __USER_DS; 395 regs.xds = __USER_DS;
342 regs.xes = __USER_DS; 396 regs.xes = __USER_DS;
343 regs.xgs = __KERNEL_PDA; 397 regs.xfs = __KERNEL_PDA;
344 regs.orig_eax = -1; 398 regs.orig_eax = -1;
345 regs.eip = (unsigned long) kernel_thread_helper; 399 regs.eip = (unsigned long) kernel_thread_helper;
346 regs.xcs = __KERNEL_CS | get_kernel_rpl(); 400 regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -425,7 +479,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
425 479
426 p->thread.eip = (unsigned long) ret_from_fork; 480 p->thread.eip = (unsigned long) ret_from_fork;
427 481
428 savesegment(fs,p->thread.fs); 482 savesegment(gs,p->thread.gs);
429 483
430 tsk = current; 484 tsk = current;
431 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 485 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -501,8 +555,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
501 dump->regs.eax = regs->eax; 555 dump->regs.eax = regs->eax;
502 dump->regs.ds = regs->xds; 556 dump->regs.ds = regs->xds;
503 dump->regs.es = regs->xes; 557 dump->regs.es = regs->xes;
504 savesegment(fs,dump->regs.fs); 558 dump->regs.fs = regs->xfs;
505 dump->regs.gs = regs->xgs; 559 savesegment(gs,dump->regs.gs);
506 dump->regs.orig_eax = regs->orig_eax; 560 dump->regs.orig_eax = regs->orig_eax;
507 dump->regs.eip = regs->eip; 561 dump->regs.eip = regs->eip;
508 dump->regs.cs = regs->xcs; 562 dump->regs.cs = regs->xcs;
@@ -653,7 +707,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
653 load_esp0(tss, next); 707 load_esp0(tss, next);
654 708
655 /* 709 /*
656 * Save away %fs. No need to save %gs, as it was saved on the 710 * Save away %gs. No need to save %fs, as it was saved on the
657 * stack on entry. No need to save %es and %ds, as those are 711 * stack on entry. No need to save %es and %ds, as those are
658 * always kernel segments while inside the kernel. Doing this 712 * always kernel segments while inside the kernel. Doing this
659 * before setting the new TLS descriptors avoids the situation 713 * before setting the new TLS descriptors avoids the situation
@@ -662,7 +716,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
662 * used %fs or %gs (it does not today), or if the kernel is 716 * used %fs or %gs (it does not today), or if the kernel is
663 * running inside of a hypervisor layer. 717 * running inside of a hypervisor layer.
664 */ 718 */
665 savesegment(fs, prev->fs); 719 savesegment(gs, prev->gs);
666 720
667 /* 721 /*
668 * Load the per-thread Thread-Local Storage descriptor. 722 * Load the per-thread Thread-Local Storage descriptor.
@@ -670,14 +724,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
670 load_TLS(next, cpu); 724 load_TLS(next, cpu);
671 725
672 /* 726 /*
673 * Restore %fs if needed. 727 * Restore IOPL if needed. In normal use, the flags restore
674 * 728 * in the switch assembly will handle this. But if the kernel
675 * Glibc normally makes %fs be zero. 729 * is running virtualized at a non-zero CPL, the popf will
730 * not restore flags, so it must be done in a separate step.
676 */ 731 */
677 if (unlikely(prev->fs | next->fs)) 732 if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
678 loadsegment(fs, next->fs); 733 set_iopl_mask(next->iopl);
679
680 write_pda(pcurrent, next_p);
681 734
682 /* 735 /*
683 * Now maybe handle debug registers and/or IO bitmaps 736 * Now maybe handle debug registers and/or IO bitmaps
@@ -688,6 +741,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
688 741
689 disable_tsc(prev_p, next_p); 742 disable_tsc(prev_p, next_p);
690 743
744 /*
745 * Leave lazy mode, flushing any hypercalls made here.
746 * This must be done before restoring TLS segments so
747 * the GDT and LDT are properly updated, and must be
748 * done before math_state_restore, so the TS bit is up
749 * to date.
750 */
751 arch_leave_lazy_cpu_mode();
752
691 /* If the task has used fpu the last 5 timeslices, just do a full 753 /* If the task has used fpu the last 5 timeslices, just do a full
692 * restore of the math state immediately to avoid the trap; the 754 * restore of the math state immediately to avoid the trap; the
693 * chances of needing FPU soon are obviously high now 755 * chances of needing FPU soon are obviously high now
@@ -695,6 +757,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
695 if (next_p->fpu_counter > 5) 757 if (next_p->fpu_counter > 5)
696 math_state_restore(); 758 math_state_restore();
697 759
760 /*
761 * Restore %gs if needed (which is common)
762 */
763 if (prev->gs | next->gs)
764 loadsegment(gs, next->gs);
765
766 write_pda(pcurrent, next_p);
767
698 return prev_p; 768 return prev_p;
699} 769}
700 770
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index af8aabe85800..4a8f8a259723 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child,
89 unsigned long regno, unsigned long value) 89 unsigned long regno, unsigned long value)
90{ 90{
91 switch (regno >> 2) { 91 switch (regno >> 2) {
92 case FS: 92 case GS:
93 if (value && (value & 3) != 3) 93 if (value && (value & 3) != 3)
94 return -EIO; 94 return -EIO;
95 child->thread.fs = value; 95 child->thread.gs = value;
96 return 0; 96 return 0;
97 case DS: 97 case DS:
98 case ES: 98 case ES:
99 case GS: 99 case FS:
100 if (value && (value & 3) != 3) 100 if (value && (value & 3) != 3)
101 return -EIO; 101 return -EIO;
102 value &= 0xffff; 102 value &= 0xffff;
@@ -112,7 +112,7 @@ static int putreg(struct task_struct *child,
112 value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; 112 value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
113 break; 113 break;
114 } 114 }
115 if (regno > ES*4) 115 if (regno > FS*4)
116 regno -= 1*4; 116 regno -= 1*4;
117 put_stack_long(child, regno, value); 117 put_stack_long(child, regno, value);
118 return 0; 118 return 0;
@@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child,
124 unsigned long retval = ~0UL; 124 unsigned long retval = ~0UL;
125 125
126 switch (regno >> 2) { 126 switch (regno >> 2) {
127 case FS: 127 case GS:
128 retval = child->thread.fs; 128 retval = child->thread.gs;
129 break; 129 break;
130 case DS: 130 case DS:
131 case ES: 131 case ES:
132 case GS: 132 case FS:
133 case SS: 133 case SS:
134 case CS: 134 case CS:
135 retval = 0xffff; 135 retval = 0xffff;
136 /* fall through */ 136 /* fall through */
137 default: 137 default:
138 if (regno > ES*4) 138 if (regno > FS*4)
139 regno -= 1*4; 139 regno -= 1*4;
140 retval &= get_stack_long(child, regno); 140 retval &= get_stack_long(child, regno);
141 } 141 }
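The FS/GS swap above also flips the offset fixup: struct pt_regs now carries xfs but no slot for gs (gs lives in thread_struct), so any user-visible register index past FS is one word too high when used against the kernel stack frame. A sketch of the mapping, assuming the usual ptrace register indices:

	static unsigned long user_regno_to_stack(unsigned long regno)
	{
		/* gs is not in pt_regs, so indices above FS shift down a word */
		if (regno > FS * 4)
			regno -= 1 * 4;
		return regno;
	}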
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 4b31ad70c1ac..122623dcc6e1 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -33,7 +33,6 @@
33#include <linux/initrd.h> 33#include <linux/initrd.h>
34#include <linux/bootmem.h> 34#include <linux/bootmem.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/platform_device.h>
37#include <linux/console.h> 36#include <linux/console.h>
38#include <linux/mca.h> 37#include <linux/mca.h>
39#include <linux/root_dev.h> 38#include <linux/root_dev.h>
@@ -60,6 +59,7 @@
60#include <asm/io_apic.h> 59#include <asm/io_apic.h>
61#include <asm/ist.h> 60#include <asm/ist.h>
62#include <asm/io.h> 61#include <asm/io.h>
62#include <asm/vmi.h>
63#include <setup_arch.h> 63#include <setup_arch.h>
64#include <bios_ebda.h> 64#include <bios_ebda.h>
65 65
@@ -132,7 +132,7 @@ unsigned long saved_videomode;
132#define RAMDISK_PROMPT_FLAG 0x8000 132#define RAMDISK_PROMPT_FLAG 0x8000
133#define RAMDISK_LOAD_FLAG 0x4000 133#define RAMDISK_LOAD_FLAG 0x4000
134 134
135static char command_line[COMMAND_LINE_SIZE]; 135static char __initdata command_line[COMMAND_LINE_SIZE];
136 136
137unsigned char __initdata boot_params[PARAM_SIZE]; 137unsigned char __initdata boot_params[PARAM_SIZE];
138 138
@@ -576,11 +576,19 @@ void __init setup_arch(char **cmdline_p)
576 print_memory_map("user"); 576 print_memory_map("user");
577 } 577 }
578 578
579 strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); 579 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
580 *cmdline_p = command_line; 580 *cmdline_p = command_line;
581 581
582 max_low_pfn = setup_memory(); 582 max_low_pfn = setup_memory();
583 583
584#ifdef CONFIG_VMI
585 /*
586 * Must be after max_low_pfn is determined, and before kernel
587 * pagetables are setup.
588 */
589 vmi_init();
590#endif
591
584 /* 592 /*
585 * NOTE: before this point _nobody_ is allowed to allocate 593 * NOTE: before this point _nobody_ is allowed to allocate
586 * any memory using the bootmem allocator. Although the 594 * any memory using the bootmem allocator. Although the
@@ -651,28 +659,3 @@ void __init setup_arch(char **cmdline_p)
651#endif 659#endif
652 tsc_init(); 660 tsc_init();
653} 661}
654
655static __init int add_pcspkr(void)
656{
657 struct platform_device *pd;
658 int ret;
659
660 pd = platform_device_alloc("pcspkr", -1);
661 if (!pd)
662 return -ENOMEM;
663
664 ret = platform_device_add(pd);
665 if (ret)
666 platform_device_put(pd);
667
668 return ret;
669}
670device_initcall(add_pcspkr);
671
672/*
673 * Local Variables:
674 * mode:c
675 * c-file-style:"k&r"
676 * c-basic-offset:8
677 * End:
678 */
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 65d7620eaa09..4f99e870c986 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -21,6 +21,7 @@
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/ptrace.h> 22#include <linux/ptrace.h>
23#include <linux/elf.h> 23#include <linux/elf.h>
24#include <linux/binfmts.h>
24#include <asm/processor.h> 25#include <asm/processor.h>
25#include <asm/ucontext.h> 26#include <asm/ucontext.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
@@ -128,8 +129,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
128 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ 129 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
129 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) 130 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
130 131
131 COPY_SEG(gs); 132 GET_SEG(gs);
132 GET_SEG(fs); 133 COPY_SEG(fs);
133 COPY_SEG(es); 134 COPY_SEG(es);
134 COPY_SEG(ds); 135 COPY_SEG(ds);
135 COPY(edi); 136 COPY(edi);
@@ -244,9 +245,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
244{ 245{
245 int tmp, err = 0; 246 int tmp, err = 0;
246 247
247 err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); 248 err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs);
248 savesegment(fs, tmp); 249 savesegment(gs, tmp);
249 err |= __put_user(tmp, (unsigned int __user *)&sc->fs); 250 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
250 251
251 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); 252 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
252 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); 253 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
@@ -349,7 +350,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
349 goto give_sigsegv; 350 goto give_sigsegv;
350 } 351 }
351 352
352 restorer = (void *)VDSO_SYM(&__kernel_sigreturn); 353 if (current->binfmt->hasvdso)
354 restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
355 else
356 restorer = (void *)&frame->retcode;
353 if (ka->sa.sa_flags & SA_RESTORER) 357 if (ka->sa.sa_flags & SA_RESTORER)
354 restorer = ka->sa.sa_restorer; 358 restorer = ka->sa.sa_restorer;
355 359
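The hunk above stops assuming a vDSO is always mapped: binaries whose format reports no vDSO (binfmt->hasvdso clear) fall back to the retcode trampoline copied onto the signal frame. The selection order, condensed into one hedged sketch (pick_restorer is a hypothetical helper, not a kernel function):

	static void __user *pick_restorer(struct k_sigaction *ka,
					  struct sigframe __user *frame)
	{
		void __user *restorer;

		if (current->binfmt->hasvdso)
			restorer = (void __user *)VDSO_SYM(&__kernel_sigreturn);
		else
			restorer = (void __user *)&frame->retcode;
		if (ka->sa.sa_flags & SA_RESTORER)	/* explicit restorer wins */
			restorer = ka->sa.sa_restorer;
		return restorer;
	}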
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 5285aff8367f..9bd9637ae692 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -23,6 +23,7 @@
23 23
24#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
26#include <asm/idle.h>
26#include <mach_apic.h> 27#include <mach_apic.h>
27 28
28/* 29/*
@@ -374,8 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
374 /* 375 /*
375 * i'm not happy about this global shared spinlock in the 376 * i'm not happy about this global shared spinlock in the
376 * MM hot path, but we'll see how contended it is. 377 * MM hot path, but we'll see how contended it is.
377 * Temporarily this turns IRQs off, so that lockups are 378 * AK: x86-64 has a faster method that could be ported.
378 * detected by the NMI watchdog.
379 */ 379 */
380 spin_lock(&tlbstate_lock); 380 spin_lock(&tlbstate_lock);
381 381
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
400 400
401 while (!cpus_empty(flush_cpumask)) 401 while (!cpus_empty(flush_cpumask))
402 /* nothing. lockup detection does not belong here */ 402 /* nothing. lockup detection does not belong here */
403 mb(); 403 cpu_relax();
404 404
405 flush_mm = NULL; 405 flush_mm = NULL;
406 flush_va = 0; 406 flush_va = 0;
@@ -624,6 +624,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
624 /* 624 /*
625 * At this point the info structure may be out of scope unless wait==1 625 * At this point the info structure may be out of scope unless wait==1
626 */ 626 */
627 exit_idle();
627 irq_enter(); 628 irq_enter();
628 (*func)(info); 629 (*func)(info);
629 irq_exit(); 630 irq_exit();
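Two small but deliberate changes above: the spin on flush_cpumask now uses cpu_relax() instead of a full memory barrier, and the IPI handler gains an exit_idle() call. On i386, cpu_relax() is the PAUSE hint, roughly:

	static inline void cpu_relax_sketch(void)
	{
		/* "rep; nop" == PAUSE: de-pipelines the spin loop and cuts
		 * power/SMT contention; the "memory" clobber keeps the
		 * compiler re-reading flush_cpumask each iteration */
		__asm__ __volatile__("rep; nop" ::: "memory");
	}

mb() was only there so the NMI watchdog could notice lockups, which, as the new comment says, does not belong in this loop.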
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8c6c8c52b95c..48bfcaa13ecc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -63,6 +63,7 @@
63#include <mach_apic.h> 63#include <mach_apic.h>
64#include <mach_wakecpu.h> 64#include <mach_wakecpu.h>
65#include <smpboot_hooks.h> 65#include <smpboot_hooks.h>
66#include <asm/vmi.h>
66 67
67/* Set if we find a B stepping CPU */ 68/* Set if we find a B stepping CPU */
68static int __devinitdata smp_b_stepping; 69static int __devinitdata smp_b_stepping;
@@ -93,12 +94,6 @@ cpumask_t cpu_possible_map;
93EXPORT_SYMBOL(cpu_possible_map); 94EXPORT_SYMBOL(cpu_possible_map);
94static cpumask_t smp_commenced_mask; 95static cpumask_t smp_commenced_mask;
95 96
 96/* TSC's upper 32 bits can't be written in earlier CPUs (before Prescott), so there
97 * is no way to resync one AP against BP. TBD: for prescott and above, we
98 * should use IA64's algorithm
99 */
100static int __devinitdata tsc_sync_disabled;
101
102/* Per CPU bogomips and other parameters */ 97/* Per CPU bogomips and other parameters */
103struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 98struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
104EXPORT_SYMBOL(cpu_data); 99EXPORT_SYMBOL(cpu_data);
@@ -215,151 +210,6 @@ valid_k7:
215 ; 210 ;
216} 211}
217 212
218/*
219 * TSC synchronization.
220 *
221 * We first check whether all CPUs have their TSCs synchronized,
222 * then we print a warning if not, and always resync.
223 */
224
225static struct {
226 atomic_t start_flag;
227 atomic_t count_start;
228 atomic_t count_stop;
229 unsigned long long values[NR_CPUS];
230} tsc __cpuinitdata = {
231 .start_flag = ATOMIC_INIT(0),
232 .count_start = ATOMIC_INIT(0),
233 .count_stop = ATOMIC_INIT(0),
234};
235
236#define NR_LOOPS 5
237
238static void __init synchronize_tsc_bp(void)
239{
240 int i;
241 unsigned long long t0;
242 unsigned long long sum, avg;
243 long long delta;
244 unsigned int one_usec;
245 int buggy = 0;
246
247 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
248
249 /* convert from kcyc/sec to cyc/usec */
250 one_usec = cpu_khz / 1000;
251
252 atomic_set(&tsc.start_flag, 1);
253 wmb();
254
255 /*
256 * We loop a few times to get a primed instruction cache,
257 * then the last pass is more or less synchronized and
258 * the BP and APs set their cycle counters to zero all at
259 * once. This reduces the chance of having random offsets
260 * between the processors, and guarantees that the maximum
261 * delay between the cycle counters is never bigger than
262 * the latency of information-passing (cachelines) between
263 * two CPUs.
264 */
265 for (i = 0; i < NR_LOOPS; i++) {
266 /*
267 * all APs synchronize but they loop on '== num_cpus'
268 */
269 while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
270 cpu_relax();
271 atomic_set(&tsc.count_stop, 0);
272 wmb();
273 /*
274 * this lets the APs save their current TSC:
275 */
276 atomic_inc(&tsc.count_start);
277
278 rdtscll(tsc.values[smp_processor_id()]);
279 /*
280 * We clear the TSC in the last loop:
281 */
282 if (i == NR_LOOPS-1)
283 write_tsc(0, 0);
284
285 /*
286 * Wait for all APs to leave the synchronization point:
287 */
288 while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
289 cpu_relax();
290 atomic_set(&tsc.count_start, 0);
291 wmb();
292 atomic_inc(&tsc.count_stop);
293 }
294
295 sum = 0;
296 for (i = 0; i < NR_CPUS; i++) {
297 if (cpu_isset(i, cpu_callout_map)) {
298 t0 = tsc.values[i];
299 sum += t0;
300 }
301 }
302 avg = sum;
303 do_div(avg, num_booting_cpus());
304
305 for (i = 0; i < NR_CPUS; i++) {
306 if (!cpu_isset(i, cpu_callout_map))
307 continue;
308 delta = tsc.values[i] - avg;
309 if (delta < 0)
310 delta = -delta;
311 /*
312 * We report bigger than 2 microseconds clock differences.
313 */
314 if (delta > 2*one_usec) {
315 long long realdelta;
316
317 if (!buggy) {
318 buggy = 1;
319 printk("\n");
320 }
321 realdelta = delta;
322 do_div(realdelta, one_usec);
323 if (tsc.values[i] < avg)
324 realdelta = -realdelta;
325
326 if (realdelta)
327 printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
328 "skew, fixed it up.\n", i, realdelta);
329 }
330 }
331 if (!buggy)
332 printk("passed.\n");
333}
334
335static void __cpuinit synchronize_tsc_ap(void)
336{
337 int i;
338
339 /*
340 * Not every cpu is online at the time
341 * this gets called, so we first wait for the BP to
342 * finish SMP initialization:
343 */
344 while (!atomic_read(&tsc.start_flag))
345 cpu_relax();
346
347 for (i = 0; i < NR_LOOPS; i++) {
348 atomic_inc(&tsc.count_start);
349 while (atomic_read(&tsc.count_start) != num_booting_cpus())
350 cpu_relax();
351
352 rdtscll(tsc.values[smp_processor_id()]);
353 if (i == NR_LOOPS-1)
354 write_tsc(0, 0);
355
356 atomic_inc(&tsc.count_stop);
357 while (atomic_read(&tsc.count_stop) != num_booting_cpus())
358 cpu_relax();
359 }
360}
361#undef NR_LOOPS
362
363extern void calibrate_delay(void); 213extern void calibrate_delay(void);
364 214
365static atomic_t init_deasserted; 215static atomic_t init_deasserted;
@@ -437,20 +287,12 @@ static void __cpuinit smp_callin(void)
437 /* 287 /*
438 * Save our processor parameters 288 * Save our processor parameters
439 */ 289 */
440 smp_store_cpu_info(cpuid); 290 smp_store_cpu_info(cpuid);
441
442 disable_APIC_timer();
443 291
444 /* 292 /*
445 * Allow the master to continue. 293 * Allow the master to continue.
446 */ 294 */
447 cpu_set(cpuid, cpu_callin_map); 295 cpu_set(cpuid, cpu_callin_map);
448
449 /*
450 * Synchronize the TSC with the BP
451 */
452 if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
453 synchronize_tsc_ap();
454} 296}
455 297
456static int cpucount; 298static int cpucount;
@@ -545,18 +387,25 @@ static void __cpuinit start_secondary(void *unused)
545 * booting is so fragile that we want to limit the 387 * booting is so fragile that we want to limit the
546 * things done here to the most necessary things. 388 * things done here to the most necessary things.
547 */ 389 */
390#ifdef CONFIG_VMI
391 vmi_bringup();
392#endif
548 secondary_cpu_init(); 393 secondary_cpu_init();
549 preempt_disable(); 394 preempt_disable();
550 smp_callin(); 395 smp_callin();
551 while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) 396 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
552 rep_nop(); 397 rep_nop();
553 setup_secondary_APIC_clock(); 398 /*
399 * Check TSC synchronization with the BP:
400 */
401 check_tsc_sync_target();
402
403 setup_secondary_clock();
554 if (nmi_watchdog == NMI_IO_APIC) { 404 if (nmi_watchdog == NMI_IO_APIC) {
555 disable_8259A_irq(0); 405 disable_8259A_irq(0);
556 enable_NMI_through_LVT0(NULL); 406 enable_NMI_through_LVT0(NULL);
557 enable_8259A_irq(0); 407 enable_8259A_irq(0);
558 } 408 }
559 enable_APIC_timer();
560 /* 409 /*
561 * low-memory mappings have been cleared, flush them from 410 * low-memory mappings have been cleared, flush them from
562 * the local TLBs too. 411 * the local TLBs too.
@@ -619,7 +468,6 @@ extern struct {
619 unsigned short ss; 468 unsigned short ss;
620} stack_start; 469} stack_start;
621extern struct i386_pda *start_pda; 470extern struct i386_pda *start_pda;
622extern struct Xgt_desc_struct cpu_gdt_descr;
623 471
624#ifdef CONFIG_NUMA 472#ifdef CONFIG_NUMA
625 473
@@ -749,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
749 /* 597 /*
750 * Due to the Pentium erratum 3AP. 598 * Due to the Pentium erratum 3AP.
751 */ 599 */
752 maxlvt = get_maxlvt(); 600 maxlvt = lapic_get_maxlvt();
753 if (maxlvt > 3) { 601 if (maxlvt > 3) {
754 apic_read_around(APIC_SPIV); 602 apic_read_around(APIC_SPIV);
755 apic_write(APIC_ESR, 0); 603 apic_write(APIC_ESR, 0);
@@ -835,11 +683,18 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
835 num_starts = 0; 683 num_starts = 0;
836 684
837 /* 685 /*
686 * Paravirt / VMI wants a startup IPI hook here to set up the
687 * target processor state.
688 */
689 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
690 (unsigned long) stack_start.esp);
691
692 /*
838 * Run STARTUP IPI loop. 693 * Run STARTUP IPI loop.
839 */ 694 */
840 Dprintk("#startup loops: %d.\n", num_starts); 695 Dprintk("#startup loops: %d.\n", num_starts);
841 696
842 maxlvt = get_maxlvt(); 697 maxlvt = lapic_get_maxlvt();
843 698
844 for (j = 1; j <= num_starts; j++) { 699 for (j = 1; j <= num_starts; j++) {
845 Dprintk("Sending STARTUP #%d.\n",j); 700 Dprintk("Sending STARTUP #%d.\n",j);
@@ -1115,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
1115 info.cpu = cpu; 970 info.cpu = cpu;
1116 INIT_WORK(&info.task, do_warm_boot_cpu); 971 INIT_WORK(&info.task, do_warm_boot_cpu);
1117 972
1118 tsc_sync_disabled = 1;
1119
1120 /* init low mem mapping */ 973 /* init low mem mapping */
1121 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 974 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
1122 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); 975 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1124,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
1124 schedule_work(&info.task); 977 schedule_work(&info.task);
1125 wait_for_completion(&done); 978 wait_for_completion(&done);
1126 979
1127 tsc_sync_disabled = 0;
1128 zap_low_mappings(); 980 zap_low_mappings();
1129 ret = 0; 981 ret = 0;
1130exit: 982exit:
@@ -1320,13 +1172,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1320 1172
1321 smpboot_setup_io_apic(); 1173 smpboot_setup_io_apic();
1322 1174
1323 setup_boot_APIC_clock(); 1175 setup_boot_clock();
1324
1325 /*
1326 * Synchronize the TSC with the AP
1327 */
1328 if (cpu_has_tsc && cpucount && cpu_khz)
1329 synchronize_tsc_bp();
1330} 1176}
1331 1177
1332/* These are wrappers to interface to the new boot process. Someone 1178/* These are wrappers to interface to the new boot process. Someone
@@ -1461,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
1461 } 1307 }
1462 1308
1463 local_irq_enable(); 1309 local_irq_enable();
1310
1464 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 1311 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1465 /* Unleash the CPU! */ 1312 /* Unleash the CPU! */
1466 cpu_set(cpu, smp_commenced_mask); 1313 cpu_set(cpu, smp_commenced_mask);
1314
1315 /*
1316 * Check TSC synchronization with the AP:
1317 */
1318 check_tsc_sync_source(cpu);
1319
1467 while (!cpu_isset(cpu, cpu_online_map)) 1320 while (!cpu_isset(cpu, cpu_online_map))
1468 cpu_relax(); 1321 cpu_relax();
1469 1322
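The deleted synchronize_tsc_bp()/synchronize_tsc_ap() pair tried to rewrite every AP's TSC; the replacement check_tsc_sync_source()/check_tsc_sync_target() (in the new tsc_sync.c) only measures whether BP and AP agree and marks the TSC unstable if not. A simplified model of the rendezvous, with hypothetical names; the real code loops and tracks the observed skew:

	static atomic_t arrivals;
	static u64 sample[2];			/* [0] = source, [1] = target */

	static void tsc_rendezvous_sketch(int side)
	{
		atomic_inc(&arrivals);
		while (atomic_read(&arrivals) < 2)	/* both CPUs line up */
			cpu_relax();
		rdtscll(sample[side]);	/* sample as close together as possible */
	}

The source then compares the two samples against a small bound instead of writing the TSC, which the old comment already conceded cannot be done reliably on pre-Prescott parts.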
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index f7e735c077c3..2a8713ec0f9a 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -62,19 +62,19 @@ extern void * boot_ioremap(unsigned long, unsigned long);
62/* Identify CPU proximity domains */ 62/* Identify CPU proximity domains */
63static void __init parse_cpu_affinity_structure(char *p) 63static void __init parse_cpu_affinity_structure(char *p)
64{ 64{
65 struct acpi_table_processor_affinity *cpu_affinity = 65 struct acpi_srat_cpu_affinity *cpu_affinity =
66 (struct acpi_table_processor_affinity *) p; 66 (struct acpi_srat_cpu_affinity *) p;
67 67
68 if (!cpu_affinity->flags.enabled) 68 if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
69 return; /* empty entry */ 69 return; /* empty entry */
70 70
71 /* mark this node as "seen" in node bitmap */ 71 /* mark this node as "seen" in node bitmap */
72 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain); 72 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
73 73
74 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain; 74 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
75 75
76 printk("CPU 0x%02X in proximity domain 0x%02X\n", 76 printk("CPU 0x%02X in proximity domain 0x%02X\n",
77 cpu_affinity->apic_id, cpu_affinity->proximity_domain); 77 cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
78} 78}
79 79
80/* 80/*
@@ -84,28 +84,27 @@ static void __init parse_cpu_affinity_structure(char *p)
84static void __init parse_memory_affinity_structure (char *sratp) 84static void __init parse_memory_affinity_structure (char *sratp)
85{ 85{
86 unsigned long long paddr, size; 86 unsigned long long paddr, size;
87 unsigned long start_pfn, end_pfn; 87 unsigned long start_pfn, end_pfn;
88 u8 pxm; 88 u8 pxm;
89 struct node_memory_chunk_s *p, *q, *pend; 89 struct node_memory_chunk_s *p, *q, *pend;
90 struct acpi_table_memory_affinity *memory_affinity = 90 struct acpi_srat_mem_affinity *memory_affinity =
91 (struct acpi_table_memory_affinity *) sratp; 91 (struct acpi_srat_mem_affinity *) sratp;
92 92
93 if (!memory_affinity->flags.enabled) 93 if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
94 return; /* empty entry */ 94 return; /* empty entry */
95 95
96 pxm = memory_affinity->proximity_domain & 0xff;
97
96 /* mark this node as "seen" in node bitmap */ 98 /* mark this node as "seen" in node bitmap */
97 BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain); 99 BMAP_SET(pxm_bitmap, pxm);
98 100
99 /* calculate info for memory chunk structure */ 101 /* calculate info for memory chunk structure */
100 paddr = memory_affinity->base_addr_hi; 102 paddr = memory_affinity->base_address;
101 paddr = (paddr << 32) | memory_affinity->base_addr_lo; 103 size = memory_affinity->length;
102 size = memory_affinity->length_hi; 104
103 size = (size << 32) | memory_affinity->length_lo;
104
105 start_pfn = paddr >> PAGE_SHIFT; 105 start_pfn = paddr >> PAGE_SHIFT;
106 end_pfn = (paddr + size) >> PAGE_SHIFT; 106 end_pfn = (paddr + size) >> PAGE_SHIFT;
107 107
108 pxm = memory_affinity->proximity_domain;
109 108
110 if (num_memory_chunks >= MAXCHUNKS) { 109 if (num_memory_chunks >= MAXCHUNKS) {
111 printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n", 110 printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n",
@@ -132,8 +131,8 @@ static void __init parse_memory_affinity_structure (char *sratp)
132 printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n", 131 printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n",
133 start_pfn, end_pfn, 132 start_pfn, end_pfn,
134 memory_affinity->memory_type, 133 memory_affinity->memory_type,
135 memory_affinity->proximity_domain, 134 pxm,
136 (memory_affinity->flags.hot_pluggable ? 135 ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
137 "enabled and removable" : "enabled" ) ); 136 "enabled and removable" : "enabled" ) );
138} 137}
139 138
@@ -185,10 +184,10 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
185 num_memory_chunks = 0; 184 num_memory_chunks = 0;
186 while (p < end) { 185 while (p < end) {
187 switch (*p) { 186 switch (*p) {
188 case ACPI_SRAT_PROCESSOR_AFFINITY: 187 case ACPI_SRAT_TYPE_CPU_AFFINITY:
189 parse_cpu_affinity_structure(p); 188 parse_cpu_affinity_structure(p);
190 break; 189 break;
191 case ACPI_SRAT_MEMORY_AFFINITY: 190 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
192 parse_memory_affinity_structure(p); 191 parse_memory_affinity_structure(p);
193 break; 192 break;
194 default: 193 default:
@@ -262,31 +261,30 @@ out_fail:
262 return 0; 261 return 0;
263} 262}
264 263
264struct acpi_static_rsdt {
265 struct acpi_table_rsdt table;
266 u32 padding[7]; /* Allow for 7 more table entries */
267};
268
265int __init get_memcfg_from_srat(void) 269int __init get_memcfg_from_srat(void)
266{ 270{
267 struct acpi_table_header *header = NULL; 271 struct acpi_table_header *header = NULL;
268 struct acpi_table_rsdp *rsdp = NULL; 272 struct acpi_table_rsdp *rsdp = NULL;
269 struct acpi_table_rsdt *rsdt = NULL; 273 struct acpi_table_rsdt *rsdt = NULL;
270 struct acpi_pointer *rsdp_address = NULL; 274 acpi_native_uint rsdp_address = 0;
271 struct acpi_table_rsdt saved_rsdt; 275 struct acpi_static_rsdt saved_rsdt;
272 int tables = 0; 276 int tables = 0;
273 int i = 0; 277 int i = 0;
274 278
275 if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, 279 rsdp_address = acpi_find_rsdp();
276 rsdp_address))) { 280 if (!rsdp_address) {
277 printk("%s: System description tables not found\n", 281 printk("%s: System description tables not found\n",
278 __FUNCTION__); 282 __FUNCTION__);
279 goto out_err; 283 goto out_err;
280 } 284 }
281 285
282 if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { 286 printk("%s: assigning address to rsdp\n", __FUNCTION__);
283 printk("%s: assigning address to rsdp\n", __FUNCTION__); 287 rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
284 rsdp = (struct acpi_table_rsdp *)
285 (u32)rsdp_address->pointer.physical;
286 } else {
287 printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__);
288 goto out_err;
289 }
290 if (!rsdp) { 288 if (!rsdp) {
291 printk("%s: Didn't find ACPI root!\n", __FUNCTION__); 289 printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
292 goto out_err; 290 goto out_err;
@@ -295,13 +293,13 @@ int __init get_memcfg_from_srat(void)
295 printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, 293 printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision,
296 rsdp->oem_id); 294 rsdp->oem_id);
297 295
298 if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) { 296 if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
299 printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__); 297 printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
300 goto out_err; 298 goto out_err;
301 } 299 }
302 300
303 rsdt = (struct acpi_table_rsdt *) 301 rsdt = (struct acpi_table_rsdt *)
304 boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt)); 302 boot_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
305 303
306 if (!rsdt) { 304 if (!rsdt) {
307 printk(KERN_WARNING 305 printk(KERN_WARNING
@@ -310,9 +308,9 @@ int __init get_memcfg_from_srat(void)
310 goto out_err; 308 goto out_err;
311 } 309 }
312 310
313 header = & rsdt->header; 311 header = &rsdt->header;
314 312
315 if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) { 313 if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
316 printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); 314 printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
317 goto out_err; 315 goto out_err;
318 } 316 }
@@ -330,9 +328,9 @@ int __init get_memcfg_from_srat(void)
330 328
331 memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); 329 memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
332 330
333 if (saved_rsdt.header.length > sizeof(saved_rsdt)) { 331 if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
334 printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", 332 printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
335 saved_rsdt.header.length); 333 saved_rsdt.table.header.length);
336 goto out_err; 334 goto out_err;
337 } 335 }
338 336
@@ -341,15 +339,15 @@ int __init get_memcfg_from_srat(void)
341 for (i = 0; i < tables; i++) { 339 for (i = 0; i < tables; i++) {
342 /* Map in header, then map in full table length. */ 340 /* Map in header, then map in full table length. */
343 header = (struct acpi_table_header *) 341 header = (struct acpi_table_header *)
344 boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header)); 342 boot_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
345 if (!header) 343 if (!header)
346 break; 344 break;
347 header = (struct acpi_table_header *) 345 header = (struct acpi_table_header *)
348 boot_ioremap(saved_rsdt.entry[i], header->length); 346 boot_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
349 if (!header) 347 if (!header)
350 break; 348 break;
351 349
352 if (strncmp((char *) &header->signature, "SRAT", 4)) 350 if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
353 continue; 351 continue;
354 352
355 /* we've found the srat table. don't need to look at any more tables */ 353 /* we've found the srat table. don't need to look at any more tables */
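With the ACPI 2.0-style struct, base and length arrive as plain 64-bit fields, so the hi/lo stitching disappears and the pfn math in the hunk above reduces to two shifts. Worked through (PAGE_SHIFT is 12 on i386; the example values are illustrative):

	unsigned long long paddr = memory_affinity->base_address;	/* e.g. 0x40000000 */
	unsigned long long size  = memory_affinity->length;		/* e.g. 0x10000000 */
	unsigned long start_pfn = paddr >> PAGE_SHIFT;			/* 0x40000 */
	unsigned long end_pfn   = (paddr + size) >> PAGE_SHIFT;		/* 0x50000, one past the end */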
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 5da744204d10..13ca54a85a1c 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -70,14 +70,15 @@ void enable_sep_cpu(void)
70 */ 70 */
71extern const char vsyscall_int80_start, vsyscall_int80_end; 71extern const char vsyscall_int80_start, vsyscall_int80_end;
72extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; 72extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
73static void *syscall_page; 73static struct page *syscall_pages[1];
74 74
75int __init sysenter_setup(void) 75int __init sysenter_setup(void)
76{ 76{
77 syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); 77 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
78 syscall_pages[0] = virt_to_page(syscall_page);
78 79
79#ifdef CONFIG_COMPAT_VDSO 80#ifdef CONFIG_COMPAT_VDSO
80 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); 81 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
81 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); 82 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
82#endif 83#endif
83 84
@@ -96,31 +97,12 @@ int __init sysenter_setup(void)
96} 97}
97 98
98#ifndef CONFIG_COMPAT_VDSO 99#ifndef CONFIG_COMPAT_VDSO
99static struct page *syscall_nopage(struct vm_area_struct *vma,
100 unsigned long adr, int *type)
101{
102 struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
103 get_page(p);
104 return p;
105}
106
107/* Prevent VMA merging */
108static void syscall_vma_close(struct vm_area_struct *vma)
109{
110}
111
112static struct vm_operations_struct syscall_vm_ops = {
113 .close = syscall_vma_close,
114 .nopage = syscall_nopage,
115};
116
117/* Defined in vsyscall-sysenter.S */ 100/* Defined in vsyscall-sysenter.S */
118extern void SYSENTER_RETURN; 101extern void SYSENTER_RETURN;
119 102
120/* Setup a VMA at program startup for the vsyscall page */ 103/* Setup a VMA at program startup for the vsyscall page */
121int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) 104int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
122{ 105{
123 struct vm_area_struct *vma;
124 struct mm_struct *mm = current->mm; 106 struct mm_struct *mm = current->mm;
125 unsigned long addr; 107 unsigned long addr;
126 int ret; 108 int ret;
@@ -132,38 +114,25 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
132 goto up_fail; 114 goto up_fail;
133 } 115 }
134 116
135 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
136 if (!vma) {
137 ret = -ENOMEM;
138 goto up_fail;
139 }
140
141 vma->vm_start = addr;
142 vma->vm_end = addr + PAGE_SIZE;
143 /* MAYWRITE to allow gdb to COW and set breakpoints */
144 vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
145 /* 117 /*
118 * MAYWRITE to allow gdb to COW and set breakpoints
119 *
146 * Make sure the vDSO gets into every core dump. 120 * Make sure the vDSO gets into every core dump.
147 * Dumping its contents makes post-mortem fully interpretable later 121 * Dumping its contents makes post-mortem fully interpretable later
148 * without matching up the same kernel and hardware config to see 122 * without matching up the same kernel and hardware config to see
149 * what PC values meant. 123 * what PC values meant.
150 */ 124 */
151 vma->vm_flags |= VM_ALWAYSDUMP; 125 ret = install_special_mapping(mm, addr, PAGE_SIZE,
152 vma->vm_flags |= mm->def_flags; 126 VM_READ|VM_EXEC|
153 vma->vm_page_prot = protection_map[vma->vm_flags & 7]; 127 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
154 vma->vm_ops = &syscall_vm_ops; 128 VM_ALWAYSDUMP,
155 vma->vm_mm = mm; 129 syscall_pages);
156 130 if (ret)
157 ret = insert_vm_struct(mm, vma);
158 if (unlikely(ret)) {
159 kmem_cache_free(vm_area_cachep, vma);
160 goto up_fail; 131 goto up_fail;
161 }
162 132
163 current->mm->context.vdso = (void *)addr; 133 current->mm->context.vdso = (void *)addr;
164 current_thread_info()->sysenter_return = 134 current_thread_info()->sysenter_return =
165 (void *)VDSO_SYM(&SYSENTER_RETURN); 135 (void *)VDSO_SYM(&SYSENTER_RETURN);
166 mm->total_vm++;
167up_fail: 136up_fail:
168 up_write(&mm->mmap_sem); 137 up_write(&mm->mmap_sem);
169 return ret; 138 return ret;
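The whole hand-rolled VMA block (alloc, flag setup, insert_vm_struct, error unwind) collapses into one install_special_mapping() call. A minimal sketch of the same pattern for any per-process special page, error handling trimmed and the address lookup shown explicitly:

	down_write(&mm->mmap_sem);
	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
	if (!IS_ERR_VALUE(addr))
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      syscall_pages);
	up_write(&mm->mmap_sem);

The helper allocates and inserts the vm_area_struct itself, supplying the nopage behaviour the old syscall_vm_ops provided and accounting the page in total_vm, which is why those lines vanish from the hunk.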
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index c505b16c0990..a5350059557a 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -131,15 +131,13 @@ unsigned long profile_pc(struct pt_regs *regs)
131 unsigned long pc = instruction_pointer(regs); 131 unsigned long pc = instruction_pointer(regs);
132 132
133#ifdef CONFIG_SMP 133#ifdef CONFIG_SMP
134 if (!user_mode_vm(regs) && in_lock_functions(pc)) { 134 if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) &&
135 in_lock_functions(pc)) {
135#ifdef CONFIG_FRAME_POINTER 136#ifdef CONFIG_FRAME_POINTER
136 return *(unsigned long *)(regs->ebp + 4); 137 return *(unsigned long *)(regs->ebp + 4);
137#else 138#else
138 unsigned long *sp; 139 unsigned long *sp = (unsigned long *)&regs->esp;
139 if ((regs->xcs & 3) == 0) 140
140 sp = (unsigned long *)&regs->esp;
141 else
142 sp = (unsigned long *)regs->esp;
143 /* Return address is either directly at stack pointer 141 /* Return address is either directly at stack pointer
144 or above a saved eflags. Eflags has bits 22-31 zero, 142 or above a saved eflags. Eflags has bits 22-31 zero,
145 kernel addresses don't. */ 143 kernel addresses don't. */
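The simplified profile_pc() above leans on an address-space invariant: a saved EFLAGS word always has bits 22-31 clear, while i386 kernel text sits above PAGE_OFFSET (0xC0000000) and therefore never does. Sketch of the disambiguation (the helper name is illustrative):

	static int looks_like_return_address(unsigned long word)
	{
		/* EFLAGS bits 22-31 are architecturally zero; kernel text
		 * addresses have them set, so the high bits decide */
		return (word & 0xffc00000) != 0;
	}

With the user-mode cases filtered out first (v8086 and non-kernel %cs), &regs->esp is always the right starting point, so the old (regs->xcs & 3) branch goes away.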
@@ -161,15 +159,6 @@ EXPORT_SYMBOL(profile_pc);
161 */ 159 */
162irqreturn_t timer_interrupt(int irq, void *dev_id) 160irqreturn_t timer_interrupt(int irq, void *dev_id)
163{ 161{
164 /*
165 * Here we are in the timer irq handler. We just have irqs locally
166 * disabled but we don't know if the timer_bh is running on the other
167 * CPU. We need to avoid an SMP race with it. NOTE: we don't need
168 * the irq version of write_lock because as just said we have irq
169 * locally disabled. -arca
170 */
171 write_seqlock(&xtime_lock);
172
173#ifdef CONFIG_X86_IO_APIC 162#ifdef CONFIG_X86_IO_APIC
174 if (timer_ack) { 163 if (timer_ack) {
175 /* 164 /*
@@ -188,7 +177,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
188 177
189 do_timer_interrupt_hook(); 178 do_timer_interrupt_hook();
190 179
191
192 if (MCA_bus) { 180 if (MCA_bus) {
193 /* The PS/2 uses level-triggered interrupts. You can't 181 /* The PS/2 uses level-triggered interrupts. You can't
194 turn them off, nor would you want to (any attempt to 182 turn them off, nor would you want to (any attempt to
@@ -203,18 +191,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
203 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ 191 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
204 } 192 }
205 193
206 write_sequnlock(&xtime_lock);
207
208#ifdef CONFIG_X86_LOCAL_APIC
209 if (using_apic_timer)
210 smp_send_timer_broadcast_ipi();
211#endif
212
213 return IRQ_HANDLED; 194 return IRQ_HANDLED;
214} 195}
215 196
216/* not static: needed by APM */ 197/* not static: needed by APM */
217unsigned long get_cmos_time(void) 198unsigned long read_persistent_clock(void)
218{ 199{
219 unsigned long retval; 200 unsigned long retval;
220 unsigned long flags; 201 unsigned long flags;
@@ -227,11 +208,11 @@ unsigned long get_cmos_time(void)
227 208
228 return retval; 209 return retval;
229} 210}
230EXPORT_SYMBOL(get_cmos_time);
231 211
232static void sync_cmos_clock(unsigned long dummy); 212static void sync_cmos_clock(unsigned long dummy);
233 213
234static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); 214static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
215int no_sync_cmos_clock;
235 216
236static void sync_cmos_clock(unsigned long dummy) 217static void sync_cmos_clock(unsigned long dummy)
237{ 218{
@@ -275,117 +256,20 @@ static void sync_cmos_clock(unsigned long dummy)
275 256
276void notify_arch_cmos_timer(void) 257void notify_arch_cmos_timer(void)
277{ 258{
278 mod_timer(&sync_cmos_timer, jiffies + 1); 259 if (!no_sync_cmos_clock)
279} 260 mod_timer(&sync_cmos_timer, jiffies + 1);
280
281static long clock_cmos_diff;
282static unsigned long sleep_start;
283
284static int timer_suspend(struct sys_device *dev, pm_message_t state)
285{
286 /*
287 * Estimate time zone so that set_time can update the clock
288 */
289 unsigned long ctime = get_cmos_time();
290
291 clock_cmos_diff = -ctime;
292 clock_cmos_diff += get_seconds();
293 sleep_start = ctime;
294 return 0;
295}
296
297static int timer_resume(struct sys_device *dev)
298{
299 unsigned long flags;
300 unsigned long sec;
301 unsigned long ctime = get_cmos_time();
302 long sleep_length = (ctime - sleep_start) * HZ;
303 struct timespec ts;
304
305 if (sleep_length < 0) {
306 printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
307 /* The time after the resume must not be earlier than the time
308 * before the suspend or some nasty things will happen
309 */
310 sleep_length = 0;
311 ctime = sleep_start;
312 }
313#ifdef CONFIG_HPET_TIMER
314 if (is_hpet_enabled())
315 hpet_reenable();
316#endif
317 setup_pit_timer();
318
319 sec = ctime + clock_cmos_diff;
320 ts.tv_sec = sec;
321 ts.tv_nsec = 0;
322 do_settimeofday(&ts);
323 write_seqlock_irqsave(&xtime_lock, flags);
324 jiffies_64 += sleep_length;
325 write_sequnlock_irqrestore(&xtime_lock, flags);
326 touch_softlockup_watchdog();
327 return 0;
328}
329
330static struct sysdev_class timer_sysclass = {
331 .resume = timer_resume,
332 .suspend = timer_suspend,
333 set_kset_name("timer"),
334};
335
336
337/* XXX this driverfs stuff should probably go elsewhere later -john */
338static struct sys_device device_timer = {
339 .id = 0,
340 .cls = &timer_sysclass,
341};
342
343static int time_init_device(void)
344{
345 int error = sysdev_class_register(&timer_sysclass);
346 if (!error)
347 error = sysdev_register(&device_timer);
348 return error;
349} 261}
350 262
351device_initcall(time_init_device);
352
353#ifdef CONFIG_HPET_TIMER
354extern void (*late_time_init)(void); 263extern void (*late_time_init)(void);
355/* Duplicate of time_init() below, with hpet_enable part added */ 264/* Duplicate of time_init() below, with hpet_enable part added */
356static void __init hpet_time_init(void) 265static void __init hpet_time_init(void)
357{ 266{
358 struct timespec ts; 267 if (!hpet_enable())
359 ts.tv_sec = get_cmos_time(); 268 setup_pit_timer();
360 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
361
362 do_settimeofday(&ts);
363
364 if ((hpet_enable() >= 0) && hpet_use_timer) {
365 printk("Using HPET for base-timer\n");
366 }
367
368 do_time_init(); 269 do_time_init();
369} 270}
370#endif
371 271
372void __init time_init(void) 272void __init time_init(void)
373{ 273{
374 struct timespec ts; 274 late_time_init = hpet_time_init;
375#ifdef CONFIG_HPET_TIMER
376 if (is_hpet_capable()) {
377 /*
378 * HPET initialization needs to do memory-mapped io. So, let
379 * us do a late initialization after mem_init().
380 */
381 late_time_init = hpet_time_init;
382 return;
383 }
384#endif
385 ts.tv_sec = get_cmos_time();
386 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
387
388 do_settimeofday(&ts);
389
390 do_time_init();
391} 275}
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
deleted file mode 100644
index 1e4702dfcd01..000000000000
--- a/arch/i386/kernel/time_hpet.c
+++ /dev/null
@@ -1,497 +0,0 @@
1/*
2 * linux/arch/i386/kernel/time_hpet.c
3 * This code largely copied from arch/x86_64/kernel/time.c
4 * See that file for credits.
5 *
6 * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support
7 */
8
9#include <linux/errno.h>
10#include <linux/kernel.h>
11#include <linux/param.h>
12#include <linux/string.h>
13#include <linux/init.h>
14#include <linux/smp.h>
15
16#include <asm/timer.h>
17#include <asm/fixmap.h>
18#include <asm/apic.h>
19
20#include <linux/timex.h>
21
22#include <asm/hpet.h>
23#include <linux/hpet.h>
24
25static unsigned long hpet_period; /* fsecs / HPET clock */
26unsigned long hpet_tick; /* hpet clks count per tick */
27unsigned long hpet_address; /* hpet memory map physical address */
28int hpet_use_timer;
29
30static int use_hpet; /* can be used for runtime check of hpet */
31static int boot_hpet_disable; /* boottime override for HPET timer */
32static void __iomem * hpet_virt_address; /* hpet kernel virtual address */
33
34#define FSEC_TO_USEC (1000000000UL)
35
36int hpet_readl(unsigned long a)
37{
38 return readl(hpet_virt_address + a);
39}
40
41static void hpet_writel(unsigned long d, unsigned long a)
42{
43 writel(d, hpet_virt_address + a);
44}
45
46#ifdef CONFIG_X86_LOCAL_APIC
47/*
48 * HPET counters don't wrap around on every tick. They just change the
49 * comparator value and continue. Next tick can be caught by checking
50 * for a change in the comparator value. Used in apic.c.
51 */
52static void __devinit wait_hpet_tick(void)
53{
54 unsigned int start_cmp_val, end_cmp_val;
55
56 start_cmp_val = hpet_readl(HPET_T0_CMP);
57 do {
58 end_cmp_val = hpet_readl(HPET_T0_CMP);
59 } while (start_cmp_val == end_cmp_val);
60}
61#endif
62
63static int hpet_timer_stop_set_go(unsigned long tick)
64{
65 unsigned int cfg;
66
67 /*
68 * Stop the timers and reset the main counter.
69 */
70 cfg = hpet_readl(HPET_CFG);
71 cfg &= ~HPET_CFG_ENABLE;
72 hpet_writel(cfg, HPET_CFG);
73 hpet_writel(0, HPET_COUNTER);
74 hpet_writel(0, HPET_COUNTER + 4);
75
76 if (hpet_use_timer) {
77 /*
78 * Set up timer 0, as periodic with first interrupt to happen at
79 * hpet_tick, and period also hpet_tick.
80 */
81 cfg = hpet_readl(HPET_T0_CFG);
82 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
83 HPET_TN_SETVAL | HPET_TN_32BIT;
84 hpet_writel(cfg, HPET_T0_CFG);
85
86 /*
87 * The first write after writing TN_SETVAL to the config register sets
88 * the counter value, the second write sets the threshold.
89 */
90 hpet_writel(tick, HPET_T0_CMP);
91 hpet_writel(tick, HPET_T0_CMP);
92 }
93 /*
94 * Go!
95 */
96 cfg = hpet_readl(HPET_CFG);
97 if (hpet_use_timer)
98 cfg |= HPET_CFG_LEGACY;
99 cfg |= HPET_CFG_ENABLE;
100 hpet_writel(cfg, HPET_CFG);
101
102 return 0;
103}
104
105/*
106 * Check whether HPET was found by ACPI boot parse. If yes setup HPET
107 * counter 0 for kernel base timer.
108 */
109int __init hpet_enable(void)
110{
111 unsigned int id;
112 unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */
113 unsigned long hpet_tick_rem;
114
115 if (boot_hpet_disable)
116 return -1;
117
118 if (!hpet_address) {
119 return -1;
120 }
121 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
122 /*
123 * Read the period, compute tick and quotient.
124 */
125 id = hpet_readl(HPET_ID);
126
127 /*
128 * We are checking for a value of '1' or more in the number field if
129 * CONFIG_HPET_EMULATE_RTC is set, because we will need an
130 * additional timer for RTC emulation.
131 * However, we can make do with one timer otherwise, using the
132 * single HPET timer for system time.
133 */
134#ifdef CONFIG_HPET_EMULATE_RTC
135 if (!(id & HPET_ID_NUMBER)) {
136 iounmap(hpet_virt_address);
137 hpet_virt_address = NULL;
138 return -1;
139 }
140#endif
141
142
143 hpet_period = hpet_readl(HPET_PERIOD);
144 if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) {
145 iounmap(hpet_virt_address);
146 hpet_virt_address = NULL;
147 return -1;
148 }
149
150 /*
151 * 64 bit math
152 * First changing tick into fsec
153 * Then 64 bit div to find number of hpet clk per tick
154 */
155 ASM_MUL64_REG(tick_fsec_low, tick_fsec_high,
156 KERNEL_TICK_USEC, FSEC_TO_USEC);
157 ASM_DIV64_REG(hpet_tick, hpet_tick_rem,
158 hpet_period, tick_fsec_low, tick_fsec_high);
159
160 if (hpet_tick_rem > (hpet_period >> 1))
161 hpet_tick++; /* rounding the result */
162
163 hpet_use_timer = id & HPET_ID_LEGSUP;
164
165 if (hpet_timer_stop_set_go(hpet_tick)) {
166 iounmap(hpet_virt_address);
167 hpet_virt_address = NULL;
168 return -1;
169 }
170
171 use_hpet = 1;
172
173#ifdef CONFIG_HPET
174 {
175 struct hpet_data hd;
176 unsigned int ntimer;
177
178 memset(&hd, 0, sizeof (hd));
179
180 ntimer = hpet_readl(HPET_ID);
181 ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
182 ntimer++;
183
184 /*
185 * Register with driver.
186 * Timer0 and Timer1 is used by platform.
187 */
188 hd.hd_phys_address = hpet_address;
189 hd.hd_address = hpet_virt_address;
190 hd.hd_nirqs = ntimer;
191 hd.hd_flags = HPET_DATA_PLATFORM;
192 hpet_reserve_timer(&hd, 0);
193#ifdef CONFIG_HPET_EMULATE_RTC
194 hpet_reserve_timer(&hd, 1);
195#endif
196 hd.hd_irq[0] = HPET_LEGACY_8254;
197 hd.hd_irq[1] = HPET_LEGACY_RTC;
198 if (ntimer > 2) {
199 struct hpet __iomem *hpet;
200 struct hpet_timer __iomem *timer;
201 int i;
202
203 hpet = hpet_virt_address;
204
205 for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
206 timer++, i++)
207 hd.hd_irq[i] = (timer->hpet_config &
208 Tn_INT_ROUTE_CNF_MASK) >>
209 Tn_INT_ROUTE_CNF_SHIFT;
210
211 }
212
213 hpet_alloc(&hd);
214 }
215#endif
216
217#ifdef CONFIG_X86_LOCAL_APIC
218 if (hpet_use_timer)
219 wait_timer_tick = wait_hpet_tick;
220#endif
221 return 0;
222}
223
224int hpet_reenable(void)
225{
226 return hpet_timer_stop_set_go(hpet_tick);
227}
228
229int is_hpet_enabled(void)
230{
231 return use_hpet;
232}
233
234int is_hpet_capable(void)
235{
236 if (!boot_hpet_disable && hpet_address)
237 return 1;
238 return 0;
239}
240
241static int __init hpet_setup(char* str)
242{
243 if (str) {
244 if (!strncmp("disable", str, 7))
245 boot_hpet_disable = 1;
246 }
247 return 1;
248}
249
250__setup("hpet=", hpet_setup);
251
252#ifdef CONFIG_HPET_EMULATE_RTC
253/* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET
254 * is enabled, we support RTC interrupt functionality in software.
255 * RTC has 3 kinds of interrupts:
256 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
257 * is updated
258 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
259 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
260 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
261 * (1) and (2) above are implemented using polling at a frequency of
262 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
263 * overhead. (DEFAULT_RTC_INT_FREQ)
264 * For (3), we use interrupts at 64Hz or user specified periodic
265 * frequency, whichever is higher.
266 */
267#include <linux/mc146818rtc.h>
268#include <linux/rtc.h>
269
270#define DEFAULT_RTC_INT_FREQ 64
271#define RTC_NUM_INTS 1
272
273static unsigned long UIE_on;
274static unsigned long prev_update_sec;
275
276static unsigned long AIE_on;
277static struct rtc_time alarm_time;
278
279static unsigned long PIE_on;
280static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
281static unsigned long PIE_count;
282
283static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
284static unsigned int hpet_t1_cmp; /* cached comparator register */
285
286/*
287 * Timer 1 for RTC; we do not use the periodic interrupt feature,
288 * even if HPET supports periodic interrupts on Timer 1.
289 * The reason being, to set up a periodic interrupt in HPET, we need to
290 * stop the main counter. And if we do that every time someone disables/enables
291 * RTC, we will have an adverse effect on the main kernel timer running on Timer 0.
292 * So, for the time being, simulate the periodic interrupt in software.
293 *
294 * hpet_rtc_timer_init() is called for the first time and during subsequent
295 * interrupts, reinit happens through hpet_rtc_timer_reinit().
296 */
297int hpet_rtc_timer_init(void)
298{
299 unsigned int cfg, cnt;
300 unsigned long flags;
301
302 if (!is_hpet_enabled())
303 return 0;
304 /*
305 * Set the counter 1 and enable the interrupts.
306 */
307 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
308 hpet_rtc_int_freq = PIE_freq;
309 else
310 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
311
312 local_irq_save(flags);
313
314 cnt = hpet_readl(HPET_COUNTER);
315 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
316 hpet_writel(cnt, HPET_T1_CMP);
317 hpet_t1_cmp = cnt;
318
319 cfg = hpet_readl(HPET_T1_CFG);
320 cfg &= ~HPET_TN_PERIODIC;
321 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
322 hpet_writel(cfg, HPET_T1_CFG);
323
324 local_irq_restore(flags);
325
326 return 1;
327}
328
329static void hpet_rtc_timer_reinit(void)
330{
331 unsigned int cfg, cnt, ticks_per_int, lost_ints;
332
333 if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
334 cfg = hpet_readl(HPET_T1_CFG);
335 cfg &= ~HPET_TN_ENABLE;
336 hpet_writel(cfg, HPET_T1_CFG);
337 return;
338 }
339
340 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
341 hpet_rtc_int_freq = PIE_freq;
342 else
343 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
344
345 /* It is more accurate to use the comparator value than current count.*/
346 ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
347 hpet_t1_cmp += ticks_per_int;
348 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
349
350 /*
351 * If the interrupt handler was delayed too long, the write above tries
352 * to schedule the next interrupt in the past and the hardware would
353 * not interrupt until the counter had wrapped around.
354 * So we have to check that the comparator wasn't set to a past time.
355 */
356 cnt = hpet_readl(HPET_COUNTER);
357 if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
358 lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
359 /* Make sure that, even with the time needed to execute
360 * this code, the next scheduled interrupt has been moved
361 * back to the future: */
362 lost_ints++;
363
364 hpet_t1_cmp += lost_ints * ticks_per_int;
365 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
366
367 if (PIE_on)
368 PIE_count += lost_ints;
369
370 printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
371 hpet_rtc_int_freq);
372 }
373}
374
375/*
376 * The functions below are called from rtc driver.
377 * Return 0 if HPET is not being used.
378 * Otherwise do the necessary changes and return 1.
379 */
380int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
381{
382 if (!is_hpet_enabled())
383 return 0;
384
385 if (bit_mask & RTC_UIE)
386 UIE_on = 0;
387 if (bit_mask & RTC_PIE)
388 PIE_on = 0;
389 if (bit_mask & RTC_AIE)
390 AIE_on = 0;
391
392 return 1;
393}
394
395int hpet_set_rtc_irq_bit(unsigned long bit_mask)
396{
397 int timer_init_reqd = 0;
398
399 if (!is_hpet_enabled())
400 return 0;
401
402 if (!(PIE_on | AIE_on | UIE_on))
403 timer_init_reqd = 1;
404
405 if (bit_mask & RTC_UIE) {
406 UIE_on = 1;
407 }
408 if (bit_mask & RTC_PIE) {
409 PIE_on = 1;
410 PIE_count = 0;
411 }
412 if (bit_mask & RTC_AIE) {
413 AIE_on = 1;
414 }
415
416 if (timer_init_reqd)
417 hpet_rtc_timer_init();
418
419 return 1;
420}
421
422int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
423{
424 if (!is_hpet_enabled())
425 return 0;
426
427 alarm_time.tm_hour = hrs;
428 alarm_time.tm_min = min;
429 alarm_time.tm_sec = sec;
430
431 return 1;
432}
433
434int hpet_set_periodic_freq(unsigned long freq)
435{
436 if (!is_hpet_enabled())
437 return 0;
438
439 PIE_freq = freq;
440 PIE_count = 0;
441
442 return 1;
443}
444
445int hpet_rtc_dropped_irq(void)
446{
447 if (!is_hpet_enabled())
448 return 0;
449
450 return 1;
451}
452
453irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
454{
455 struct rtc_time curr_time;
456 unsigned long rtc_int_flag = 0;
457 int call_rtc_interrupt = 0;
458
459 hpet_rtc_timer_reinit();
460
461 if (UIE_on | AIE_on) {
462 rtc_get_rtc_time(&curr_time);
463 }
464 if (UIE_on) {
465 if (curr_time.tm_sec != prev_update_sec) {
466 /* Set update int info, call real rtc int routine */
467 call_rtc_interrupt = 1;
468 rtc_int_flag = RTC_UF;
469 prev_update_sec = curr_time.tm_sec;
470 }
471 }
472 if (PIE_on) {
473 PIE_count++;
474 if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
475 /* Set periodic int info, call real rtc int routine */
476 call_rtc_interrupt = 1;
477 rtc_int_flag |= RTC_PF;
478 PIE_count = 0;
479 }
480 }
481 if (AIE_on) {
482 if ((curr_time.tm_sec == alarm_time.tm_sec) &&
483 (curr_time.tm_min == alarm_time.tm_min) &&
484 (curr_time.tm_hour == alarm_time.tm_hour)) {
485 /* Set alarm int info, call real rtc int routine */
486 call_rtc_interrupt = 1;
487 rtc_int_flag |= RTC_AF;
488 }
489 }
490 if (call_rtc_interrupt) {
491 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
492 rtc_interrupt(rtc_int_flag, dev_id);
493 }
494 return IRQ_HANDLED;
495}
496#endif
497
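One idiom from the deleted file is worth flagging, since the replacement hpet.c needs it too: when reprogramming a free-running 32-bit HPET comparator, "is the comparator already in the past?" must be asked wrap-safely. Sketch (the helper name is illustrative):

	static int comparator_in_past(u32 counter, u32 comparator)
	{
		/* the signed difference of two u32s stays correct across
		 * 32-bit wraparound, unlike a naive counter > comparator test */
		return (s32)(counter - comparator) > 0;
	}

That is exactly the (int)(cnt - hpet_t1_cmp) > 0 test the old hpet_rtc_timer_reinit() used before adding lost_ints * ticks_per_int to catch up.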
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index 79cf608e14ca..45782356a618 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * arch/i386/kernel/topology.c - Populate driverfs with topology information 2 * arch/i386/kernel/topology.c - Populate sysfs with topology information
3 * 3 *
4 * Written by: Matthew Dobson, IBM Corporation 4 * Written by: Matthew Dobson, IBM Corporation
5 * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL 5 * Original Code: Paul Dorwin, IBM Corporation, Patrick Mochel, OSDL
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 0efad8aeb41a..af0d3f70a817 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -94,6 +94,7 @@ asmlinkage void spurious_interrupt_bug(void);
94asmlinkage void machine_check(void); 94asmlinkage void machine_check(void);
95 95
96int kstack_depth_to_print = 24; 96int kstack_depth_to_print = 24;
97static unsigned int code_bytes = 64;
97ATOMIC_NOTIFIER_HEAD(i386die_chain); 98ATOMIC_NOTIFIER_HEAD(i386die_chain);
98 99
99int register_die_notifier(struct notifier_block *nb) 100int register_die_notifier(struct notifier_block *nb)
@@ -291,10 +292,11 @@ void show_registers(struct pt_regs *regs)
291 int i; 292 int i;
292 int in_kernel = 1; 293 int in_kernel = 1;
293 unsigned long esp; 294 unsigned long esp;
294 unsigned short ss; 295 unsigned short ss, gs;
295 296
296 esp = (unsigned long) (&regs->esp); 297 esp = (unsigned long) (&regs->esp);
297 savesegment(ss, ss); 298 savesegment(ss, ss);
299 savesegment(gs, gs);
298 if (user_mode_vm(regs)) { 300 if (user_mode_vm(regs)) {
299 in_kernel = 0; 301 in_kernel = 0;
300 esp = regs->esp; 302 esp = regs->esp;
@@ -313,8 +315,8 @@ void show_registers(struct pt_regs *regs)
313 regs->eax, regs->ebx, regs->ecx, regs->edx); 315 regs->eax, regs->ebx, regs->ecx, regs->edx);
314 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 316 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
315 regs->esi, regs->edi, regs->ebp, esp); 317 regs->esi, regs->edi, regs->ebp, esp);
316 printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", 318 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
317 regs->xds & 0xffff, regs->xes & 0xffff, ss); 319 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
318 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 320 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
319 TASK_COMM_LEN, current->comm, current->pid, 321 TASK_COMM_LEN, current->comm, current->pid,
320 current_thread_info(), current, current->thread_info); 322 current_thread_info(), current, current->thread_info);
@@ -324,7 +326,8 @@ void show_registers(struct pt_regs *regs)
324 */ 326 */
325 if (in_kernel) { 327 if (in_kernel) {
326 u8 *eip; 328 u8 *eip;
327 int code_bytes = 64; 329 unsigned int code_prologue = code_bytes * 43 / 64;
330 unsigned int code_len = code_bytes;
328 unsigned char c; 331 unsigned char c;
329 332
330 printk("\n" KERN_EMERG "Stack: "); 333 printk("\n" KERN_EMERG "Stack: ");
@@ -332,14 +335,14 @@ void show_registers(struct pt_regs *regs)
332 335
333 printk(KERN_EMERG "Code: "); 336 printk(KERN_EMERG "Code: ");
334 337
335 eip = (u8 *)regs->eip - 43; 338 eip = (u8 *)regs->eip - code_prologue;
336 if (eip < (u8 *)PAGE_OFFSET || 339 if (eip < (u8 *)PAGE_OFFSET ||
337 probe_kernel_address(eip, c)) { 340 probe_kernel_address(eip, c)) {
338 /* try starting at EIP */ 341 /* try starting at EIP */
339 eip = (u8 *)regs->eip; 342 eip = (u8 *)regs->eip;
340 code_bytes = 32; 343 code_len = code_len - code_prologue + 1;
341 } 344 }
342 for (i = 0; i < code_bytes; i++, eip++) { 345 for (i = 0; i < code_len; i++, eip++) {
343 if (eip < (u8 *)PAGE_OFFSET || 346 if (eip < (u8 *)PAGE_OFFSET ||
344 probe_kernel_address(eip, c)) { 347 probe_kernel_address(eip, c)) {
345 printk(" Bad EIP value."); 348 printk(" Bad EIP value.");
@@ -1191,3 +1194,13 @@ static int __init kstack_setup(char *s)
1191 return 1; 1194 return 1;
1192} 1195}
1193__setup("kstack=", kstack_setup); 1196__setup("kstack=", kstack_setup);
1197
1198static int __init code_bytes_setup(char *s)
1199{
1200 code_bytes = simple_strtoul(s, NULL, 0);
1201 if (code_bytes > 8192)
1202 code_bytes = 8192;
1203
1204 return 1;
1205}
1206__setup("code_bytes=", code_bytes_setup);
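code_bytes= makes the previously hard-wired Code: dump length tunable from the kernel command line, keeping the old 43-before/21-after split as a ratio. The arithmetic, worked through for the default:

	unsigned int code_bytes    = 64;			 /* or from code_bytes= */
	unsigned int code_prologue = code_bytes * 43 / 64;	 /* 43 bytes before EIP */
	unsigned int code_epilogue = code_bytes - code_prologue; /* 21 bytes from EIP on */

Booting with e.g. code_bytes=128 scales both halves proportionally; values are clamped to 8192 by the setup handler above.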
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 2cfc7b09b925..3082a418635c 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -23,6 +23,7 @@
23 * an extra value to store the TSC freq 23 * an extra value to store the TSC freq
24 */ 24 */
25unsigned int tsc_khz; 25unsigned int tsc_khz;
26unsigned long long (*custom_sched_clock)(void);
26 27
27int tsc_disable; 28int tsc_disable;
28 29
@@ -59,12 +60,6 @@ static inline int check_tsc_unstable(void)
59 return tsc_unstable; 60 return tsc_unstable;
60} 61}
61 62
62void mark_tsc_unstable(void)
63{
64 tsc_unstable = 1;
65}
66EXPORT_SYMBOL_GPL(mark_tsc_unstable);
67
 68/* Accelerators for sched_clock() 63/* Accelerators for sched_clock()
69 * convert from cycles(64bits) => nanoseconds (64bits) 64 * convert from cycles(64bits) => nanoseconds (64bits)
70 * basic equation: 65 * basic equation:
@@ -107,14 +102,14 @@ unsigned long long sched_clock(void)
107{ 102{
108 unsigned long long this_offset; 103 unsigned long long this_offset;
109 104
105 if (unlikely(custom_sched_clock))
106 return (*custom_sched_clock)();
107
110 /* 108 /*
111 * in the NUMA case we dont use the TSC as they are not 109 * Fall back to jiffies if there's no TSC available:
112 * synchronized across all CPUs.
113 */ 110 */
114#ifndef CONFIG_NUMA 111 if (unlikely(tsc_disable))
115 if (!cpu_khz || check_tsc_unstable()) 112 /* No locking but a rare wrong value is not a big deal: */
116#endif
117 /* no locking but a rare wrong value is not a big deal */
118 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 113 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
119 114
120 /* read the Time Stamp Counter: */ 115 /* read the Time Stamp Counter: */
@@ -194,13 +189,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
194void __init tsc_init(void) 189void __init tsc_init(void)
195{ 190{
196 if (!cpu_has_tsc || tsc_disable) 191 if (!cpu_has_tsc || tsc_disable)
197 return; 192 goto out_no_tsc;
198 193
199 cpu_khz = calculate_cpu_khz(); 194 cpu_khz = calculate_cpu_khz();
200 tsc_khz = cpu_khz; 195 tsc_khz = cpu_khz;
201 196
202 if (!cpu_khz) 197 if (!cpu_khz)
203 return; 198 goto out_no_tsc;
204 199
205 printk("Detected %lu.%03lu MHz processor.\n", 200 printk("Detected %lu.%03lu MHz processor.\n",
206 (unsigned long)cpu_khz / 1000, 201 (unsigned long)cpu_khz / 1000,
@@ -208,37 +203,18 @@ void __init tsc_init(void)
208 203
209 set_cyc2ns_scale(cpu_khz); 204 set_cyc2ns_scale(cpu_khz);
210 use_tsc_delay(); 205 use_tsc_delay();
211} 206 return;
212 207
213#ifdef CONFIG_CPU_FREQ 208out_no_tsc:
214 209 /*
215static unsigned int cpufreq_delayed_issched = 0; 210 * Set the tsc_disable flag if there's no TSC support, this
216static unsigned int cpufreq_init = 0; 211 * makes it a fast flag for the kernel to see whether it
217static struct work_struct cpufreq_delayed_get_work; 212 * should be using the TSC.
218 213 */
219static void handle_cpufreq_delayed_get(struct work_struct *work) 214 tsc_disable = 1;
220{
221 unsigned int cpu;
222
223 for_each_online_cpu(cpu)
224 cpufreq_get(cpu);
225
226 cpufreq_delayed_issched = 0;
227} 215}
228 216
229/* 217#ifdef CONFIG_CPU_FREQ
230 * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
231 * to verify the CPU frequency the timing core thinks the CPU is running
232 * at is still correct.
233 */
234static inline void cpufreq_delayed_get(void)
235{
236 if (cpufreq_init && !cpufreq_delayed_issched) {
237 cpufreq_delayed_issched = 1;
238 printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
239 schedule_work(&cpufreq_delayed_get_work);
240 }
241}
242 218
243/* 219/*
244 * if the CPU frequency is scaled, TSC-based delays will need a different 220 * if the CPU frequency is scaled, TSC-based delays will need a different
@@ -303,17 +279,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
303 279
304static int __init cpufreq_tsc(void) 280static int __init cpufreq_tsc(void)
305{ 281{
306 int ret; 282 return cpufreq_register_notifier(&time_cpufreq_notifier_block,
307 283 CPUFREQ_TRANSITION_NOTIFIER);
308 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
309 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
310 CPUFREQ_TRANSITION_NOTIFIER);
311 if (!ret)
312 cpufreq_init = 1;
313
314 return ret;
315} 284}
316
317core_initcall(cpufreq_tsc); 285core_initcall(cpufreq_tsc);
318 286
319#endif 287#endif
@@ -321,7 +289,6 @@ core_initcall(cpufreq_tsc);
321/* clock source code */ 289/* clock source code */
322 290
323static unsigned long current_tsc_khz = 0; 291static unsigned long current_tsc_khz = 0;
324static int tsc_update_callback(void);
325 292
326static cycle_t read_tsc(void) 293static cycle_t read_tsc(void)
327{ 294{
@@ -339,37 +306,28 @@ static struct clocksource clocksource_tsc = {
339 .mask = CLOCKSOURCE_MASK(64), 306 .mask = CLOCKSOURCE_MASK(64),
340 .mult = 0, /* to be set */ 307 .mult = 0, /* to be set */
341 .shift = 22, 308 .shift = 22,
342 .update_callback = tsc_update_callback, 309 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
343 .is_continuous = 1, 310 CLOCK_SOURCE_MUST_VERIFY,
344}; 311};
345 312
346static int tsc_update_callback(void) 313void mark_tsc_unstable(void)
347{ 314{
348 int change = 0; 315 if (!tsc_unstable) {
349 316 tsc_unstable = 1;
350 /* check to see if we should switch to the safe clocksource: */ 317 /* Can be called before registration */
351 if (clocksource_tsc.rating != 0 && check_tsc_unstable()) { 318 if (clocksource_tsc.mult)
352 clocksource_tsc.rating = 0; 319 clocksource_change_rating(&clocksource_tsc, 0);
353 clocksource_reselect(); 320 else
354 change = 1; 321 clocksource_tsc.rating = 0;
355 }
356
357 /* only update if tsc_khz has changed: */
358 if (current_tsc_khz != tsc_khz) {
359 current_tsc_khz = tsc_khz;
360 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
361 clocksource_tsc.shift);
362 change = 1;
363 } 322 }
364
365 return change;
366} 323}
324EXPORT_SYMBOL_GPL(mark_tsc_unstable);
367 325
368static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) 326static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
369{ 327{
370 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", 328 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
371 d->ident); 329 d->ident);
372 mark_tsc_unstable(); 330 tsc_unstable = 1;
373 return 0; 331 return 0;
374} 332}
375 333
@@ -386,65 +344,44 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
386 {} 344 {}
387}; 345};
388 346
389#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
390static struct timer_list verify_tsc_freq_timer;
391
392/* XXX - Probably should add locking */
393static void verify_tsc_freq(unsigned long unused)
394{
395 static u64 last_tsc;
396 static unsigned long last_jiffies;
397
398 u64 now_tsc, interval_tsc;
399 unsigned long now_jiffies, interval_jiffies;
400
401
402 if (check_tsc_unstable())
403 return;
404
405 rdtscll(now_tsc);
406 now_jiffies = jiffies;
407
408 if (!last_jiffies) {
409 goto out;
410 }
411
412 interval_jiffies = now_jiffies - last_jiffies;
413 interval_tsc = now_tsc - last_tsc;
414 interval_tsc *= HZ;
415 do_div(interval_tsc, cpu_khz*1000);
416
417 if (interval_tsc < (interval_jiffies * 3 / 4)) {
418 printk("TSC appears to be running slowly. "
419 "Marking it as unstable\n");
420 mark_tsc_unstable();
421 return;
422 }
423
424out:
425 last_tsc = now_tsc;
426 last_jiffies = now_jiffies;
427 /* set us up to go off on the next interval: */
428 mod_timer(&verify_tsc_freq_timer,
429 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
430}
431
432/* 347/*
433 * Make an educated guess if the TSC is trustworthy and synchronized 348 * Make an educated guess if the TSC is trustworthy and synchronized
434 * over all CPUs. 349 * over all CPUs.
435 */ 350 */
436static __init int unsynchronized_tsc(void) 351__cpuinit int unsynchronized_tsc(void)
437{ 352{
353 if (!cpu_has_tsc || tsc_unstable)
354 return 1;
438 /* 355 /*
439 * Intel systems are normally all synchronized. 356 * Intel systems are normally all synchronized.
440 * Exceptions must mark TSC as unstable: 357 * Exceptions must mark TSC as unstable:
441 */ 358 */
442 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 359 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
443 return 0; 360 /* assume multi socket systems are not synchronized: */
361 if (num_possible_cpus() > 1)
362 tsc_unstable = 1;
363 }
364 return tsc_unstable;
365}
366
367/*
 368 * Geode_LX - the OLPC CPU has a possibly very reliable TSC
369 */
370#ifdef CONFIG_MGEODE_LX
371/* RTSC counts during suspend */
372#define RTSC_SUSP 0x100
373
374static void __init check_geode_tsc_reliable(void)
375{
376 unsigned long val;
444 377
445 /* assume multi socket systems are not synchronized: */ 378 rdmsrl(MSR_GEODE_BUSCONT_CONF0, val);
446 return num_possible_cpus() > 1; 379 if ((val & RTSC_SUSP))
380 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
447} 381}
382#else
383static inline void check_geode_tsc_reliable(void) { }
384#endif
448 385
449static int __init init_tsc_clocksource(void) 386static int __init init_tsc_clocksource(void)
450{ 387{
@@ -453,20 +390,16 @@ static int __init init_tsc_clocksource(void)
453 /* check blacklist */ 390 /* check blacklist */
454 dmi_check_system(bad_tsc_dmi_table); 391 dmi_check_system(bad_tsc_dmi_table);
455 392
456 if (unsynchronized_tsc()) /* mark unstable if unsynced */ 393 unsynchronized_tsc();
457 mark_tsc_unstable(); 394 check_geode_tsc_reliable();
458 current_tsc_khz = tsc_khz; 395 current_tsc_khz = tsc_khz;
459 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 396 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
460 clocksource_tsc.shift); 397 clocksource_tsc.shift);
461 /* lower the rating if we already know its unstable: */ 398 /* lower the rating if we already know its unstable: */
462 if (check_tsc_unstable()) 399 if (check_tsc_unstable()) {
463 clocksource_tsc.rating = 0; 400 clocksource_tsc.rating = 0;
464 401 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
465 init_timer(&verify_tsc_freq_timer); 402 }
466 verify_tsc_freq_timer.function = verify_tsc_freq;
467 verify_tsc_freq_timer.expires =
468 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
469 add_timer(&verify_tsc_freq_timer);
470 403
471 return clocksource_register(&clocksource_tsc); 404 return clocksource_register(&clocksource_tsc);
472 } 405 }
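
[Editor's note: the mult/shift pair set up here turns raw TSC cycles into nanoseconds with one multiply and one shift. A standalone sketch of that fixed-point conversion, assuming the usual definition mult = (10^6 << shift) / khz and using made-up frequency numbers:

/* Sketch of the clocksource_khz2mult()-style cycles->ns conversion. */
#include <stdio.h>
#include <stdint.h>

static uint32_t khz2mult(uint32_t khz, uint32_t shift)
{
	/* 10^6/khz nanoseconds per cycle, in shifted fixed point */
	uint64_t tmp = 1000000ULL << shift;
	tmp += khz / 2;					/* round to nearest */
	return (uint32_t)(tmp / khz);
}

int main(void)
{
	uint32_t shift = 22;				/* matches clocksource_tsc.shift */
	uint32_t mult = khz2mult(2000000, shift);	/* pretend 2 GHz TSC */
	uint64_t cycles = 2000000000ULL;		/* one second worth of cycles */
	uint64_t ns = (cycles * mult) >> shift;

	printf("mult=%u -> %llu ns\n", mult, (unsigned long long)ns);
	return 0;
}
]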
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c
new file mode 100644
index 000000000000..12424629af87
--- /dev/null
+++ b/arch/i386/kernel/tsc_sync.c
@@ -0,0 +1 @@
#include "../../x86_64/kernel/tsc_sync.c"
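
[Editor's note: this new file shares the x86_64 implementation by textual inclusion rather than duplicating it. The same idiom in miniature, with hypothetical file names:

/* shared.c -- the single real implementation */
int shared_answer(void)
{
	return 42;
}

/* wrapper.c -- pulls shared.c's definitions into this translation unit,
 * exactly as i386's tsc_sync.c pulls in the x86_64 file above */
#include "shared.c"
]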
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index be2f96e67f78..d1b8f2b7aea6 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
96{ 96{
97 int ret = 0; 97 int ret = 0;
98 98
99 /* kernel_vm86_regs is missing xfs, so copy everything up to 99 /* kernel_vm86_regs is missing xgs, so copy everything up to
100 (but not including) xgs, and then rest after xgs. */ 100 (but not including) orig_eax, and then rest including orig_eax. */
101 ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); 101 ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax));
102 ret += copy_to_user(&user->__null_gs, &regs->pt.xgs, 102 ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax,
103 sizeof(struct kernel_vm86_regs) - 103 sizeof(struct kernel_vm86_regs) -
104 offsetof(struct kernel_vm86_regs, pt.xgs)); 104 offsetof(struct kernel_vm86_regs, pt.orig_eax));
105 105
106 return ret; 106 return ret;
107} 107}
@@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
113{ 113{
114 int ret = 0; 114 int ret = 0;
115 115
116 ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); 116 /* copy eax-xfs inclusive */
117 ret += copy_from_user(&regs->pt.xgs, &user->__null_gs, 117 ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax));
118 /* copy orig_eax-__gsh+extra */
119 ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax,
118 sizeof(struct kernel_vm86_regs) - 120 sizeof(struct kernel_vm86_regs) -
119 offsetof(struct kernel_vm86_regs, pt.xgs) + 121 offsetof(struct kernel_vm86_regs, pt.orig_eax) +
120 extra); 122 extra);
121
122 return ret; 123 return ret;
123} 124}
124 125
@@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
157 158
158 ret = KVM86->regs32; 159 ret = KVM86->regs32;
159 160
160 loadsegment(fs, current->thread.saved_fs); 161 ret->xfs = current->thread.saved_fs;
161 ret->xgs = current->thread.saved_gs; 162 loadsegment(gs, current->thread.saved_gs);
162 163
163 return ret; 164 return ret;
164} 165}
@@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
285 */ 286 */
286 info->regs.pt.xds = 0; 287 info->regs.pt.xds = 0;
287 info->regs.pt.xes = 0; 288 info->regs.pt.xes = 0;
288 info->regs.pt.xgs = 0; 289 info->regs.pt.xfs = 0;
289 290
290/* we are clearing fs later just before "jmp resume_userspace", 291/* we are clearing gs later just before "jmp resume_userspace",
291 * because it is not saved/restored. 292 * because it is not saved/restored.
292 */ 293 */
293 294
@@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
321 */ 322 */
322 info->regs32->eax = 0; 323 info->regs32->eax = 0;
323 tsk->thread.saved_esp0 = tsk->thread.esp0; 324 tsk->thread.saved_esp0 = tsk->thread.esp0;
324 savesegment(fs, tsk->thread.saved_fs); 325 tsk->thread.saved_fs = info->regs32->xfs;
325 tsk->thread.saved_gs = info->regs32->xgs; 326 savesegment(gs, tsk->thread.saved_gs);
326 327
327 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
328 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
342 __asm__ __volatile__( 343 __asm__ __volatile__(
343 "movl %0,%%esp\n\t" 344 "movl %0,%%esp\n\t"
344 "movl %1,%%ebp\n\t" 345 "movl %1,%%ebp\n\t"
345 "mov %2, %%fs\n\t" 346 "mov %2, %%gs\n\t"
346 "jmp resume_userspace" 347 "jmp resume_userspace"
347 : /* no outputs */ 348 : /* no outputs */
348 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); 349 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
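
[Editor's note: the copy helpers above split the register block at pt.orig_eax with offsetof() because the kernel- and user-visible layouts diverge there. A self-contained sketch of the pattern, with memcpy() standing in for copy_to_user() and an invented struct:

/* Two-part copy around a split-point member, as in copy_vm86_regs_to_user(). */
#include <stddef.h>
#include <string.h>

struct regs {
	long eax, ebx, ecx;
	long orig_eax;		/* split point */
	long eip, esp;
};

void copy_split(struct regs *dst, const struct regs *src)
{
	size_t split = offsetof(struct regs, orig_eax);

	memcpy(dst, src, split);			/* eax..ecx */
	memcpy((char *)dst + split, (const char *)src + split,
	       sizeof(*src) - split);			/* orig_eax..esp */
}
]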
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
new file mode 100644
index 000000000000..bb5a7abf949c
--- /dev/null
+++ b/arch/i386/kernel/vmi.c
@@ -0,0 +1,949 @@
1/*
2 * VMI specific paravirt-ops implementation
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to zach@vmware.com
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/license.h>
27#include <linux/cpu.h>
28#include <linux/bootmem.h>
29#include <linux/mm.h>
30#include <asm/vmi.h>
31#include <asm/io.h>
32#include <asm/fixmap.h>
33#include <asm/apicdef.h>
34#include <asm/apic.h>
35#include <asm/processor.h>
36#include <asm/timer.h>
37#include <asm/vmi_time.h>
38
39/* Convenient for calling VMI functions indirectly in the ROM */
40typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
41typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
42
43#define call_vrom_func(rom,func) \
44 (((VROMFUNC *)(rom->func))())
45
46#define call_vrom_long_func(rom,func,arg) \
47 (((VROMLONGFUNC *)(rom->func)) (arg))
48
49static struct vrom_header *vmi_rom;
50static int license_gplok;
51static int disable_nodelay;
52static int disable_pge;
53static int disable_pse;
54static int disable_sep;
55static int disable_tsc;
56static int disable_mtrr;
57
58/* Cached VMI operations */
59struct {
60 void (*cpuid)(void /* non-c */);
61 void (*_set_ldt)(u32 selector);
62 void (*set_tr)(u32 selector);
63 void (*set_kernel_stack)(u32 selector, u32 esp0);
64 void (*allocate_page)(u32, u32, u32, u32, u32);
65 void (*release_page)(u32, u32);
66 void (*set_pte)(pte_t, pte_t *, unsigned);
67 void (*update_pte)(pte_t *, unsigned);
68 void (*set_linear_mapping)(int, u32, u32, u32);
69 void (*flush_tlb)(int);
70 void (*set_initial_ap_state)(int, int);
71 void (*halt)(void);
72} vmi_ops;
73
74/* XXX move this to alternative.h */
75extern struct paravirt_patch __start_parainstructions[],
76 __stop_parainstructions[];
77
78/*
79 * VMI patching routines.
80 */
81#define MNEM_CALL 0xe8
82#define MNEM_JMP 0xe9
83#define MNEM_RET 0xc3
84
85static char irq_save_disable_callout[] = {
86 MNEM_CALL, 0, 0, 0, 0,
87 MNEM_CALL, 0, 0, 0, 0,
88 MNEM_RET
89};
90#define IRQ_PATCH_INT_MASK 0
91#define IRQ_PATCH_DISABLE 5
92
93static inline void patch_offset(unsigned char *eip, unsigned char *dest)
94{
95 *(unsigned long *)(eip+1) = dest-eip-5;
96}
97
98static unsigned patch_internal(int call, unsigned len, void *insns)
99{
100 u64 reloc;
101 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
102 reloc = call_vrom_long_func(vmi_rom, get_reloc, call);
103 switch(rel->type) {
104 case VMI_RELOCATION_CALL_REL:
105 BUG_ON(len < 5);
106 *(char *)insns = MNEM_CALL;
107 patch_offset(insns, rel->eip);
108 return 5;
109
110 case VMI_RELOCATION_JUMP_REL:
111 BUG_ON(len < 5);
112 *(char *)insns = MNEM_JMP;
113 patch_offset(insns, rel->eip);
114 return 5;
115
116 case VMI_RELOCATION_NOP:
117 /* obliterate the whole thing */
118 return 0;
119
120 case VMI_RELOCATION_NONE:
121 /* leave native code in place */
122 break;
123
124 default:
125 BUG();
126 }
127 return len;
128}
129
130/*
131 * Apply patch if appropriate, return length of new instruction
132 * sequence. The callee does nop padding for us.
133 */
134static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
135{
136 switch (type) {
137 case PARAVIRT_IRQ_DISABLE:
138 return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
139 case PARAVIRT_IRQ_ENABLE:
140 return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
141 case PARAVIRT_RESTORE_FLAGS:
142 return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
143 case PARAVIRT_SAVE_FLAGS:
144 return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
145 case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE:
146 if (len >= 10) {
147 patch_internal(VMI_CALL_GetInterruptMask, len, insns);
148 patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5);
149 return 10;
150 } else {
151 /*
152 * You bastards didn't leave enough room to
153 * patch save_flags_irq_disable inline. Patch
154 * to a helper
155 */
156 BUG_ON(len < 5);
157 *(char *)insns = MNEM_CALL;
158 patch_offset(insns, irq_save_disable_callout);
159 return 5;
160 }
161 case PARAVIRT_INTERRUPT_RETURN:
162 return patch_internal(VMI_CALL_IRET, len, insns);
163 case PARAVIRT_STI_SYSEXIT:
164 return patch_internal(VMI_CALL_SYSEXIT, len, insns);
165 default:
166 break;
167 }
168 return len;
169}
170
171/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
172static void vmi_cpuid(unsigned int *eax, unsigned int *ebx,
173 unsigned int *ecx, unsigned int *edx)
174{
175 int override = 0;
176 if (*eax == 1)
177 override = 1;
178 asm volatile ("call *%6"
179 : "=a" (*eax),
180 "=b" (*ebx),
181 "=c" (*ecx),
182 "=d" (*edx)
183 : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid));
184 if (override) {
185 if (disable_pse)
 186 *edx &= ~(1 << X86_FEATURE_PSE); /* feature macros are bit numbers, not masks */
 187 if (disable_pge)
 188 *edx &= ~(1 << X86_FEATURE_PGE);
 189 if (disable_sep)
 190 *edx &= ~(1 << X86_FEATURE_SEP);
 191 if (disable_tsc)
 192 *edx &= ~(1 << X86_FEATURE_TSC);
 193 if (disable_mtrr)
 194 *edx &= ~(1 << X86_FEATURE_MTRR);
195 }
196}
197
198static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
199{
200 if (gdt[nr].a != new->a || gdt[nr].b != new->b)
201 write_gdt_entry(gdt, nr, new->a, new->b);
202}
203
204static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
205{
206 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
207 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]);
208 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]);
209 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]);
210}
211
212static void vmi_set_ldt(const void *addr, unsigned entries)
213{
214 unsigned cpu = smp_processor_id();
215 u32 low, high;
216
217 pack_descriptor(&low, &high, (unsigned long)addr,
218 entries * sizeof(struct desc_struct) - 1,
219 DESCTYPE_LDT, 0);
220 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
221 vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
222}
223
224static void vmi_set_tr(void)
225{
226 vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
227}
228
229static void vmi_load_esp0(struct tss_struct *tss,
230 struct thread_struct *thread)
231{
232 tss->esp0 = thread->esp0;
233
234 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
235 if (unlikely(tss->ss1 != thread->sysenter_cs)) {
236 tss->ss1 = thread->sysenter_cs;
237 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
238 }
239 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0);
240}
241
242static void vmi_flush_tlb_user(void)
243{
244 vmi_ops.flush_tlb(VMI_FLUSH_TLB);
245}
246
247static void vmi_flush_tlb_kernel(void)
248{
249 vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
250}
251
252/* Stub to do nothing at all; used for delays and unimplemented calls */
253static void vmi_nop(void)
254{
255}
256
257/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
258#ifdef CONFIG_NO_IDLE_HZ
259static fastcall void vmi_safe_halt(void)
260{
261 int idle = vmi_stop_hz_timer();
262 vmi_ops.halt();
263 if (idle) {
264 local_irq_disable();
265 vmi_account_time_restart_hz_timer();
266 local_irq_enable();
267 }
268}
269#endif
270
271#ifdef CONFIG_DEBUG_PAGE_TYPE
272
273#ifdef CONFIG_X86_PAE
274#define MAX_BOOT_PTS (2048+4+1)
275#else
276#define MAX_BOOT_PTS (1024+1)
277#endif
278
279/*
280 * During boot, mem_map is not yet available in paging_init, so stash
281 * all the boot page allocations here.
282 */
283static struct {
284 u32 pfn;
285 int type;
286} boot_page_allocations[MAX_BOOT_PTS];
287static int num_boot_page_allocations;
288static int boot_allocations_applied;
289
290void vmi_apply_boot_page_allocations(void)
291{
292 int i;
293 BUG_ON(!mem_map);
294 for (i = 0; i < num_boot_page_allocations; i++) {
295 struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
 297 page->type = boot_page_allocations[i].type &
 298 ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
299 }
300 boot_allocations_applied = 1;
301}
302
303static void record_page_type(u32 pfn, int type)
304{
305 BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
306 boot_page_allocations[num_boot_page_allocations].pfn = pfn;
307 boot_page_allocations[num_boot_page_allocations].type = type;
308 num_boot_page_allocations++;
309}
310
311static void check_zeroed_page(u32 pfn, int type, struct page *page)
312{
313 u32 *ptr;
314 int i;
315 int limit = PAGE_SIZE / sizeof(int);
316
317 if (page_address(page))
318 ptr = (u32 *)page_address(page);
319 else
320 ptr = (u32 *)__va(pfn << PAGE_SHIFT);
321 /*
322 * When cloning the root in non-PAE mode, only the userspace
323 * pdes need to be zeroed.
324 */
325 if (type & VMI_PAGE_CLONE)
326 limit = USER_PTRS_PER_PGD;
327 for (i = 0; i < limit; i++)
328 BUG_ON(ptr[i]);
329}
330
331/*
332 * We stash the page type into struct page so we can verify the page
333 * types are used properly.
334 */
335static void vmi_set_page_type(u32 pfn, int type)
336{
337 /* PAE can have multiple roots per page - don't track */
338 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
339 return;
340
341 if (boot_allocations_applied) {
342 struct page *page = pfn_to_page(pfn);
343 if (type != VMI_PAGE_NORMAL)
344 BUG_ON(page->type);
345 else
346 BUG_ON(page->type == VMI_PAGE_NORMAL);
347 page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
348 if (type & VMI_PAGE_ZEROED)
349 check_zeroed_page(pfn, type, page);
350 } else {
351 record_page_type(pfn, type);
352 }
353}
354
355static void vmi_check_page_type(u32 pfn, int type)
356{
357 /* PAE can have multiple roots per page - skip checks */
358 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
359 return;
360
361 type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
362 if (boot_allocations_applied) {
363 struct page *page = pfn_to_page(pfn);
364 BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
365 BUG_ON(type == VMI_PAGE_NORMAL && page->type);
366 BUG_ON((type & page->type) == 0);
367 }
368}
369#else
370#define vmi_set_page_type(p,t) do { } while (0)
371#define vmi_check_page_type(p,t) do { } while (0)
372#endif
373
374static void vmi_allocate_pt(u32 pfn)
375{
376 vmi_set_page_type(pfn, VMI_PAGE_L1);
377 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
378}
379
380static void vmi_allocate_pd(u32 pfn)
381{
382 /*
383 * This call comes in very early, before mem_map is setup.
384 * It is called only for swapper_pg_dir, which already has
385 * data on it.
386 */
387 vmi_set_page_type(pfn, VMI_PAGE_L2);
388 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
389}
390
391static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
392{
393 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
394 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
395 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
396}
397
398static void vmi_release_pt(u32 pfn)
399{
400 vmi_ops.release_page(pfn, VMI_PAGE_L1);
401 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
402}
403
404static void vmi_release_pd(u32 pfn)
405{
406 vmi_ops.release_page(pfn, VMI_PAGE_L2);
407 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
408}
409
410/*
411 * Helper macros for MMU update flags. We can defer updates until a flush
412 * or page invalidation only if the update is to the current address space
413 * (otherwise, there is no flush). We must check against init_mm, since
414 * this could be a kernel update, which usually passes init_mm, although
415 * sometimes this check can be skipped if we know the particular function
416 * is only called on user mode PTEs. We could change the kernel to pass
417 * current->active_mm here, but in particular, I was unsure if changing
418 * mm/highmem.c to do this would still be correct on other architectures.
419 */
420#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \
421 (!mustbeuser && (mm) == &init_mm))
422#define vmi_flags_addr(mm, addr, level, user) \
423 ((level) | (is_current_as(mm, user) ? \
424 (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
425#define vmi_flags_addr_defer(mm, addr, level, user) \
426 ((level) | (is_current_as(mm, user) ? \
427 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
428
429static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep)
430{
431 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
432 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
433}
434
435static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
436{
437 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
438 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
439}
440
441static void vmi_set_pte(pte_t *ptep, pte_t pte)
442{
443 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
444 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
445 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
446}
447
448static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
449{
450 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
451 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
452}
453
454static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
455{
456#ifdef CONFIG_X86_PAE
457 const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
458 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
459#else
460 const pte_t pte = { pmdval.pud.pgd.pgd };
461 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
462#endif
463 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
464}
465
466#ifdef CONFIG_X86_PAE
467
468static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
469{
470 /*
471 * XXX This is called from set_pmd_pte, but at both PT
472 * and PD layers so the VMI_PAGE_PT flag is wrong. But
473 * it is only called for large page mapping changes,
 474 * the Xen backend doesn't support large pages, and the
475 * ESX backend doesn't depend on the flag.
476 */
477 set_64bit((unsigned long long *)ptep,pte_val(pteval));
478 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
479}
480
481static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
482{
483 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
484 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
485}
486
487static void vmi_set_pud(pud_t *pudp, pud_t pudval)
488{
489 /* Um, eww */
490 const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
491 vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
492 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
493}
494
495static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
496{
497 const pte_t pte = { 0 };
498 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
499 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
500}
501
502void vmi_pmd_clear(pmd_t *pmd)
503{
504 const pte_t pte = { 0 };
505 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
506 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
507}
508#endif
509
510#ifdef CONFIG_SMP
511struct vmi_ap_state ap;
512extern void setup_pda(void);
513
514static void __init /* XXX cpu hotplug */
515vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
516 unsigned long start_esp)
517{
518 /* Default everything to zero. This is fine for most GPRs. */
519 memset(&ap, 0, sizeof(struct vmi_ap_state));
520
521 ap.gdtr_limit = GDT_SIZE - 1;
522 ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);
523
524 ap.idtr_limit = IDT_ENTRIES * 8 - 1;
525 ap.idtr_base = (unsigned long) idt_table;
526
527 ap.ldtr = 0;
528
529 ap.cs = __KERNEL_CS;
530 ap.eip = (unsigned long) start_eip;
531 ap.ss = __KERNEL_DS;
532 ap.esp = (unsigned long) start_esp;
533
534 ap.ds = __USER_DS;
535 ap.es = __USER_DS;
536 ap.fs = __KERNEL_PDA;
537 ap.gs = 0;
538
539 ap.eflags = 0;
540
541 setup_pda();
542
543#ifdef CONFIG_X86_PAE
544 /* efer should match BSP efer. */
545 if (cpu_has_nx) {
546 unsigned l, h;
547 rdmsr(MSR_EFER, l, h);
548 ap.efer = (unsigned long long) h << 32 | l;
549 }
550#endif
551
552 ap.cr3 = __pa(swapper_pg_dir);
553 /* Protected mode, paging, AM, WP, NE, MP. */
554 ap.cr0 = 0x80050023;
555 ap.cr4 = mmu_cr4_features;
556 vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid);
557}
558#endif
559
560static inline int __init check_vmi_rom(struct vrom_header *rom)
561{
562 struct pci_header *pci;
563 struct pnp_header *pnp;
564 const char *manufacturer = "UNKNOWN";
565 const char *product = "UNKNOWN";
566 const char *license = "unspecified";
567
568 if (rom->rom_signature != 0xaa55)
569 return 0;
570 if (rom->vrom_signature != VMI_SIGNATURE)
571 return 0;
572 if (rom->api_version_maj != VMI_API_REV_MAJOR ||
573 rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
574 printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
575 rom->api_version_maj,
576 rom->api_version_min);
577 return 0;
578 }
579
580 /*
581 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
582 * the PCI header and device type to make sure this is really a
583 * VMI device.
584 */
585 if (!rom->pci_header_offs) {
586 printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
587 return 0;
588 }
589
590 pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
591 if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
592 pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
 593 /* Allow it to run... anyway, but warn */
594 printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
595 }
596
597 if (rom->pnp_header_offs) {
598 pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
599 if (pnp->manufacturer_offset)
600 manufacturer = (const char *)rom+pnp->manufacturer_offset;
601 if (pnp->product_offset)
602 product = (const char *)rom+pnp->product_offset;
603 }
604
605 if (rom->license_offs)
606 license = (char *)rom+rom->license_offs;
607
608 printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
609 manufacturer, product,
610 rom->api_version_maj, rom->api_version_min,
611 pci->rom_version_maj, pci->rom_version_min);
612
613 license_gplok = license_is_gpl_compatible(license);
614 if (!license_gplok) {
615 printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... "
616 "inlining disabled\n",
617 license);
618 add_taint(TAINT_PROPRIETARY_MODULE);
619 }
620 return 1;
621}
622
623/*
624 * Probe for the VMI option ROM
625 */
626static inline int __init probe_vmi_rom(void)
627{
628 unsigned long base;
629
630 /* VMI ROM is in option ROM area, check signature */
631 for (base = 0xC0000; base < 0xE0000; base += 2048) {
632 struct vrom_header *romstart;
633 romstart = (struct vrom_header *)isa_bus_to_virt(base);
634 if (check_vmi_rom(romstart)) {
635 vmi_rom = romstart;
636 return 1;
637 }
638 }
639 return 0;
640}
641
642/*
643 * VMI setup common to all processors
644 */
645void vmi_bringup(void)
646{
647 /* We must establish the lowmem mapping for MMU ops to work */
648 if (vmi_rom)
649 vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
650}
651
652/*
653 * Return a pointer to the VMI function or a NOP stub
654 */
655static void *vmi_get_function(int vmicall)
656{
657 u64 reloc;
658 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
659 reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall);
660 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);
661 if (rel->type == VMI_RELOCATION_CALL_REL)
662 return (void *)rel->eip;
663 else
664 return (void *)vmi_nop;
665}
666
667/*
668 * Helper macro for making the VMI paravirt-ops fill code readable.
669 * For unimplemented operations, fall back to default.
670 */
671#define para_fill(opname, vmicall) \
672do { \
673 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
674 VMI_CALL_##vmicall); \
675 if (rel->type != VMI_RELOCATION_NONE) { \
676 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \
677 paravirt_ops.opname = (void *)rel->eip; \
678 } \
679} while (0)
680
681/*
682 * Activate the VMI interface and switch into paravirtualized mode
683 */
684static inline int __init activate_vmi(void)
685{
686 short kernel_cs;
687 u64 reloc;
688 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
689
690 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
691 printk(KERN_ERR "VMI ROM failed to initialize!");
692 return 0;
693 }
694 savesegment(cs, kernel_cs);
695
696 paravirt_ops.paravirt_enabled = 1;
697 paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
698
699 paravirt_ops.patch = vmi_patch;
700 paravirt_ops.name = "vmi";
701
702 /*
703 * Many of these operations are ABI compatible with VMI.
704 * This means we can fill in the paravirt-ops with direct
705 * pointers into the VMI ROM. If the calling convention for
706 * these operations changes, this code needs to be updated.
707 *
708 * Exceptions
709 * CPUID paravirt-op uses pointers, not the native ISA
710 * halt has no VMI equivalent; all VMI halts are "safe"
711 * no MSR support yet - just trap and emulate. VMI uses the
712 * same ABI as the native ISA, but Linux wants exceptions
713 * from bogus MSR read / write handled
714 * rdpmc is not yet used in Linux
715 */
716
717 /* CPUID is special, so very special */
718 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID);
719 if (rel->type != VMI_RELOCATION_NONE) {
720 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
721 vmi_ops.cpuid = (void *)rel->eip;
722 paravirt_ops.cpuid = vmi_cpuid;
723 }
724
725 para_fill(clts, CLTS);
726 para_fill(get_debugreg, GetDR);
727 para_fill(set_debugreg, SetDR);
728 para_fill(read_cr0, GetCR0);
729 para_fill(read_cr2, GetCR2);
730 para_fill(read_cr3, GetCR3);
731 para_fill(read_cr4, GetCR4);
732 para_fill(write_cr0, SetCR0);
733 para_fill(write_cr2, SetCR2);
734 para_fill(write_cr3, SetCR3);
735 para_fill(write_cr4, SetCR4);
736 para_fill(save_fl, GetInterruptMask);
737 para_fill(restore_fl, SetInterruptMask);
738 para_fill(irq_disable, DisableInterrupts);
739 para_fill(irq_enable, EnableInterrupts);
740 /* irq_save_disable !!! sheer pain */
741 patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
742 (char *)paravirt_ops.save_fl);
743 patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
744 (char *)paravirt_ops.irq_disable);
745#ifndef CONFIG_NO_IDLE_HZ
746 para_fill(safe_halt, Halt);
747#else
748 vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
749 paravirt_ops.safe_halt = vmi_safe_halt;
750#endif
751 para_fill(wbinvd, WBINVD);
752 /* paravirt_ops.read_msr = vmi_rdmsr */
753 /* paravirt_ops.write_msr = vmi_wrmsr */
754 para_fill(read_tsc, RDTSC);
755 /* paravirt_ops.rdpmc = vmi_rdpmc */
756
757 /* TR interface doesn't pass TR value */
758 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR);
759 if (rel->type != VMI_RELOCATION_NONE) {
760 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
761 vmi_ops.set_tr = (void *)rel->eip;
762 paravirt_ops.load_tr_desc = vmi_set_tr;
763 }
764
765 /* LDT is special, too */
766 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT);
767 if (rel->type != VMI_RELOCATION_NONE) {
768 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
769 vmi_ops._set_ldt = (void *)rel->eip;
770 paravirt_ops.set_ldt = vmi_set_ldt;
771 }
772
773 para_fill(load_gdt, SetGDT);
774 para_fill(load_idt, SetIDT);
775 para_fill(store_gdt, GetGDT);
776 para_fill(store_idt, GetIDT);
777 para_fill(store_tr, GetTR);
778 paravirt_ops.load_tls = vmi_load_tls;
779 para_fill(write_ldt_entry, WriteLDTEntry);
780 para_fill(write_gdt_entry, WriteGDTEntry);
781 para_fill(write_idt_entry, WriteIDTEntry);
782 reloc = call_vrom_long_func(vmi_rom, get_reloc,
783 VMI_CALL_UpdateKernelStack);
784 if (rel->type != VMI_RELOCATION_NONE) {
785 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
786 vmi_ops.set_kernel_stack = (void *)rel->eip;
787 paravirt_ops.load_esp0 = vmi_load_esp0;
788 }
789
790 para_fill(set_iopl_mask, SetIOPLMask);
791 paravirt_ops.io_delay = (void *)vmi_nop;
792 if (!disable_nodelay) {
793 paravirt_ops.const_udelay = (void *)vmi_nop;
794 }
795
796 para_fill(set_lazy_mode, SetLazyMode);
797
798 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
799 if (rel->type != VMI_RELOCATION_NONE) {
800 vmi_ops.flush_tlb = (void *)rel->eip;
801 paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
802 paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
803 }
804 para_fill(flush_tlb_single, InvalPage);
805
806 /*
807 * Until a standard flag format can be agreed on, we need to
808 * implement these as wrappers in Linux. Get the VMI ROM
809 * function pointers for the two backend calls.
810 */
811#ifdef CONFIG_X86_PAE
812 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
813 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
814#else
815 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
816 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
817#endif
818 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
819 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
820 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
821
822 paravirt_ops.alloc_pt = vmi_allocate_pt;
823 paravirt_ops.alloc_pd = vmi_allocate_pd;
824 paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
825 paravirt_ops.release_pt = vmi_release_pt;
826 paravirt_ops.release_pd = vmi_release_pd;
827 paravirt_ops.set_pte = vmi_set_pte;
828 paravirt_ops.set_pte_at = vmi_set_pte_at;
829 paravirt_ops.set_pmd = vmi_set_pmd;
830 paravirt_ops.pte_update = vmi_update_pte;
831 paravirt_ops.pte_update_defer = vmi_update_pte_defer;
832#ifdef CONFIG_X86_PAE
833 paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
834 paravirt_ops.set_pte_present = vmi_set_pte_present;
835 paravirt_ops.set_pud = vmi_set_pud;
836 paravirt_ops.pte_clear = vmi_pte_clear;
837 paravirt_ops.pmd_clear = vmi_pmd_clear;
838#endif
839 /*
840 * These MUST always be patched. Don't support indirect jumps
841 * through these operations, as the VMI interface may use either
842 * a jump or a call to get to these operations, depending on
843 * the backend. They are performance critical anyway, so requiring
844 * a patch is not a big problem.
845 */
846 paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
847 paravirt_ops.iret = (void *)0xbadbab0;
848
849#ifdef CONFIG_SMP
850 paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
851 vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
852#endif
853
854#ifdef CONFIG_X86_LOCAL_APIC
855 paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
856 paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
857 paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
858#endif
859
860 /*
861 * Check for VMI timer functionality by probing for a cycle frequency method
862 */
863 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
864 if (rel->type != VMI_RELOCATION_NONE) {
865 vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
866 vmi_timer_ops.get_cycle_counter =
867 vmi_get_function(VMI_CALL_GetCycleCounter);
868 vmi_timer_ops.get_wallclock =
869 vmi_get_function(VMI_CALL_GetWallclockTime);
870 vmi_timer_ops.wallclock_updated =
871 vmi_get_function(VMI_CALL_WallclockUpdated);
872 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
873 vmi_timer_ops.cancel_alarm =
874 vmi_get_function(VMI_CALL_CancelAlarm);
875 paravirt_ops.time_init = vmi_time_init;
876 paravirt_ops.get_wallclock = vmi_get_wallclock;
877 paravirt_ops.set_wallclock = vmi_set_wallclock;
878#ifdef CONFIG_X86_LOCAL_APIC
879 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
880 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
881#endif
882 custom_sched_clock = vmi_sched_clock;
883 }
884
885 /*
886 * Alternative instruction rewriting doesn't happen soon enough
887 * to convert VMI_IRET to a call instead of a jump; so we have
888 * to do this before IRQs get reenabled. Fortunately, it is
889 * idempotent.
890 */
891 apply_paravirt(__start_parainstructions, __stop_parainstructions);
892
893 vmi_bringup();
894
895 return 1;
896}
897
898#undef para_fill
899
900void __init vmi_init(void)
901{
902 unsigned long flags;
903
904 if (!vmi_rom)
905 probe_vmi_rom();
906 else
907 check_vmi_rom(vmi_rom);
908
 909 /* In case probing for or validating the ROM failed, bail */
910 if (!vmi_rom)
911 return;
912
913 reserve_top_address(-vmi_rom->virtual_top);
914
915 local_irq_save(flags);
916 activate_vmi();
917#ifdef CONFIG_SMP
918 no_timer_check = 1;
919#endif
920 local_irq_restore(flags & X86_EFLAGS_IF);
921}
922
923static int __init parse_vmi(char *arg)
924{
925 if (!arg)
926 return -EINVAL;
927
928 if (!strcmp(arg, "disable_nodelay"))
929 disable_nodelay = 1;
930 else if (!strcmp(arg, "disable_pge")) {
931 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
932 disable_pge = 1;
933 } else if (!strcmp(arg, "disable_pse")) {
934 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
935 disable_pse = 1;
936 } else if (!strcmp(arg, "disable_sep")) {
937 clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
938 disable_sep = 1;
939 } else if (!strcmp(arg, "disable_tsc")) {
940 clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
941 disable_tsc = 1;
942 } else if (!strcmp(arg, "disable_mtrr")) {
943 clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
944 disable_mtrr = 1;
945 }
946 return 0;
947}
948
949early_param("vmi", parse_vmi);
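
[Editor's note: the heart of the patching machinery above is patch_offset(), which rewrites a 5-byte call or jmp whose rel32 displacement is measured from the end of the instruction (dest - eip - 5). A userspace sketch of that encoding, illustration only:

/* Encode a 5-byte rel32 call, as patch_offset()/patch_internal() do. */
#include <stdio.h>
#include <stdint.h>

#define MNEM_CALL 0xe8

static void patch_call(uint8_t *insn, uint8_t *target)
{
	insn[0] = MNEM_CALL;
	/* displacement is relative to the address of the *next* instruction */
	*(int32_t *)(insn + 1) = (int32_t)(target - insn - 5);
}

int main(void)
{
	uint8_t buf[5];
	patch_call(buf, buf + 100);		/* pretend target is 100 bytes ahead */
	printf("opcode %02x disp %d\n", buf[0], *(int32_t *)(buf + 1));
	return 0;
}
]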
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
new file mode 100644
index 000000000000..76d2adcae5a3
--- /dev/null
+++ b/arch/i386/kernel/vmitime.c
@@ -0,0 +1,499 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
104#if 0
105static cycle_t read_stolen_cycles(void)
106{
107 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
108}
109#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
 125static struct irqaction vmi_timer_irq = {
 126 .handler = vmi_timer_interrupt,
 127 .flags = SA_INTERRUPT,
 128 .mask = CPU_MASK_NONE,
 129 .name = "VMI-alarm",
 130 .dev_id = NULL,
 131 .next = NULL
 132};
133
134/* Alarm rate */
135static int __init vmi_timer_alarm_rate_setup(char* str)
136{
137 int alarm_rate;
138 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
139 alarm_hz = alarm_rate;
140 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
141 }
142 return 1;
143}
144__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
145
146
147/* Initialization */
148static void vmi_get_wallclock_ts(struct timespec *ts)
149{
150 unsigned long long wallclock;
151 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
152 ts->tv_nsec = do_div(wallclock, 1000000000);
153 ts->tv_sec = wallclock;
154}
155
156static void update_xtime_from_wallclock(void)
157{
158 struct timespec ts;
159 vmi_get_wallclock_ts(&ts);
160 do_settimeofday(&ts);
161}
162
163unsigned long vmi_get_wallclock(void)
164{
165 struct timespec ts;
166 vmi_get_wallclock_ts(&ts);
167 return ts.tv_sec;
168}
169
170int vmi_set_wallclock(unsigned long now)
171{
172 return -1;
173}
174
175unsigned long long vmi_sched_clock(void)
176{
177 return read_available_cycles();
178}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183 unsigned long flags;
184
185 local_irq_save(flags);
186 setup_irq(0, &vmi_timer_irq);
187#ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189#endif
190
191 no_sync_cmos_clock = 1;
192
193 vmi_get_wallclock_ts(&xtime);
194 set_normalized_timespec(&wall_to_monotonic,
195 -xtime.tv_sec, -xtime.tv_nsec);
196
197 real_cycles_accounted_system = read_real_cycles();
198 update_xtime_from_wallclock();
199 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
200
201 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
202
203 cycles_per_jiffy = cycles_per_sec;
204 (void)do_div(cycles_per_jiffy, HZ);
205 cycles_per_alarm = cycles_per_sec;
206 (void)do_div(cycles_per_alarm, alarm_hz);
207 cycles_per_msec = cycles_per_sec;
208 (void)do_div(cycles_per_msec, 1000);
209 cpu_khz = cycles_per_msec;
210
211 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
212 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
213 cycles_per_alarm);
214
215 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
216 clocksource_vmi.shift);
217 if (clocksource_register(&clocksource_vmi))
218 printk(KERN_WARNING "Error registering VMITIME clocksource.");
219
220 /* Disable PIT. */
221 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
222
 223 /* Schedule the alarm. Do this in phase with process_times_cycles_accounted_cpu
 224 * to reduce the latency of calling update_process_times. */
225 vmi_timer_ops.set_alarm(
226 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
227 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
228 cycles_per_alarm);
229
230 local_irq_restore(flags);
231}
232
233#ifdef CONFIG_X86_LOCAL_APIC
234
235void __init vmi_timer_setup_boot_alarm(void)
236{
237 local_irq_disable();
238
239 /* Route the interrupt to the correct vector. */
240 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
241
242 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
243 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
244 vmi_timer_ops.set_alarm(
245 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
246 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
247 cycles_per_alarm);
248 local_irq_enable();
249}
250
251/* Initialize the time accounting variables for an AP on an SMP system.
252 * Also, set the local alarm for the AP. */
253void __init vmi_timer_setup_secondary_alarm(void)
254{
255 int cpu = smp_processor_id();
256
257 /* Route the interrupt to the correct vector. */
258 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
259
260 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
261
262 vmi_timer_ops.set_alarm(
263 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
264 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
265 cycles_per_alarm);
266}
267
268#endif
269
270/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
271static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
272{
273 long long cycles_not_accounted;
274
275 write_seqlock(&xtime_lock);
276
277 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
278 while (cycles_not_accounted >= cycles_per_jiffy) {
 279 /* system-wide jiffies and wallclock. */
280 do_timer(1);
281
282 cycles_not_accounted -= cycles_per_jiffy;
283 real_cycles_accounted_system += cycles_per_jiffy;
284 }
285
286 if (vmi_timer_ops.wallclock_updated())
287 update_xtime_from_wallclock();
288
289 write_sequnlock(&xtime_lock);
290}
291
292/* Update per-cpu process times. */
293static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
294 unsigned long long cur_process_times_cycles)
295{
296 long long cycles_not_accounted;
297 cycles_not_accounted = cur_process_times_cycles -
298 per_cpu(process_times_cycles_accounted_cpu, cpu);
299
300 while (cycles_not_accounted >= cycles_per_jiffy) {
301 /* Account time to the current process. This includes
302 * calling into the scheduler to decrement the timeslice
303 * and possibly reschedule.*/
304 update_process_times(user_mode(regs));
305 /* XXX handle /proc/profile multiplier. */
306 profile_tick(CPU_PROFILING);
307
308 cycles_not_accounted -= cycles_per_jiffy;
309 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
310 }
311}
312
313#ifdef CONFIG_NO_IDLE_HZ
314/* Update per-cpu idle times. Used when a no-hz halt is ended. */
315static void vmi_account_no_hz_idle_cycles(int cpu,
316 unsigned long long cur_process_times_cycles)
317{
318 long long cycles_not_accounted;
319 unsigned long no_idle_hz_jiffies = 0;
320
321 cycles_not_accounted = cur_process_times_cycles -
322 per_cpu(process_times_cycles_accounted_cpu, cpu);
323
324 while (cycles_not_accounted >= cycles_per_jiffy) {
325 no_idle_hz_jiffies++;
326 cycles_not_accounted -= cycles_per_jiffy;
327 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
328 }
329 /* Account time to the idle process. */
330 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
331}
332#endif
333
334/* Update per-cpu stolen time. */
335static void vmi_account_stolen_cycles(int cpu,
336 unsigned long long cur_real_cycles,
337 unsigned long long cur_avail_cycles)
338{
339 long long stolen_cycles_not_accounted;
340 unsigned long stolen_jiffies = 0;
341
342 if (cur_real_cycles < cur_avail_cycles)
343 return;
344
345 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
346 per_cpu(stolen_cycles_accounted_cpu, cpu);
347
348 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
349 stolen_jiffies++;
350 stolen_cycles_not_accounted -= cycles_per_jiffy;
351 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
352 }
353 /* HACK: pass NULL to force time onto cpustat->steal. */
354 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
355}
356
357/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
358 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
359static void vmi_local_timer_interrupt(int cpu)
360{
361 unsigned long long cur_real_cycles, cur_process_times_cycles;
362
363 cur_real_cycles = read_real_cycles();
364 cur_process_times_cycles = read_available_cycles();
365 /* Update system wide (real) time state (xtime, jiffies). */
366 vmi_account_real_cycles(cur_real_cycles);
367 /* Update per-cpu process times. */
368 vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
369 /* Update time stolen from this cpu by the hypervisor. */
370 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
371}
372
373#ifdef CONFIG_NO_IDLE_HZ
374
375/* Must be called only from idle loop, with interrupts disabled. */
376int vmi_stop_hz_timer(void)
377{
378 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
379
380 unsigned long seq, next;
381 unsigned long long real_cycles_expiry;
382 int cpu = smp_processor_id();
383 int idle;
384
385 BUG_ON(!irqs_disabled());
386 if (sysctl_hz_timer != 0)
387 return 0;
388
389 cpu_set(cpu, nohz_cpu_mask);
390 smp_mb();
391 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
392 (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
393 cpu_clear(cpu, nohz_cpu_mask);
394 next = jiffies;
395 idle = 0;
396 } else
397 idle = 1;
398
399 /* Convert jiffies to the real cycle counter. */
400 do {
401 seq = read_seqbegin(&xtime_lock);
402 real_cycles_expiry = real_cycles_accounted_system +
403 (long)(next - jiffies) * cycles_per_jiffy;
404 } while (read_seqretry(&xtime_lock, seq));
405
406 /* This cpu is going idle. Disable the periodic alarm. */
407 if (idle) {
408 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
409 per_cpu(idle_start_jiffies, cpu) = jiffies;
410 }
411
412 /* Set the real time alarm to expire at the next event. */
413 vmi_timer_ops.set_alarm(
414 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
415 real_cycles_expiry, 0);
416
417 return idle;
418}
419
420static void vmi_reenable_hz_timer(int cpu)
421{
422 /* For /proc/vmi/info idle_hz stat. */
423 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
424 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
425
426 /* Don't bother explicitly cancelling the one-shot alarm -- at
427	 * worst we will receive a spurious timer interrupt. */
428 vmi_timer_ops.set_alarm(
429 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
430 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
431 cycles_per_alarm);
432 /* Indicate this cpu is no longer nohz idle. */
433 cpu_clear(cpu, nohz_cpu_mask);
434}
435
436/* Called from interrupt handlers when the (local) HZ timer is disabled. */
437void vmi_account_time_restart_hz_timer(void)
438{
439 unsigned long long cur_real_cycles, cur_process_times_cycles;
440 int cpu = smp_processor_id();
441
442 BUG_ON(!irqs_disabled());
443 /* Account the time during which the HZ timer was disabled. */
444 cur_real_cycles = read_real_cycles();
445 cur_process_times_cycles = read_available_cycles();
446 /* Update system wide (real) time state (xtime, jiffies). */
447 vmi_account_real_cycles(cur_real_cycles);
448 /* Update per-cpu idle times. */
449 vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
450 /* Update time stolen from this cpu by the hypervisor. */
451 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
452 /* Reenable the hz timer. */
453 vmi_reenable_hz_timer(cpu);
454}
455
456#endif /* CONFIG_NO_IDLE_HZ */
457
458/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
459 * Handler for IRQ0. Not used on SMP or X86_LOCAL_APIC builds once
460 * APIC setup has run and setup_boot_vmi_alarm() has been called. */
461static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
462{
463 vmi_local_timer_interrupt(smp_processor_id());
464 return IRQ_HANDLED;
465}
466
467#ifdef CONFIG_X86_LOCAL_APIC
468
469/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
470 * Also used on UP when CONFIG_X86_LOCAL_APIC is enabled.
471 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
472void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
473{
474 struct pt_regs *old_regs = set_irq_regs(regs);
475 int cpu = smp_processor_id();
476
477 /*
478 * the NMI deadlock-detector uses this.
479 */
480 per_cpu(irq_stat,cpu).apic_timer_irqs++;
481
482 /*
483 * NOTE! We'd better ACK the irq immediately,
484 * because timer handling can be slow.
485 */
486 ack_APIC_irq();
487
488 /*
489 * update_process_times() expects us to have done irq_enter().
490	 * Besides, if we don't, timer interrupts ignore the global
491 * interrupt lock, which is the WrongThing (tm) to do.
492 */
493 irq_enter();
494 vmi_local_timer_interrupt(cpu);
495 irq_exit();
496 set_irq_regs(old_regs);
497}
498
499#endif /* CONFIG_X86_LOCAL_APIC */
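
All three accounting paths above (process times, no-HZ idle, stolen time) share one idiom: take the delta between a raw cycle counter and a per-cpu "accounted" cursor, peel off whole jiffies, and carry the sub-jiffy remainder forward to the next call. A minimal standalone sketch of that carry loop, with made-up rates and a single global cursor instead of a per-cpu one (illustrative only, not the kernel code):

#include <stdio.h>

/* Hypothetical rates, for illustration only. */
#define CYCLES_PER_SEC   1000000000ULL          /* pretend 1 GHz counter */
#define HZ_RATE          250ULL
#define CYCLES_PER_JIFFY (CYCLES_PER_SEC / HZ_RATE)

static unsigned long long accounted;            /* per-cpu in the real code */

/* Peel whole jiffies off the unaccounted cycle delta; the sub-jiffy
 * remainder stays behind and is picked up on the next call. */
static unsigned long account_cycles(unsigned long long cur_cycles)
{
    unsigned long long not_accounted = cur_cycles - accounted;
    unsigned long jiffies_out = 0;

    while (not_accounted >= CYCLES_PER_JIFFY) {
        jiffies_out++;
        not_accounted -= CYCLES_PER_JIFFY;
        accounted += CYCLES_PER_JIFFY;
    }
    return jiffies_out;
}

int main(void)
{
    unsigned long long now = 10 * CYCLES_PER_JIFFY + 1234;

    printf("%lu jiffies\n", account_cycles(now));  /* 10 */
    printf("%lu jiffies\n", account_cycles(now));  /* 0: remainder < 1 jiffy */
    return 0;
}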
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index a53c8b1854b5..ca51610955df 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -37,9 +37,14 @@ SECTIONS
37{ 37{
38 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; 38 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
39 phys_startup_32 = startup_32 - LOAD_OFFSET; 39 phys_startup_32 = startup_32 - LOAD_OFFSET;
40
41 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
42 _text = .; /* Text and read-only data */
43 *(.text.head)
44 } :text = 0x9090
45
40 /* read-only */ 46 /* read-only */
41 .text : AT(ADDR(.text) - LOAD_OFFSET) { 47 .text : AT(ADDR(.text) - LOAD_OFFSET) {
42 _text = .; /* Text and read-only data */
43 *(.text) 48 *(.text)
44 SCHED_TEXT 49 SCHED_TEXT
45 LOCK_TEXT 50 LOCK_TEXT
@@ -181,12 +186,14 @@ SECTIONS
181 from .altinstructions and .eh_frame */ 186 from .altinstructions and .eh_frame */
182 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } 187 .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
183 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } 188 .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
189#if defined(CONFIG_BLK_DEV_INITRD)
184 . = ALIGN(4096); 190 . = ALIGN(4096);
185 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { 191 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
186 __initramfs_start = .; 192 __initramfs_start = .;
187 *(.init.ramfs) 193 *(.init.ramfs)
188 __initramfs_end = .; 194 __initramfs_end = .;
189 } 195 }
196#endif
190 . = ALIGN(L1_CACHE_BYTES); 197 . = ALIGN(L1_CACHE_BYTES);
191 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 198 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
192 __per_cpu_start = .; 199 __per_cpu_start = .;
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
index cc2f519b2f7f..c78816210706 100644
--- a/arch/i386/mach-default/setup.c
+++ b/arch/i386/mach-default/setup.c
@@ -79,7 +79,12 @@ void __init trap_init_hook(void)
79{ 79{
80} 80}
81 81
82static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL}; 82static struct irqaction irq0 = {
83 .handler = timer_interrupt,
84 .flags = IRQF_DISABLED | IRQF_NOBALANCING,
85 .mask = CPU_MASK_NONE,
86 .name = "timer"
87};
83 88
84/** 89/**
85 * time_init_hook - do any specific initialisations for the system timer. 90 * time_init_hook - do any specific initialisations for the system timer.
@@ -90,6 +95,7 @@ static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE,
90 **/ 95 **/
91void __init time_init_hook(void) 96void __init time_init_hook(void)
92{ 97{
98 irq0.mask = cpumask_of_cpu(0);
93 setup_irq(0, &irq0); 99 setup_irq(0, &irq0);
94} 100}
95 101
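
The setup.c hunk replaces a positional struct initializer with C99 designated initializers, which is what lets the patch add IRQF_NOBALANCING without counting fields. A reduced sketch of the idiom (field names and flag values are stand-ins, not the kernel's struct irqaction):

#include <stdio.h>

/* A stand-in for struct irqaction; field names are illustrative. */
struct handler_desc {
    void (*handler)(int irq);
    unsigned long flags;
    const char *name;
    void *dev_id;
};

static void timer_fn(int irq)
{
    (void)irq;                  /* a real handler would service IRQ0 */
}

/* Fields are named, so adding or reordering members (as the patch
 * does with .flags) cannot silently shift values into wrong slots;
 * omitted members (.dev_id) are zero-initialized. */
static struct handler_desc irq0 = {
    .handler = timer_fn,
    .flags   = 0x1 | 0x2,       /* placeholders for IRQF_* bits */
    .name    = "timer",
};

int main(void)
{
    printf("%s: flags=%#lx dev_id=%p\n", irq0.name, irq0.flags, irq0.dev_id);
    return 0;
}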
diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h
index 80566ca4a80a..c8d5aa132fa0 100644
--- a/arch/i386/mach-es7000/es7000.h
+++ b/arch/i386/mach-es7000/es7000.h
@@ -84,15 +84,6 @@ struct es7000_oem_table {
84}; 84};
85 85
86#ifdef CONFIG_ACPI 86#ifdef CONFIG_ACPI
87struct acpi_table_sdt {
88 unsigned long pa;
89 unsigned long count;
90 struct {
91 unsigned long pa;
92 enum acpi_table_id id;
93 unsigned long size;
94 } entry[50];
95};
96 87
97struct oem_table { 88struct oem_table {
98 struct acpi_table_header Header; 89 struct acpi_table_header Header;
diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c
index 3d0fc853516d..9be6ceabf042 100644
--- a/arch/i386/mach-es7000/es7000plat.c
+++ b/arch/i386/mach-es7000/es7000plat.c
@@ -160,51 +160,14 @@ parse_unisys_oem (char *oemptr)
160int __init 160int __init
161find_unisys_acpi_oem_table(unsigned long *oem_addr) 161find_unisys_acpi_oem_table(unsigned long *oem_addr)
162{ 162{
163 struct acpi_table_rsdp *rsdp = NULL; 163 struct acpi_table_header *header = NULL;
164 unsigned long rsdp_phys = 0; 164 int i = 0;
165 struct acpi_table_header *header = NULL; 165 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
166 int i; 166 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
167 struct acpi_table_sdt sdt; 167 struct oem_table *t = (struct oem_table *)header;
168 168 *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr,
169 rsdp_phys = acpi_find_rsdp(); 169 t->OEMTableSize);
170 rsdp = __va(rsdp_phys); 170 return 0;
171 if (rsdp->rsdt_address) {
172 struct acpi_table_rsdt *mapped_rsdt = NULL;
173 sdt.pa = rsdp->rsdt_address;
174
175 header = (struct acpi_table_header *)
176 __acpi_map_table(sdt.pa, sizeof(struct acpi_table_header));
177 if (!header)
178 return -ENODEV;
179
180 sdt.count = (header->length - sizeof(struct acpi_table_header)) >> 3;
181 mapped_rsdt = (struct acpi_table_rsdt *)
182 __acpi_map_table(sdt.pa, header->length);
183 if (!mapped_rsdt)
184 return -ENODEV;
185
186 header = &mapped_rsdt->header;
187
188 for (i = 0; i < sdt.count; i++)
189 sdt.entry[i].pa = (unsigned long) mapped_rsdt->entry[i];
190 };
191 for (i = 0; i < sdt.count; i++) {
192
193 header = (struct acpi_table_header *)
194 __acpi_map_table(sdt.entry[i].pa,
195 sizeof(struct acpi_table_header));
196 if (!header)
197 continue;
198 if (!strncmp((char *) &header->signature, "OEM1", 4)) {
199 if (!strncmp((char *) &header->oem_id, "UNISYS", 6)) {
200 void *addr;
201 struct oem_table *t;
202 acpi_table_print(header, sdt.entry[i].pa);
203 t = (struct oem_table *) __acpi_map_table(sdt.entry[i].pa, header->length);
204 addr = (void *) __acpi_map_table(t->OEMTableAddr, t->OEMTableSize);
205 *oem_addr = (unsigned long) addr;
206 return 0;
207 }
208 } 171 }
209 } 172 }
210 return -1; 173 return -1;
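
The rewritten lookup walks successive instances of the "OEM1" table until the lookup fails, instead of parsing the RSDT by hand. A userspace model of that iterate-by-instance pattern (the table store and return convention are invented for the example):

#include <stdio.h>
#include <string.h>

/* Toy table store; the fixed-width signature and OEM id fields mimic
 * the ACPI header layout (no NUL terminators). */
struct header {
    char sig[4];
    char oem_id[6];
};

static struct header tables[] = {
    { "OEM1", "ACMEIN" },
    { "OEM1", "UNISYS" },
};

/* Return the Nth table with a matching signature, 0 on success --
 * the same iterate-by-instance contract acpi_get_table() offers. */
static int get_table(const char *sig, int instance, struct header **out)
{
    int n = 0;

    for (size_t i = 0; i < sizeof(tables) / sizeof(tables[0]); i++) {
        if (!memcmp(tables[i].sig, sig, 4) && n++ == instance) {
            *out = &tables[i];
            return 0;
        }
    }
    return -1;
}

int main(void)
{
    struct header *h;
    int i = 0;

    while (get_table("OEM1", i++, &h) == 0)
        if (!memcmp(h->oem_id, "UNISYS", 6))
            printf("found UNISYS OEM1 (instance %d)\n", i - 1);
    return 0;
}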
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
index 9819b705efa4..2e2c51a8bd3a 100644
--- a/arch/i386/math-emu/get_address.c
+++ b/arch/i386/math-emu/get_address.c
@@ -56,15 +56,14 @@ static int reg_offset_vm86[] = {
56#define VM86_REG_(x) (*(unsigned short *) \ 56#define VM86_REG_(x) (*(unsigned short *) \
57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) 57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
58 58
59/* These are dummy, fs and gs are not saved on the stack. */ 59/* This is a dummy; gs is not saved on the stack. */
60#define ___FS ___ds
61#define ___GS ___ds 60#define ___GS ___ds
62 61
63static int reg_offset_pm[] = { 62static int reg_offset_pm[] = {
64 offsetof(struct info,___cs), 63 offsetof(struct info,___cs),
65 offsetof(struct info,___ds), 64 offsetof(struct info,___ds),
66 offsetof(struct info,___es), 65 offsetof(struct info,___es),
67 offsetof(struct info,___FS), 66 offsetof(struct info,___fs),
68 offsetof(struct info,___GS), 67 offsetof(struct info,___GS),
69 offsetof(struct info,___ss), 68 offsetof(struct info,___ss),
70 offsetof(struct info,___ds) 69 offsetof(struct info,___ds)
@@ -169,13 +168,10 @@ static long pm_address(u_char FPU_modrm, u_char segment,
169 168
170 switch ( segment ) 169 switch ( segment )
171 { 170 {
172 /* fs and gs aren't used by the kernel, so they still have their 171 /* gs isn't used by the kernel, so it still has its
173 user-space values. */ 172 user-space value. */
174 case PREFIX_FS_-1:
175 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
176 savesegment(fs, addr->selector);
177 break;
178 case PREFIX_GS_-1: 173 case PREFIX_GS_-1:
174 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
179 savesegment(gs, addr->selector); 175 savesegment(gs, addr->selector);
180 break; 176 break;
181 default: 177 default:
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
index 78d7b7689dd6..59e73302aa60 100644
--- a/arch/i386/math-emu/status_w.h
+++ b/arch/i386/math-emu/status_w.h
@@ -48,9 +48,11 @@
48 48
49#define status_word() \ 49#define status_word() \
50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) 50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
51#define setcc(cc) ({ \ 51static inline void setcc(int cc)
52 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \ 52{
53 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); }) 53 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3);
54 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3);
55}
54 56
55#ifdef PECULIAR_486 57#ifdef PECULIAR_486
56 /* Default, this conveys no information, but an 80486 does it. */ 58 /* Default, this conveys no information, but an 80486 does it. */
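
The status_w.h hunk turns the setcc() statement-expression macro into a static inline, so the argument is evaluated exactly once and gets ordinary type checking. A reduced, self-contained version using two of the four condition-code bits (bit values match the x87 status word):

#include <stdio.h>

#define SW_C0 0x0100            /* condition-code bit 8  */
#define SW_C3 0x4000            /* condition-code bit 14 */

static unsigned short partial_status;

/* Inline function instead of a macro: 'cc' is evaluated once, and a
 * caller passing the wrong type gets a diagnostic. */
static inline void setcc(int cc)
{
    partial_status &= (unsigned short)~(SW_C0 | SW_C3);
    partial_status |= (unsigned short)(cc & (SW_C0 | SW_C3));
}

int main(void)
{
    setcc(SW_C3 | SW_C0);
    printf("status = %#x\n", partial_status);   /* 0x4100 */
    setcc(0);
    printf("status = %#x\n", partial_status);   /* 0 */
    return 0;
}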
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index e0c390d6ceb5..aa58720f6871 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -101,7 +101,6 @@ extern void find_max_pfn(void);
101extern void add_one_highpage_init(struct page *, int, int); 101extern void add_one_highpage_init(struct page *, int, int);
102 102
103extern struct e820map e820; 103extern struct e820map e820;
104extern unsigned long init_pg_tables_end;
105extern unsigned long highend_pfn, highstart_pfn; 104extern unsigned long highend_pfn, highstart_pfn;
106extern unsigned long max_low_pfn; 105extern unsigned long max_low_pfn;
107extern unsigned long totalram_pages; 106extern unsigned long totalram_pages;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index aaaa4d225f7e..b8c4e259fc8b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struct notifier_block *nb)
46} 46}
47EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); 47EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
48 48
49static inline int notify_page_fault(enum die_val val, const char *str, 49static inline int notify_page_fault(struct pt_regs *regs, long err)
50 struct pt_regs *regs, long err, int trap, int sig)
51{ 50{
52 struct die_args args = { 51 struct die_args args = {
53 .regs = regs, 52 .regs = regs,
54 .str = str, 53 .str = "page fault",
55 .err = err, 54 .err = err,
56 .trapnr = trap, 55 .trapnr = 14,
57 .signr = sig 56 .signr = SIGSEGV
58 }; 57 };
59 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); 58 return atomic_notifier_call_chain(&notify_page_fault_chain,
60} 59 DIE_PAGE_FAULT, &args);
61
62/*
63 * Unlock any spinlocks which will prevent us from getting the
64 * message out
65 */
66void bust_spinlocks(int yes)
67{
68 int loglevel_save = console_loglevel;
69
70 if (yes) {
71 oops_in_progress = 1;
72 return;
73 }
74#ifdef CONFIG_VT
75 unblank_screen();
76#endif
77 oops_in_progress = 0;
78 /*
79 * OK, the message is on the console. Now we call printk()
80 * without oops_in_progress set so that printk will give klogd
81 * a poke. Hold onto your hats...
82 */
83 console_loglevel = 15; /* NMI oopser may have shut the console up */
84 printk(" ");
85 console_loglevel = loglevel_save;
86} 60}
87 61
88/* 62/*
@@ -353,8 +327,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
353 if (unlikely(address >= TASK_SIZE)) { 327 if (unlikely(address >= TASK_SIZE)) {
354 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) 328 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
355 return; 329 return;
356 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 330 if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
357 SIGSEGV) == NOTIFY_STOP)
358 return; 331 return;
359 /* 332 /*
360 * Don't take the mm semaphore here. If we fixup a prefetch 333 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -363,8 +336,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
363 goto bad_area_nosemaphore; 336 goto bad_area_nosemaphore;
364 } 337 }
365 338
366 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 339 if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
367 SIGSEGV) == NOTIFY_STOP)
368 return; 340 return;
369 341
370 /* It's safe to allow irq's after cr2 has been saved and the vmalloc 342 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
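
The fault.c change folds the constant notifier arguments (the "page fault" string, trap 14, SIGSEGV) into the wrapper, so both call sites shrink to notify_page_fault(regs, error_code). A toy model of that refactor, with a stub standing in for the notifier chain:

#include <signal.h>
#include <stdio.h>

enum { NOTIFY_DONE, NOTIFY_STOP };

struct die_args {
    void *regs;
    const char *str;
    long err;
    int trapnr;
    int signr;
};

/* Stub for atomic_notifier_call_chain(); just logs the event. */
static int call_chain(const struct die_args *args)
{
    printf("%s: err=%ld trap=%d sig=%d\n",
           args->str, args->err, args->trapnr, args->signr);
    return NOTIFY_DONE;
}

/* The constants live in one place; callers pass only what varies. */
static int notify_page_fault(void *regs, long err)
{
    struct die_args args = {
        .regs   = regs,
        .str    = "page fault",
        .err    = err,
        .trapnr = 14,
        .signr  = SIGSEGV,
    };
    return call_chain(&args);
}

int main(void)
{
    return notify_page_fault(NULL, 4) == NOTIFY_STOP;
}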
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index e0fa6cb655a8..bb2de1089add 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -33,13 +33,14 @@ void *kmap_atomic(struct page *page, enum km_type type)
33 33
34 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ 34 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
35 pagefault_disable(); 35 pagefault_disable();
36
37 idx = type + KM_TYPE_NR*smp_processor_id();
38 BUG_ON(!pte_none(*(kmap_pte-idx)));
39
36 if (!PageHighMem(page)) 40 if (!PageHighMem(page))
37 return page_address(page); 41 return page_address(page);
38 42
39 idx = type + KM_TYPE_NR*smp_processor_id();
40 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
41 if (!pte_none(*(kmap_pte-idx)))
42 BUG();
43 set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); 44 set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
44 45
45 return (void*) vaddr; 46 return (void*) vaddr;
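
The highmem.c hunk moves the slot computation and the pte_none() debug check ahead of the PageHighMem() early return, so even lowmem kmap_atomic() calls are checked. The slot arithmetic itself is simple: each cpu owns a contiguous window of KM_TYPE_NR fixmap slots. A sketch (the KM_TYPE_NR value is assumed; the real one comes from asm/kmap_types.h):

#include <stdio.h>

#define KM_TYPE_NR 14   /* assumed slots-per-cpu count */

/* Each cpu owns a contiguous window of fixmap slots, so an atomic
 * kmap on one cpu can never collide with another cpu's slot. */
static int kmap_slot(int type, int cpu)
{
    return type + KM_TYPE_NR * cpu;
}

int main(void)
{
    printf("cpu 0, type 2 -> slot %d\n", kmap_slot(2, 0));  /* 2  */
    printf("cpu 3, type 2 -> slot %d\n", kmap_slot(2, 3));  /* 44 */
    return 0;
}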
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index c5c5ea700cc7..ae436882af7a 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -62,6 +62,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
62 62
63#ifdef CONFIG_X86_PAE 63#ifdef CONFIG_X86_PAE
64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
65 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
65 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 66 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
66 pud = pud_offset(pgd, 0); 67 pud = pud_offset(pgd, 0);
67 if (pmd_table != pmd_offset(pud, 0)) 68 if (pmd_table != pmd_offset(pud, 0))
@@ -82,6 +83,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
82{ 83{
83 if (pmd_none(*pmd)) { 84 if (pmd_none(*pmd)) {
84 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 85 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
86 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
85 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 87 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
86 if (page_table != pte_offset_kernel(pmd, 0)) 88 if (page_table != pte_offset_kernel(pmd, 0))
87 BUG(); 89 BUG();
@@ -345,6 +347,8 @@ static void __init pagetable_init (void)
345 /* Init entries of the first-level page table to the zero page */ 347 /* Init entries of the first-level page table to the zero page */
346 for (i = 0; i < PTRS_PER_PGD; i++) 348 for (i = 0; i < PTRS_PER_PGD; i++)
347 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
350#else
351 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
348#endif 352#endif
349 353
350 /* Enable PSE if available */ 354 /* Enable PSE if available */
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index ad91528bdc14..412ebbd8adb0 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -60,6 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
60 address = __pa(address); 60 address = __pa(address);
61 addr = address & LARGE_PAGE_MASK; 61 addr = address & LARGE_PAGE_MASK;
62 pbase = (pte_t *)page_address(base); 62 pbase = (pte_t *)page_address(base);
63 paravirt_alloc_pt(page_to_pfn(base));
63 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { 64 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
64 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, 65 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
65 addr == address ? prot : ref_prot)); 66 addr == address ? prot : ref_prot));
@@ -172,6 +173,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
172 if (!PageReserved(kpte_page)) { 173 if (!PageReserved(kpte_page)) {
173 if (cpu_has_pse && (page_private(kpte_page) == 0)) { 174 if (cpu_has_pse && (page_private(kpte_page) == 0)) {
174 ClearPagePrivate(kpte_page); 175 ClearPagePrivate(kpte_page);
176 paravirt_release_pt(page_to_pfn(kpte_page));
175 list_add(&kpte_page->lru, &df_list); 177 list_add(&kpte_page->lru, &df_list);
176 revert_page(kpte_page, address); 178 revert_page(kpte_page, address);
177 } 179 }
@@ -224,7 +226,7 @@ void global_flush_tlb(void)
224 list_replace_init(&df_list, &l); 226 list_replace_init(&df_list, &l);
225 spin_unlock_irq(&cpa_lock); 227 spin_unlock_irq(&cpa_lock);
226 if (!cpu_has_clflush) 228 if (!cpu_has_clflush)
227 flush_map(0); 229 flush_map(NULL);
228 list_for_each_entry_safe(pg, next, &l, lru) { 230 list_for_each_entry_safe(pg, next, &l, lru) {
229 if (cpu_has_clflush) 231 if (cpu_has_clflush)
230 flush_map(page_address(pg)); 232 flush_map(page_address(pg));
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index f349eaf450b0..fa0cfbd551e1 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -171,6 +171,8 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
171void reserve_top_address(unsigned long reserve) 171void reserve_top_address(unsigned long reserve)
172{ 172{
173 BUG_ON(fixmaps > 0); 173 BUG_ON(fixmaps > 0);
174 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
175 (int)-reserve);
174#ifdef CONFIG_COMPAT_VDSO 176#ifdef CONFIG_COMPAT_VDSO
175 BUG_ON(reserve != 0); 177 BUG_ON(reserve != 0);
176#else 178#else
@@ -248,9 +250,15 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
248 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 250 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
249 swapper_pg_dir + USER_PTRS_PER_PGD, 251 swapper_pg_dir + USER_PTRS_PER_PGD,
250 KERNEL_PGD_PTRS); 252 KERNEL_PGD_PTRS);
253
251 if (PTRS_PER_PMD > 1) 254 if (PTRS_PER_PMD > 1)
252 return; 255 return;
253 256
257 /* must happen under lock */
258 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
259 __pa(swapper_pg_dir) >> PAGE_SHIFT,
260 USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
261
254 pgd_list_add(pgd); 262 pgd_list_add(pgd);
255 spin_unlock_irqrestore(&pgd_lock, flags); 263 spin_unlock_irqrestore(&pgd_lock, flags);
256} 264}
@@ -260,6 +268,7 @@ void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
260{ 268{
261 unsigned long flags; /* can be called from interrupt context */ 269 unsigned long flags; /* can be called from interrupt context */
262 270
271 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
263 spin_lock_irqsave(&pgd_lock, flags); 272 spin_lock_irqsave(&pgd_lock, flags);
264 pgd_list_del(pgd); 273 pgd_list_del(pgd);
265 spin_unlock_irqrestore(&pgd_lock, flags); 274 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -277,13 +286,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
277 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 286 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
278 if (!pmd) 287 if (!pmd)
279 goto out_oom; 288 goto out_oom;
289 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
280 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 290 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
281 } 291 }
282 return pgd; 292 return pgd;
283 293
284out_oom: 294out_oom:
285 for (i--; i >= 0; i--) 295 for (i--; i >= 0; i--) {
286 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 296 pgd_t pgdent = pgd[i];
297 void* pmd = (void *)__va(pgd_val(pgdent)-1);
298 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
299 kmem_cache_free(pmd_cache, pmd);
300 }
287 kmem_cache_free(pgd_cache, pgd); 301 kmem_cache_free(pgd_cache, pgd);
288 return NULL; 302 return NULL;
289} 303}
@@ -294,8 +308,12 @@ void pgd_free(pgd_t *pgd)
294 308
295 /* in the PAE case user pgd entries are overwritten before usage */ 309 /* in the PAE case user pgd entries are overwritten before usage */
296 if (PTRS_PER_PMD > 1) 310 if (PTRS_PER_PMD > 1)
297 for (i = 0; i < USER_PTRS_PER_PGD; ++i) 311 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
298 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 312 pgd_t pgdent = pgd[i];
313 void* pmd = (void *)__va(pgd_val(pgdent)-1);
314 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
315 kmem_cache_free(pmd_cache, pmd);
316 }
299 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 317 /* in the non-PAE case, free_pgtables() clears user pgd entries */
300 kmem_cache_free(pgd_cache, pgd); 318 kmem_cache_free(pgd_cache, pgd);
301} 319}
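
The pgtable.c changes enforce a pairing rule: every pmd page announced with paravirt_alloc_pd() must be matched by paravirt_release_pd() on every exit path, including the out_oom unwind. A userspace model of that allocate/announce/unwind shape (the hook bodies are stubs; a hypervisor backend would shadow the page tables here):

#include <stdio.h>
#include <stdlib.h>

#define NPMDS 4

static void pv_alloc_pd(void *pmd)   { printf("alloc   %p\n", pmd); }
static void pv_release_pd(void *pmd) { printf("release %p\n", pmd); }

/* Allocate NPMDS pmd pages, announcing each one; on failure, unwind
 * releases exactly the pages that were announced -- the invariant
 * the patched out_oom path preserves. */
static void **alloc_pmds(void)
{
    void **pmds = calloc(NPMDS, sizeof(*pmds));
    int i;

    if (!pmds)
        return NULL;
    for (i = 0; i < NPMDS; i++) {
        pmds[i] = malloc(64);
        if (!pmds[i])
            goto out_oom;
        pv_alloc_pd(pmds[i]);
    }
    return pmds;

out_oom:
    for (i--; i >= 0; i--) {
        pv_release_pd(pmds[i]);
        free(pmds[i]);
    }
    free(pmds);
    return NULL;
}

int main(void)
{
    void **pmds = alloc_pmds();

    if (pmds) {                 /* normal teardown mirrors pgd_free() */
        for (int i = 0; i < NPMDS; i++) {
            pv_release_pd(pmds[i]);
            free(pmds[i]);
        }
        free(pmds);
    }
    return 0;
}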
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index 3700eef78743..8fda7be9dd4d 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -63,7 +63,7 @@ static struct sys_device device_oprofile = {
63}; 63};
64 64
65 65
66static int __init init_driverfs(void) 66static int __init init_sysfs(void)
67{ 67{
68 int error; 68 int error;
69 if (!(error = sysdev_class_register(&oprofile_sysclass))) 69 if (!(error = sysdev_class_register(&oprofile_sysclass)))
@@ -72,15 +72,15 @@ static int __init init_driverfs(void)
72} 72}
73 73
74 74
75static void exit_driverfs(void) 75static void exit_sysfs(void)
76{ 76{
77 sysdev_unregister(&device_oprofile); 77 sysdev_unregister(&device_oprofile);
78 sysdev_class_unregister(&oprofile_sysclass); 78 sysdev_class_unregister(&oprofile_sysclass);
79} 79}
80 80
81#else 81#else
82#define init_driverfs() do { } while (0) 82#define init_sysfs() do { } while (0)
83#define exit_driverfs() do { } while (0) 83#define exit_sysfs() do { } while (0)
84#endif /* CONFIG_PM */ 84#endif /* CONFIG_PM */
85 85
86static int profile_exceptions_notify(struct notifier_block *self, 86static int profile_exceptions_notify(struct notifier_block *self,
@@ -385,7 +385,7 @@ static int __init ppro_init(char ** cpu_type)
385 return 1; 385 return 1;
386} 386}
387 387
388/* in order to get driverfs right */ 388/* in order to get sysfs right */
389static int using_nmi; 389static int using_nmi;
390 390
391int __init op_nmi_init(struct oprofile_operations *ops) 391int __init op_nmi_init(struct oprofile_operations *ops)
@@ -440,7 +440,7 @@ int __init op_nmi_init(struct oprofile_operations *ops)
440 return -ENODEV; 440 return -ENODEV;
441 } 441 }
442 442
443 init_driverfs(); 443 init_sysfs();
444 using_nmi = 1; 444 using_nmi = 1;
445 ops->create_files = nmi_create_files; 445 ops->create_files = nmi_create_files;
446 ops->setup = nmi_setup; 446 ops->setup = nmi_setup;
@@ -456,5 +456,5 @@ int __init op_nmi_init(struct oprofile_operations *ops)
456void op_nmi_exit(void) 456void op_nmi_exit(void)
457{ 457{
458 if (using_nmi) 458 if (using_nmi)
459 exit_driverfs(); 459 exit_sysfs();
460} 460}
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index ca2447e05e15..c554f52cb808 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -24,7 +24,8 @@
24 24
25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) 25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) 26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
27#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) 27#define CTR_32BIT_WRITE(l,msrs,c) \
28 do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
28#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) 29#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
29 30
30#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) 31#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
@@ -79,7 +80,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
79 for (i = 0; i < NUM_COUNTERS; ++i) { 80 for (i = 0; i < NUM_COUNTERS; ++i) {
80 if (unlikely(!CTR_IS_RESERVED(msrs,i))) 81 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
81 continue; 82 continue;
82 CTR_WRITE(1, msrs, i); 83 CTR_32BIT_WRITE(1, msrs, i);
83 } 84 }
84 85
85 /* enable active counters */ 86 /* enable active counters */
@@ -87,7 +88,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
87 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { 88 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
88 reset_value[i] = counter_config[i].count; 89 reset_value[i] = counter_config[i].count;
89 90
90 CTR_WRITE(counter_config[i].count, msrs, i); 91 CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
91 92
92 CTRL_READ(low, high, msrs, i); 93 CTRL_READ(low, high, msrs, i);
93 CTRL_CLEAR(low); 94 CTRL_CLEAR(low);
@@ -116,7 +117,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
116 CTR_READ(low, high, msrs, i); 117 CTR_READ(low, high, msrs, i);
117 if (CTR_OVERFLOWED(low)) { 118 if (CTR_OVERFLOWED(low)) {
118 oprofile_add_sample(regs, i); 119 oprofile_add_sample(regs, i);
119 CTR_WRITE(reset_value[i], msrs, i); 120 CTR_32BIT_WRITE(reset_value[i], msrs, i);
120 } 121 }
121 } 122 }
122 123
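
The renamed CTR_32BIT_WRITE macro still preloads the counter with the two's complement of the requested count, so the counter overflows (bit 31 clears, per CTR_OVERFLOWED) after exactly that many events; the change is only that it stops writing -1 into the high half. The preload arithmetic, checked in plain C:

#include <stdio.h>
#include <stdint.h>

/* CTR_OVERFLOWED from op_model_ppro.c: "overflowed" means bit 31
 * has cleared. */
#define CTR_OVERFLOWED(n) (!((n) & (1U << 31)))

int main(void)
{
    uint32_t count = 100000;
    uint32_t preload = -count;          /* two's complement: 2^32 - count */

    printf("preload          = %#x\n", (unsigned)preload);
    printf("overflowed now?    %d\n", CTR_OVERFLOWED(preload));        /* 0 */
    printf("after %u events: %#x\n", (unsigned)count,
           (unsigned)(uint32_t)(preload + count));                     /* 0 */
    printf("overflowed then?   %d\n",
           CTR_OVERFLOWED((uint32_t)(preload + count)));               /* 1 */
    return 0;
}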
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile
index 1594d2f55c8f..44650e03308b 100644
--- a/arch/i386/pci/Makefile
+++ b/arch/i386/pci/Makefile
@@ -1,7 +1,7 @@
1obj-y := i386.o init.o 1obj-y := i386.o init.o
2 2
3obj-$(CONFIG_PCI_BIOS) += pcbios.o 3obj-$(CONFIG_PCI_BIOS) += pcbios.o
4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o 4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o
5obj-$(CONFIG_PCI_DIRECT) += direct.o 5obj-$(CONFIG_PCI_DIRECT) += direct.o
6 6
7pci-y := fixup.o 7pci-y := fixup.o
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 53ca6e897984..1bb069372143 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -191,6 +191,94 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
191 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"), 191 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"),
192 }, 192 },
193 }, 193 },
194 {
195 .callback = set_bf_sort,
196 .ident = "HP ProLiant BL20p G3",
197 .matches = {
198 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
199 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G3"),
200 },
201 },
202 {
203 .callback = set_bf_sort,
204 .ident = "HP ProLiant BL20p G4",
205 .matches = {
206 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
207 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G4"),
208 },
209 },
210 {
211 .callback = set_bf_sort,
212 .ident = "HP ProLiant BL30p G1",
213 .matches = {
214 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
215 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL30p G1"),
216 },
217 },
218 {
219 .callback = set_bf_sort,
220 .ident = "HP ProLiant BL25p G1",
221 .matches = {
222 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
223 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL25p G1"),
224 },
225 },
226 {
227 .callback = set_bf_sort,
228 .ident = "HP ProLiant BL35p G1",
229 .matches = {
230 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
231 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL35p G1"),
232 },
233 },
234 {
235 .callback = set_bf_sort,
236 .ident = "HP ProLiant BL45p G1",
237 .matches = {
238 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
239 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G1"),
240 },
241 },
242 {
243 .callback = set_bf_sort,
244 .ident = "HP ProLiant BL45p G2",
245 .matches = {
246 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
247 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G2"),
248 },
249 },
250 {
251 .callback = set_bf_sort,
252 .ident = "HP ProLiant BL460c G1",
253 .matches = {
254 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
255 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL460c G1"),
256 },
257 },
258 {
259 .callback = set_bf_sort,
260 .ident = "HP ProLiant BL465c G1",
261 .matches = {
262 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
263 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL465c G1"),
264 },
265 },
266 {
267 .callback = set_bf_sort,
268 .ident = "HP ProLiant BL480c G1",
269 .matches = {
270 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
271 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL480c G1"),
272 },
273 },
274 {
275 .callback = set_bf_sort,
276 .ident = "HP ProLiant BL685c G1",
277 .matches = {
278 DMI_MATCH(DMI_SYS_VENDOR, "HP"),
279 DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"),
280 },
281 },
194 {} 282 {}
195}; 283};
196 284
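
Each new ProLiant entry above follows the same shape: a callback plus DMI match keys, with the empty entry closing the table. A reduced model of how such a match table is scanned (the struct layout and data are illustrative, not the kernel's dmi_system_id):

#include <stdio.h>
#include <string.h>

/* Callback plus match keys, terminated by an empty entry. */
struct quirk {
    int (*callback)(const struct quirk *q);
    const char *ident;
    const char *vendor;
    const char *product;
};

static int set_bf_sort(const struct quirk *q)
{
    printf("%s: enabling breadth-first PCI sort\n", q->ident);
    return 0;
}

static const struct quirk quirks[] = {
    { set_bf_sort, "HP ProLiant BL20p G3", "HP", "ProLiant BL20p G3" },
    { set_bf_sort, "HP ProLiant BL45p G1", "HP", "ProLiant BL45p G1" },
    { 0 }                       /* terminator, like the trailing {} */
};

int main(void)
{
    /* Pretend these came out of the DMI/SMBIOS tables. */
    const char *vendor = "HP", *product = "ProLiant BL45p G1";

    for (const struct quirk *q = quirks; q->callback; q++)
        if (!strcmp(q->vendor, vendor) && !strcmp(q->product, product))
            q->callback(q);
    return 0;
}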
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
new file mode 100644
index 000000000000..747d8c63b0c4
--- /dev/null
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -0,0 +1,264 @@
1/*
2 * mmconfig-shared.c - Low-level direct PCI config space access via
3 * MMCONFIG - common code between i386 and x86-64.
4 *
5 * This code does:
6 * - known chipset handling
7 * - ACPI decoding and validation
8 *
9 * Per-architecture code takes care of the mappings and accesses
10 * themselves.
11 */
12
13#include <linux/pci.h>
14#include <linux/init.h>
15#include <linux/acpi.h>
16#include <linux/bitmap.h>
17#include <asm/e820.h>
18
19#include "pci.h"
20
21/* aperture is up to 256MB but BIOS may reserve less */
22#define MMCONFIG_APER_MIN (2 * 1024*1024)
23#define MMCONFIG_APER_MAX (256 * 1024*1024)
24
25DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
26
27/* K8 systems have some devices (typically in the builtin northbridge)
28   that are only accessible using type 1 config cycles.
29   Normally this can be expressed in the MCFG by not listing them
30   and assigning suitable _SEGs, but some BIOSes don't implement this.
31   Instead, try to discover all devices on bus 0 that are unreachable
32   using MMCONFIG and fall back to type 1 for them. */
33static void __init unreachable_devices(void)
34{
35 int i, bus;
36 /* Use the max bus number from ACPI here? */
37 for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) {
38 for (i = 0; i < 32; i++) {
39 unsigned int devfn = PCI_DEVFN(i, 0);
40 u32 val1, val2;
41
42 pci_conf1_read(0, bus, devfn, 0, 4, &val1);
43 if (val1 == 0xffffffff)
44 continue;
45
46 if (pci_mmcfg_arch_reachable(0, bus, devfn)) {
47 raw_pci_ops->read(0, bus, devfn, 0, 4, &val2);
48 if (val1 == val2)
49 continue;
50 }
51 set_bit(i + 32 * bus, pci_mmcfg_fallback_slots);
52 printk(KERN_NOTICE "PCI: No mmconfig possible on device"
53 " %02x:%02x\n", bus, i);
54 }
55 }
56}
57
58static const char __init *pci_mmcfg_e7520(void)
59{
60 u32 win;
61 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
62
63 pci_mmcfg_config_num = 1;
64 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
65 if (!pci_mmcfg_config)
66 return NULL;
67 pci_mmcfg_config[0].address = (win & 0xf000) << 16;
68 pci_mmcfg_config[0].pci_segment = 0;
69 pci_mmcfg_config[0].start_bus_number = 0;
70 pci_mmcfg_config[0].end_bus_number = 255;
71
72 return "Intel Corporation E7520 Memory Controller Hub";
73}
74
75static const char __init *pci_mmcfg_intel_945(void)
76{
77 u32 pciexbar, mask = 0, len = 0;
78
79 pci_mmcfg_config_num = 1;
80
81 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
82
83 /* Enable bit */
84 if (!(pciexbar & 1))
85 pci_mmcfg_config_num = 0;
86
87 /* Size bits */
88 switch ((pciexbar >> 1) & 3) {
89 case 0:
90 mask = 0xf0000000U;
91 len = 0x10000000U;
92 break;
93 case 1:
94 mask = 0xf8000000U;
95 len = 0x08000000U;
96 break;
97 case 2:
98 mask = 0xfc000000U;
99 len = 0x04000000U;
100 break;
101 default:
102 pci_mmcfg_config_num = 0;
103 }
104
105	 /* Erratum #2: things break when not aligned on a 256MB boundary */
106	 /* Can only happen in 64MB/128MB mode */
107
108 if ((pciexbar & mask) & 0x0fffffffU)
109 pci_mmcfg_config_num = 0;
110
111 if (pci_mmcfg_config_num) {
112 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
113 if (!pci_mmcfg_config)
114 return NULL;
115 pci_mmcfg_config[0].address = pciexbar & mask;
116 pci_mmcfg_config[0].pci_segment = 0;
117 pci_mmcfg_config[0].start_bus_number = 0;
118 pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
119 }
120
121 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
122}
123
124struct pci_mmcfg_hostbridge_probe {
125 u32 vendor;
126 u32 device;
127 const char *(*probe)(void);
128};
129
130static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
131 { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 },
132 { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 },
133};
134
135static int __init pci_mmcfg_check_hostbridge(void)
136{
137 u32 l;
138 u16 vendor, device;
139 int i;
140 const char *name;
141
142 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
143 vendor = l & 0xffff;
144 device = (l >> 16) & 0xffff;
145
146 pci_mmcfg_config_num = 0;
147 pci_mmcfg_config = NULL;
148 name = NULL;
149
150 for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
151 if (pci_mmcfg_probes[i].vendor == vendor &&
152 pci_mmcfg_probes[i].device == device)
153 name = pci_mmcfg_probes[i].probe();
154 }
155
156 if (name) {
157 printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n",
158 name, pci_mmcfg_config_num ? "with" : "without");
159 }
160
161 return name != NULL;
162}
163
164static void __init pci_mmcfg_insert_resources(void)
165{
166#define PCI_MMCFG_RESOURCE_NAME_LEN 19
167 int i;
168 struct resource *res;
169 char *names;
170 unsigned num_buses;
171
172 res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
173 pci_mmcfg_config_num, GFP_KERNEL);
174 if (!res) {
175 printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
176 return;
177 }
178
179 names = (void *)&res[pci_mmcfg_config_num];
180 for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
181 struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
182 num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
183 res->name = names;
184 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
185 cfg->pci_segment);
186 res->start = cfg->address;
187 res->end = res->start + (num_buses << 20) - 1;
188 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
189 insert_resource(&iomem_resource, res);
190 names += PCI_MMCFG_RESOURCE_NAME_LEN;
191 }
192}
193
194static void __init pci_mmcfg_reject_broken(int type)
195{
196 typeof(pci_mmcfg_config[0]) *cfg;
197
198 if ((pci_mmcfg_config_num == 0) ||
199 (pci_mmcfg_config == NULL) ||
200 (pci_mmcfg_config[0].address == 0))
201 return;
202
203 cfg = &pci_mmcfg_config[0];
204
205 /*
206 * Handle more broken MCFG tables on Asus etc.
207 * They only contain a single entry for bus 0-0.
208 */
209 if (pci_mmcfg_config_num == 1 &&
210 cfg->pci_segment == 0 &&
211 (cfg->start_bus_number | cfg->end_bus_number) == 0) {
212 printk(KERN_ERR "PCI: start and end of bus number is 0. "
213 "Rejected as broken MCFG.\n");
214 goto reject;
215 }
216
217 /*
218 * Only do this check when type 1 works. If it doesn't work
219 * assume we run on a Mac and always use MCFG
220 */
221 if (type == 1 && !e820_all_mapped(cfg->address,
222 cfg->address + MMCONFIG_APER_MIN,
223 E820_RESERVED)) {
224 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
225 " E820-reserved\n", cfg->address);
226 goto reject;
227 }
228 return;
229
230reject:
231 printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
232 kfree(pci_mmcfg_config);
233 pci_mmcfg_config = NULL;
234 pci_mmcfg_config_num = 0;
235}
236
237void __init pci_mmcfg_init(int type)
238{
239 int known_bridge = 0;
240
241 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
242 return;
243
244 if (type == 1 && pci_mmcfg_check_hostbridge())
245 known_bridge = 1;
246
247 if (!known_bridge) {
248 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
249 pci_mmcfg_reject_broken(type);
250 }
251
252 if ((pci_mmcfg_config_num == 0) ||
253 (pci_mmcfg_config == NULL) ||
254 (pci_mmcfg_config[0].address == 0))
255 return;
256
257 if (pci_mmcfg_arch_init()) {
258 if (type == 1)
259 unreachable_devices();
260 if (known_bridge)
261 pci_mmcfg_insert_resources();
262 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
263 }
264}
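
unreachable_devices() records each broken (bus, slot) pair at bit slot + 32*bus of pci_mmcfg_fallback_slots, and get_base_addr() tests the same bit to force type 1 access. A standalone sketch of that indexing, with plain-C stand-ins for the kernel's set_bit/test_bit:

#include <stdio.h>

#define MAX_CHECK_BUS 16
#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* One bit per (bus, slot) pair on the first 16 buses, mirroring
 * pci_mmcfg_fallback_slots. */
static unsigned long fallback[32 * MAX_CHECK_BUS / BITS_PER_LONG];

static void set_fallback(int bus, int slot)
{
    int bit = slot + 32 * bus;

    fallback[bit / BITS_PER_LONG] |= 1UL << (bit % BITS_PER_LONG);
}

static int test_fallback(int bus, int slot)
{
    int bit = slot + 32 * bus;

    return !!(fallback[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG)));
}

int main(void)
{
    set_fallback(0, 24);        /* e.g. a K8 northbridge at 00:18.0 */
    printf("00:18.0 -> %d\n", test_fallback(0, 24));  /* 1 */
    printf("00:00.0 -> %d\n", test_fallback(0, 0));   /* 0 */
    return 0;
}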
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index e2616a266e13..bb1afd9e589d 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -15,55 +15,33 @@
15#include <asm/e820.h> 15#include <asm/e820.h>
16#include "pci.h" 16#include "pci.h"
17 17
18/* aperture is up to 256MB but BIOS may reserve less */
19#define MMCONFIG_APER_MIN (2 * 1024*1024)
20#define MMCONFIG_APER_MAX (256 * 1024*1024)
21
22/* Assume systems with more busses have correct MCFG */ 18/* Assume systems with more busses have correct MCFG */
23#define MAX_CHECK_BUS 16
24
25#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
26 20
27/* The base address of the last MMCONFIG device accessed */ 21/* The base address of the last MMCONFIG device accessed */
28static u32 mmcfg_last_accessed_device; 22static u32 mmcfg_last_accessed_device;
29static int mmcfg_last_accessed_cpu; 23static int mmcfg_last_accessed_cpu;
30 24
31static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
32
33/* 25/*
34 * Functions for accessing PCI configuration space with MMCONFIG accesses 26 * Functions for accessing PCI configuration space with MMCONFIG accesses
35 */ 27 */
36static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) 28static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
37{ 29{
38 int cfg_num = -1; 30 struct acpi_mcfg_allocation *cfg;
39 struct acpi_table_mcfg_config *cfg; 31 int cfg_num;
40 32
41 if (seg == 0 && bus < MAX_CHECK_BUS && 33 if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
42 test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) 34 test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots))
43 return 0; 35 return 0;
44 36
45 while (1) { 37 for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
46 ++cfg_num;
47 if (cfg_num >= pci_mmcfg_config_num) {
48 break;
49 }
50 cfg = &pci_mmcfg_config[cfg_num]; 38 cfg = &pci_mmcfg_config[cfg_num];
51 if (cfg->pci_segment_group_number != seg) 39 if (cfg->pci_segment == seg &&
52 continue; 40 (cfg->start_bus_number <= bus) &&
53 if ((cfg->start_bus_number <= bus) &&
54 (cfg->end_bus_number >= bus)) 41 (cfg->end_bus_number >= bus))
55 return cfg->base_address; 42 return cfg->address;
56 } 43 }
57 44
58 /* Handle more broken MCFG tables on Asus etc.
59 They only contain a single entry for bus 0-0. Assume
60 this applies to all busses. */
61 cfg = &pci_mmcfg_config[0];
62 if (pci_mmcfg_config_num == 1 &&
63 cfg->pci_segment_group_number == 0 &&
64 (cfg->start_bus_number | cfg->end_bus_number) == 0)
65 return cfg->base_address;
66
67 /* Fall back to type 0 */ 45 /* Fall back to type 0 */
68 return 0; 46 return 0;
69} 47}
@@ -125,7 +103,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
125 unsigned long flags; 103 unsigned long flags;
126 u32 base; 104 u32 base;
127 105
128 if ((bus > 255) || (devfn > 255) || (reg > 4095)) 106 if ((bus > 255) || (devfn > 255) || (reg > 4095))
129 return -EINVAL; 107 return -EINVAL;
130 108
131 base = get_base_addr(seg, bus, devfn); 109 base = get_base_addr(seg, bus, devfn);
@@ -158,67 +136,15 @@ static struct pci_raw_ops pci_mmcfg = {
158 .write = pci_mmcfg_write, 136 .write = pci_mmcfg_write,
159}; 137};
160 138
161/* K8 systems have some devices (typically in the builtin northbridge) 139int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
162 that are only accessible using type1 140 unsigned int devfn)
163 Normally this can be expressed in the MCFG by not listing them
164 and assigning suitable _SEGs, but this isn't implemented in some BIOS.
165 Instead try to discover all devices on bus 0 that are unreachable using MM
166 and fallback for them. */
167static __init void unreachable_devices(void)
168{ 141{
169 int i, k; 142 return get_base_addr(seg, bus, devfn) != 0;
170 unsigned long flags;
171
172 for (k = 0; k < MAX_CHECK_BUS; k++) {
173 for (i = 0; i < 32; i++) {
174 u32 val1;
175 u32 addr;
176
177 pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
178 if (val1 == 0xffffffff)
179 continue;
180
181 /* Locking probably not needed, but safer */
182 spin_lock_irqsave(&pci_config_lock, flags);
183 addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
184 if (addr != 0)
185 pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
186 if (addr == 0 ||
187 readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
188 set_bit(i + 32*k, fallback_slots);
189 printk(KERN_NOTICE
190 "PCI: No mmconfig possible on %x:%x\n", k, i);
191 }
192 spin_unlock_irqrestore(&pci_config_lock, flags);
193 }
194 }
195} 143}
196 144
197void __init pci_mmcfg_init(int type) 145int __init pci_mmcfg_arch_init(void)
198{ 146{
199 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
200 return;
201
202 acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
203 if ((pci_mmcfg_config_num == 0) ||
204 (pci_mmcfg_config == NULL) ||
205 (pci_mmcfg_config[0].base_address == 0))
206 return;
207
208 /* Only do this check when type 1 works. If it doesn't work
209 assume we run on a Mac and always use MCFG */
210 if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address,
211 pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
212 E820_RESERVED)) {
213 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
214 pci_mmcfg_config[0].base_address);
215 printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
216 return;
217 }
218
219 printk(KERN_INFO "PCI: Using MMCONFIG\n"); 147 printk(KERN_INFO "PCI: Using MMCONFIG\n");
220 raw_pci_ops = &pci_mmcfg; 148 raw_pci_ops = &pci_mmcfg;
221 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 149 return 1;
222
223 unreachable_devices();
224} 150}
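
After this cleanup, get_base_addr() is a straight scan: return the aperture whose segment matches and whose bus range covers the request, or 0 to fall back to type 1 config cycles. A reduced model with invented MCFG entries:

#include <stdio.h>

/* Invented MCFG entries; real ones come from the ACPI MCFG table. */
struct mcfg_entry {
    unsigned int pci_segment;
    unsigned int start_bus_number;
    unsigned int end_bus_number;
    unsigned long long address;
};

static const struct mcfg_entry mcfg[] = {
    { 0,  0,  63, 0xe0000000ULL },
    { 0, 64, 255, 0xf0000000ULL },
};

/* First segment + bus-range match wins; 0 means "fall back". */
static unsigned long long mcfg_base(unsigned int seg, unsigned int bus)
{
    for (size_t i = 0; i < sizeof(mcfg) / sizeof(mcfg[0]); i++)
        if (mcfg[i].pci_segment == seg &&
            mcfg[i].start_bus_number <= bus &&
            bus <= mcfg[i].end_bus_number)
            return mcfg[i].address;
    return 0;
}

int main(void)
{
    printf("seg 0, bus 0x40 -> %#llx\n", mcfg_base(0, 0x40)); /* 0xf0000000 */
    printf("seg 1, bus 0    -> %#llx\n", mcfg_base(1, 0));    /* 0 */
    return 0;
}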
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index a0a25180b61a..e58bae2076ad 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -94,3 +94,13 @@ extern void pci_pcbios_init(void);
94extern void pci_mmcfg_init(int type); 94extern void pci_mmcfg_init(int type);
95extern void pcibios_sort(void); 95extern void pcibios_sort(void);
96 96
97/* pci-mmconfig.c */
98
99/* Verify the first 16 busses. We assume that systems with more busses
100 get MCFG right. */
101#define PCI_MMCFG_MAX_CHECK_BUS 16
102extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
103
104extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
105 unsigned int devfn);
106extern int __init pci_mmcfg_arch_init(void);