aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig18
-rw-r--r--arch/i386/Kconfig.cpu5
-rw-r--r--arch/i386/Kconfig.debug2
-rw-r--r--arch/i386/defconfig51
-rw-r--r--arch/i386/kernel/Makefile3
-rw-r--r--arch/i386/kernel/apic.c6
-rw-r--r--arch/i386/kernel/apm.c26
-rw-r--r--arch/i386/kernel/asm-offsets.c2
-rw-r--r--arch/i386/kernel/cpu/common.c14
-rw-r--r--arch/i386/kernel/cpu/cyrix.c52
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.c1
-rw-r--r--arch/i386/kernel/cpu/mcheck/mce.h2
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/if.c30
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c6
-rw-r--r--arch/i386/kernel/cpu/mtrr/mtrr.h2
-rw-r--r--arch/i386/kernel/cpu/proc.c14
-rw-r--r--arch/i386/kernel/cpu/transmeta.c5
-rw-r--r--arch/i386/kernel/cpuid.c7
-rw-r--r--arch/i386/kernel/e820.c18
-rw-r--r--arch/i386/kernel/entry.S78
-rw-r--r--arch/i386/kernel/head.S38
-rw-r--r--arch/i386/kernel/io_apic.c4
-rw-r--r--arch/i386/kernel/irq.c3
-rw-r--r--arch/i386/kernel/kprobes.c6
-rw-r--r--arch/i386/kernel/microcode.c2
-rw-r--r--arch/i386/kernel/msr.c13
-rw-r--r--arch/i386/kernel/nmi.c98
-rw-r--r--arch/i386/kernel/paravirt.c116
-rw-r--r--arch/i386/kernel/pcspeaker.c20
-rw-r--r--arch/i386/kernel/process.c99
-rw-r--r--arch/i386/kernel/ptrace.c16
-rw-r--r--arch/i386/kernel/setup.c35
-rw-r--r--arch/i386/kernel/signal.c16
-rw-r--r--arch/i386/kernel/smp.c7
-rw-r--r--arch/i386/kernel/smpboot.c16
-rw-r--r--arch/i386/kernel/sysenter.c2
-rw-r--r--arch/i386/kernel/time.c14
-rw-r--r--arch/i386/kernel/traps.c27
-rw-r--r--arch/i386/kernel/tsc.c26
-rw-r--r--arch/i386/kernel/vm86.c33
-rw-r--r--arch/i386/kernel/vmi.c949
-rw-r--r--arch/i386/kernel/vmitime.c499
-rw-r--r--arch/i386/kernel/vmlinux.lds.S7
-rw-r--r--arch/i386/math-emu/get_address.c14
-rw-r--r--arch/i386/math-emu/status_w.h8
-rw-r--r--arch/i386/mm/discontig.c1
-rw-r--r--arch/i386/mm/fault.c18
-rw-r--r--arch/i386/mm/init.c4
-rw-r--r--arch/i386/mm/pageattr.c2
-rw-r--r--arch/i386/mm/pgtable.c26
-rw-r--r--arch/i386/oprofile/op_model_ppro.c9
-rw-r--r--arch/i386/pci/Makefile2
-rw-r--r--arch/i386/pci/mmconfig-shared.c264
-rw-r--r--arch/i386/pci/mmconfig.c96
-rw-r--r--arch/i386/pci/pci.h10
56 files changed, 2422 insertions, 422 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 63d5e841caf5..595fb771366e 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -203,6 +203,15 @@ config PARAVIRT
203 However, when run without a hypervisor the kernel is 203 However, when run without a hypervisor the kernel is
204 theoretically slower. If in doubt, say N. 204 theoretically slower. If in doubt, say N.
205 205
206config VMI
207 bool "VMI Paravirt-ops support"
208 depends on PARAVIRT
209 default y
210 help
211 VMI provides a paravirtualized interface to multiple hypervisors
212 include VMware ESX server and Xen by connecting to a ROM module
213 provided by the hypervisor.
214
206config ACPI_SRAT 215config ACPI_SRAT
207 bool 216 bool
208 default y 217 default y
@@ -1263,3 +1272,12 @@ config X86_TRAMPOLINE
1263config KTIME_SCALAR 1272config KTIME_SCALAR
1264 bool 1273 bool
1265 default y 1274 default y
1275
1276config NO_IDLE_HZ
1277 bool
1278 depends on PARAVIRT
1279 default y
1280 help
1281 Switches the regular HZ timer off when the system is going idle.
1282 This helps a hypervisor detect that the Linux system is idle,
1283 reducing the overhead of idle systems.
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index 2aecfba4ac4f..b99c0e2a4e63 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -226,11 +226,6 @@ config X86_CMPXCHG
226 depends on !M386 226 depends on !M386
227 default y 227 default y
228 228
229config X86_XADD
230 bool
231 depends on !M386
232 default y
233
234config X86_L1_CACHE_SHIFT 229config X86_L1_CACHE_SHIFT
235 int 230 int
236 default "7" if MPENTIUM4 || X86_GENERIC 231 default "7" if MPENTIUM4 || X86_GENERIC
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index f68cc6f215f8..458bc1611933 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -87,7 +87,7 @@ config DOUBLEFAULT
87 87
88config DEBUG_PARAVIRT 88config DEBUG_PARAVIRT
89 bool "Enable some paravirtualization debugging" 89 bool "Enable some paravirtualization debugging"
90 default y 90 default n
91 depends on PARAVIRT && DEBUG_KERNEL 91 depends on PARAVIRT && DEBUG_KERNEL
92 help 92 help
93 Currently deliberately clobbers regs which are allowed to be 93 Currently deliberately clobbers regs which are allowed to be
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index bb0c376b62b3..5ae1e0bc8fd7 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.20-rc3 3# Linux kernel version: 2.6.20-git8
4# Fri Jan 5 11:54:46 2007 4# Tue Feb 13 11:25:18 2007
5# 5#
6CONFIG_X86_32=y 6CONFIG_X86_32=y
7CONFIG_GENERIC_TIME=y 7CONFIG_GENERIC_TIME=y
@@ -10,6 +10,7 @@ CONFIG_STACKTRACE_SUPPORT=y
10CONFIG_SEMAPHORE_SLEEPERS=y 10CONFIG_SEMAPHORE_SLEEPERS=y
11CONFIG_X86=y 11CONFIG_X86=y
12CONFIG_MMU=y 12CONFIG_MMU=y
13CONFIG_ZONE_DMA=y
13CONFIG_GENERIC_ISA_DMA=y 14CONFIG_GENERIC_ISA_DMA=y
14CONFIG_GENERIC_IOMAP=y 15CONFIG_GENERIC_IOMAP=y
15CONFIG_GENERIC_BUG=y 16CONFIG_GENERIC_BUG=y
@@ -139,7 +140,6 @@ CONFIG_MPENTIUMIII=y
139# CONFIG_MVIAC3_2 is not set 140# CONFIG_MVIAC3_2 is not set
140CONFIG_X86_GENERIC=y 141CONFIG_X86_GENERIC=y
141CONFIG_X86_CMPXCHG=y 142CONFIG_X86_CMPXCHG=y
142CONFIG_X86_XADD=y
143CONFIG_X86_L1_CACHE_SHIFT=7 143CONFIG_X86_L1_CACHE_SHIFT=7
144CONFIG_RWSEM_XCHGADD_ALGORITHM=y 144CONFIG_RWSEM_XCHGADD_ALGORITHM=y
145# CONFIG_ARCH_HAS_ILOG2_U32 is not set 145# CONFIG_ARCH_HAS_ILOG2_U32 is not set
@@ -198,6 +198,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y
198# CONFIG_SPARSEMEM_STATIC is not set 198# CONFIG_SPARSEMEM_STATIC is not set
199CONFIG_SPLIT_PTLOCK_CPUS=4 199CONFIG_SPLIT_PTLOCK_CPUS=4
200CONFIG_RESOURCES_64BIT=y 200CONFIG_RESOURCES_64BIT=y
201CONFIG_ZONE_DMA_FLAG=1
201# CONFIG_HIGHPTE is not set 202# CONFIG_HIGHPTE is not set
202# CONFIG_MATH_EMULATION is not set 203# CONFIG_MATH_EMULATION is not set
203CONFIG_MTRR=y 204CONFIG_MTRR=y
@@ -211,6 +212,7 @@ CONFIG_HZ_250=y
211CONFIG_HZ=250 212CONFIG_HZ=250
212# CONFIG_KEXEC is not set 213# CONFIG_KEXEC is not set
213# CONFIG_CRASH_DUMP is not set 214# CONFIG_CRASH_DUMP is not set
215CONFIG_PHYSICAL_START=0x100000
214# CONFIG_RELOCATABLE is not set 216# CONFIG_RELOCATABLE is not set
215CONFIG_PHYSICAL_ALIGN=0x100000 217CONFIG_PHYSICAL_ALIGN=0x100000
216# CONFIG_HOTPLUG_CPU is not set 218# CONFIG_HOTPLUG_CPU is not set
@@ -229,13 +231,14 @@ CONFIG_PM_SYSFS_DEPRECATED=y
229# ACPI (Advanced Configuration and Power Interface) Support 231# ACPI (Advanced Configuration and Power Interface) Support
230# 232#
231CONFIG_ACPI=y 233CONFIG_ACPI=y
234CONFIG_ACPI_PROCFS=y
232CONFIG_ACPI_AC=y 235CONFIG_ACPI_AC=y
233CONFIG_ACPI_BATTERY=y 236CONFIG_ACPI_BATTERY=y
234CONFIG_ACPI_BUTTON=y 237CONFIG_ACPI_BUTTON=y
235# CONFIG_ACPI_VIDEO is not set
236# CONFIG_ACPI_HOTKEY is not set 238# CONFIG_ACPI_HOTKEY is not set
237CONFIG_ACPI_FAN=y 239CONFIG_ACPI_FAN=y
238# CONFIG_ACPI_DOCK is not set 240# CONFIG_ACPI_DOCK is not set
241# CONFIG_ACPI_BAY is not set
239CONFIG_ACPI_PROCESSOR=y 242CONFIG_ACPI_PROCESSOR=y
240CONFIG_ACPI_THERMAL=y 243CONFIG_ACPI_THERMAL=y
241# CONFIG_ACPI_ASUS is not set 244# CONFIG_ACPI_ASUS is not set
@@ -306,7 +309,6 @@ CONFIG_PCI_DIRECT=y
306CONFIG_PCI_MMCONFIG=y 309CONFIG_PCI_MMCONFIG=y
307# CONFIG_PCIEPORTBUS is not set 310# CONFIG_PCIEPORTBUS is not set
308CONFIG_PCI_MSI=y 311CONFIG_PCI_MSI=y
309# CONFIG_PCI_MULTITHREAD_PROBE is not set
310# CONFIG_PCI_DEBUG is not set 312# CONFIG_PCI_DEBUG is not set
311# CONFIG_HT_IRQ is not set 313# CONFIG_HT_IRQ is not set
312CONFIG_ISA_DMA_API=y 314CONFIG_ISA_DMA_API=y
@@ -347,6 +349,7 @@ CONFIG_UNIX=y
347CONFIG_XFRM=y 349CONFIG_XFRM=y
348# CONFIG_XFRM_USER is not set 350# CONFIG_XFRM_USER is not set
349# CONFIG_XFRM_SUB_POLICY is not set 351# CONFIG_XFRM_SUB_POLICY is not set
352# CONFIG_XFRM_MIGRATE is not set
350# CONFIG_NET_KEY is not set 353# CONFIG_NET_KEY is not set
351CONFIG_INET=y 354CONFIG_INET=y
352CONFIG_IP_MULTICAST=y 355CONFIG_IP_MULTICAST=y
@@ -446,6 +449,7 @@ CONFIG_STANDALONE=y
446CONFIG_PREVENT_FIRMWARE_BUILD=y 449CONFIG_PREVENT_FIRMWARE_BUILD=y
447CONFIG_FW_LOADER=y 450CONFIG_FW_LOADER=y
448# CONFIG_DEBUG_DRIVER is not set 451# CONFIG_DEBUG_DRIVER is not set
452# CONFIG_DEBUG_DEVRES is not set
449# CONFIG_SYS_HYPERVISOR is not set 453# CONFIG_SYS_HYPERVISOR is not set
450 454
451# 455#
@@ -466,8 +470,7 @@ CONFIG_FW_LOADER=y
466# 470#
467# Plug and Play support 471# Plug and Play support
468# 472#
469CONFIG_PNP=y 473# CONFIG_PNP is not set
470CONFIG_PNPACPI=y
471 474
472# 475#
473# Block devices 476# Block devices
@@ -515,6 +518,7 @@ CONFIG_BLK_DEV_IDECD=y
515# CONFIG_BLK_DEV_IDETAPE is not set 518# CONFIG_BLK_DEV_IDETAPE is not set
516# CONFIG_BLK_DEV_IDEFLOPPY is not set 519# CONFIG_BLK_DEV_IDEFLOPPY is not set
517# CONFIG_BLK_DEV_IDESCSI is not set 520# CONFIG_BLK_DEV_IDESCSI is not set
521CONFIG_BLK_DEV_IDEACPI=y
518# CONFIG_IDE_TASK_IOCTL is not set 522# CONFIG_IDE_TASK_IOCTL is not set
519 523
520# 524#
@@ -547,6 +551,7 @@ CONFIG_BLK_DEV_AMD74XX=y
547# CONFIG_BLK_DEV_JMICRON is not set 551# CONFIG_BLK_DEV_JMICRON is not set
548# CONFIG_BLK_DEV_SC1200 is not set 552# CONFIG_BLK_DEV_SC1200 is not set
549CONFIG_BLK_DEV_PIIX=y 553CONFIG_BLK_DEV_PIIX=y
554# CONFIG_BLK_DEV_IT8213 is not set
550# CONFIG_BLK_DEV_IT821X is not set 555# CONFIG_BLK_DEV_IT821X is not set
551# CONFIG_BLK_DEV_NS87415 is not set 556# CONFIG_BLK_DEV_NS87415 is not set
552# CONFIG_BLK_DEV_PDC202XX_OLD is not set 557# CONFIG_BLK_DEV_PDC202XX_OLD is not set
@@ -557,6 +562,7 @@ CONFIG_BLK_DEV_PIIX=y
557# CONFIG_BLK_DEV_SLC90E66 is not set 562# CONFIG_BLK_DEV_SLC90E66 is not set
558# CONFIG_BLK_DEV_TRM290 is not set 563# CONFIG_BLK_DEV_TRM290 is not set
559# CONFIG_BLK_DEV_VIA82CXXX is not set 564# CONFIG_BLK_DEV_VIA82CXXX is not set
565# CONFIG_BLK_DEV_TC86C001 is not set
560# CONFIG_IDE_ARM is not set 566# CONFIG_IDE_ARM is not set
561CONFIG_BLK_DEV_IDEDMA=y 567CONFIG_BLK_DEV_IDEDMA=y
562# CONFIG_IDEDMA_IVB is not set 568# CONFIG_IDEDMA_IVB is not set
@@ -655,6 +661,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
655# Serial ATA (prod) and Parallel ATA (experimental) drivers 661# Serial ATA (prod) and Parallel ATA (experimental) drivers
656# 662#
657CONFIG_ATA=y 663CONFIG_ATA=y
664# CONFIG_ATA_NONSTANDARD is not set
658CONFIG_SATA_AHCI=y 665CONFIG_SATA_AHCI=y
659CONFIG_SATA_SVW=y 666CONFIG_SATA_SVW=y
660CONFIG_ATA_PIIX=y 667CONFIG_ATA_PIIX=y
@@ -670,6 +677,7 @@ CONFIG_SATA_SIL=y
670# CONFIG_SATA_ULI is not set 677# CONFIG_SATA_ULI is not set
671CONFIG_SATA_VIA=y 678CONFIG_SATA_VIA=y
672# CONFIG_SATA_VITESSE is not set 679# CONFIG_SATA_VITESSE is not set
680# CONFIG_SATA_INIC162X is not set
673CONFIG_SATA_INTEL_COMBINED=y 681CONFIG_SATA_INTEL_COMBINED=y
674# CONFIG_PATA_ALI is not set 682# CONFIG_PATA_ALI is not set
675# CONFIG_PATA_AMD is not set 683# CONFIG_PATA_AMD is not set
@@ -687,6 +695,7 @@ CONFIG_SATA_INTEL_COMBINED=y
687# CONFIG_PATA_HPT3X2N is not set 695# CONFIG_PATA_HPT3X2N is not set
688# CONFIG_PATA_HPT3X3 is not set 696# CONFIG_PATA_HPT3X3 is not set
689# CONFIG_PATA_IT821X is not set 697# CONFIG_PATA_IT821X is not set
698# CONFIG_PATA_IT8213 is not set
690# CONFIG_PATA_JMICRON is not set 699# CONFIG_PATA_JMICRON is not set
691# CONFIG_PATA_TRIFLEX is not set 700# CONFIG_PATA_TRIFLEX is not set
692# CONFIG_PATA_MARVELL is not set 701# CONFIG_PATA_MARVELL is not set
@@ -739,9 +748,7 @@ CONFIG_IEEE1394=y
739# Subsystem Options 748# Subsystem Options
740# 749#
741# CONFIG_IEEE1394_VERBOSEDEBUG is not set 750# CONFIG_IEEE1394_VERBOSEDEBUG is not set
742# CONFIG_IEEE1394_OUI_DB is not set
743# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set 751# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
744# CONFIG_IEEE1394_EXPORT_FULL_API is not set
745 752
746# 753#
747# Device Drivers 754# Device Drivers
@@ -767,6 +774,11 @@ CONFIG_IEEE1394_RAWIO=y
767# CONFIG_I2O is not set 774# CONFIG_I2O is not set
768 775
769# 776#
777# Macintosh device drivers
778#
779# CONFIG_MAC_EMUMOUSEBTN is not set
780
781#
770# Network device support 782# Network device support
771# 783#
772CONFIG_NETDEVICES=y 784CONFIG_NETDEVICES=y
@@ -833,6 +845,7 @@ CONFIG_8139TOO=y
833# CONFIG_SUNDANCE is not set 845# CONFIG_SUNDANCE is not set
834# CONFIG_TLAN is not set 846# CONFIG_TLAN is not set
835# CONFIG_VIA_RHINE is not set 847# CONFIG_VIA_RHINE is not set
848# CONFIG_SC92031 is not set
836 849
837# 850#
838# Ethernet (1000 Mbit) 851# Ethernet (1000 Mbit)
@@ -855,11 +868,13 @@ CONFIG_SKY2=y
855CONFIG_TIGON3=y 868CONFIG_TIGON3=y
856CONFIG_BNX2=y 869CONFIG_BNX2=y
857# CONFIG_QLA3XXX is not set 870# CONFIG_QLA3XXX is not set
871# CONFIG_ATL1 is not set
858 872
859# 873#
860# Ethernet (10000 Mbit) 874# Ethernet (10000 Mbit)
861# 875#
862# CONFIG_CHELSIO_T1 is not set 876# CONFIG_CHELSIO_T1 is not set
877# CONFIG_CHELSIO_T3 is not set
863# CONFIG_IXGB is not set 878# CONFIG_IXGB is not set
864# CONFIG_S2IO is not set 879# CONFIG_S2IO is not set
865# CONFIG_MYRI10GE is not set 880# CONFIG_MYRI10GE is not set
@@ -1090,6 +1105,7 @@ CONFIG_SOUND=y
1090# Open Sound System 1105# Open Sound System
1091# 1106#
1092CONFIG_SOUND_PRIME=y 1107CONFIG_SOUND_PRIME=y
1108CONFIG_OBSOLETE_OSS=y
1093# CONFIG_SOUND_BT878 is not set 1109# CONFIG_SOUND_BT878 is not set
1094# CONFIG_SOUND_ES1371 is not set 1110# CONFIG_SOUND_ES1371 is not set
1095CONFIG_SOUND_ICH=y 1111CONFIG_SOUND_ICH=y
@@ -1103,6 +1119,7 @@ CONFIG_SOUND_ICH=y
1103# HID Devices 1119# HID Devices
1104# 1120#
1105CONFIG_HID=y 1121CONFIG_HID=y
1122# CONFIG_HID_DEBUG is not set
1106 1123
1107# 1124#
1108# USB support 1125# USB support
@@ -1117,10 +1134,8 @@ CONFIG_USB=y
1117# Miscellaneous USB options 1134# Miscellaneous USB options
1118# 1135#
1119CONFIG_USB_DEVICEFS=y 1136CONFIG_USB_DEVICEFS=y
1120# CONFIG_USB_BANDWIDTH is not set
1121# CONFIG_USB_DYNAMIC_MINORS is not set 1137# CONFIG_USB_DYNAMIC_MINORS is not set
1122# CONFIG_USB_SUSPEND is not set 1138# CONFIG_USB_SUSPEND is not set
1123# CONFIG_USB_MULTITHREAD_PROBE is not set
1124# CONFIG_USB_OTG is not set 1139# CONFIG_USB_OTG is not set
1125 1140
1126# 1141#
@@ -1130,9 +1145,11 @@ CONFIG_USB_EHCI_HCD=y
1130# CONFIG_USB_EHCI_SPLIT_ISO is not set 1145# CONFIG_USB_EHCI_SPLIT_ISO is not set
1131# CONFIG_USB_EHCI_ROOT_HUB_TT is not set 1146# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
1132# CONFIG_USB_EHCI_TT_NEWSCHED is not set 1147# CONFIG_USB_EHCI_TT_NEWSCHED is not set
1148# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
1133# CONFIG_USB_ISP116X_HCD is not set 1149# CONFIG_USB_ISP116X_HCD is not set
1134CONFIG_USB_OHCI_HCD=y 1150CONFIG_USB_OHCI_HCD=y
1135# CONFIG_USB_OHCI_BIG_ENDIAN is not set 1151# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
1152# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set
1136CONFIG_USB_OHCI_LITTLE_ENDIAN=y 1153CONFIG_USB_OHCI_LITTLE_ENDIAN=y
1137CONFIG_USB_UHCI_HCD=y 1154CONFIG_USB_UHCI_HCD=y
1138# CONFIG_USB_SL811_HCD is not set 1155# CONFIG_USB_SL811_HCD is not set
@@ -1183,6 +1200,7 @@ CONFIG_USB_HID=y
1183# CONFIG_USB_ATI_REMOTE2 is not set 1200# CONFIG_USB_ATI_REMOTE2 is not set
1184# CONFIG_USB_KEYSPAN_REMOTE is not set 1201# CONFIG_USB_KEYSPAN_REMOTE is not set
1185# CONFIG_USB_APPLETOUCH is not set 1202# CONFIG_USB_APPLETOUCH is not set
1203# CONFIG_USB_GTCO is not set
1186 1204
1187# 1205#
1188# USB Imaging devices 1206# USB Imaging devices
@@ -1288,6 +1306,10 @@ CONFIG_USB_MON=y
1288# 1306#
1289 1307
1290# 1308#
1309# Auxiliary Display support
1310#
1311
1312#
1291# Virtualization 1313# Virtualization
1292# 1314#
1293# CONFIG_KVM is not set 1315# CONFIG_KVM is not set
@@ -1480,6 +1502,7 @@ CONFIG_UNUSED_SYMBOLS=y
1480# CONFIG_DEBUG_FS is not set 1502# CONFIG_DEBUG_FS is not set
1481# CONFIG_HEADERS_CHECK is not set 1503# CONFIG_HEADERS_CHECK is not set
1482CONFIG_DEBUG_KERNEL=y 1504CONFIG_DEBUG_KERNEL=y
1505# CONFIG_DEBUG_SHIRQ is not set
1483CONFIG_LOG_BUF_SHIFT=18 1506CONFIG_LOG_BUF_SHIFT=18
1484CONFIG_DETECT_SOFTLOCKUP=y 1507CONFIG_DETECT_SOFTLOCKUP=y
1485# CONFIG_SCHEDSTATS is not set 1508# CONFIG_SCHEDSTATS is not set
@@ -1488,7 +1511,6 @@ CONFIG_DETECT_SOFTLOCKUP=y
1488# CONFIG_RT_MUTEX_TESTER is not set 1511# CONFIG_RT_MUTEX_TESTER is not set
1489# CONFIG_DEBUG_SPINLOCK is not set 1512# CONFIG_DEBUG_SPINLOCK is not set
1490# CONFIG_DEBUG_MUTEXES is not set 1513# CONFIG_DEBUG_MUTEXES is not set
1491# CONFIG_DEBUG_RWSEMS is not set
1492# CONFIG_DEBUG_LOCK_ALLOC is not set 1514# CONFIG_DEBUG_LOCK_ALLOC is not set
1493# CONFIG_PROVE_LOCKING is not set 1515# CONFIG_PROVE_LOCKING is not set
1494# CONFIG_DEBUG_SPINLOCK_SLEEP is not set 1516# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
@@ -1533,7 +1555,8 @@ CONFIG_CRC32=y
1533# CONFIG_LIBCRC32C is not set 1555# CONFIG_LIBCRC32C is not set
1534CONFIG_ZLIB_INFLATE=y 1556CONFIG_ZLIB_INFLATE=y
1535CONFIG_PLIST=y 1557CONFIG_PLIST=y
1536CONFIG_IOMAP_COPY=y 1558CONFIG_HAS_IOMEM=y
1559CONFIG_HAS_IOPORT=y
1537CONFIG_GENERIC_HARDIRQS=y 1560CONFIG_GENERIC_HARDIRQS=y
1538CONFIG_GENERIC_IRQ_PROBE=y 1561CONFIG_GENERIC_IRQ_PROBE=y
1539CONFIG_GENERIC_PENDING_IRQ=y 1562CONFIG_GENERIC_PENDING_IRQ=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 1e8988e558c5..cbe4e601885c 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,8 +40,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
40obj-$(CONFIG_HPET_TIMER) += hpet.o 40obj-$(CONFIG_HPET_TIMER) += hpet.o
41obj-$(CONFIG_K8_NB) += k8.o 41obj-$(CONFIG_K8_NB) += k8.o
42 42
43# Make sure this is linked after any other paravirt_ops structs: see head.S 43obj-$(CONFIG_VMI) += vmi.o vmitime.o
44obj-$(CONFIG_PARAVIRT) += paravirt.o 44obj-$(CONFIG_PARAVIRT) += paravirt.o
45obj-y += pcspeaker.o
45 46
46EXTRA_AFLAGS := -traditional 47EXTRA_AFLAGS := -traditional
47 48
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 776d9be26af9..f4159e0a7ae9 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
36#include <asm/hpet.h> 36#include <asm/hpet.h>
37#include <asm/i8253.h> 37#include <asm/i8253.h>
38#include <asm/nmi.h> 38#include <asm/nmi.h>
39#include <asm/idle.h>
39 40
40#include <mach_apic.h> 41#include <mach_apic.h>
41#include <mach_apicdef.h> 42#include <mach_apicdef.h>
@@ -1255,6 +1256,7 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
1255 * Besides, if we don't timer interrupts ignore the global 1256 * Besides, if we don't timer interrupts ignore the global
1256 * interrupt lock, which is the WrongThing (tm) to do. 1257 * interrupt lock, which is the WrongThing (tm) to do.
1257 */ 1258 */
1259 exit_idle();
1258 irq_enter(); 1260 irq_enter();
1259 smp_local_timer_interrupt(); 1261 smp_local_timer_interrupt();
1260 irq_exit(); 1262 irq_exit();
@@ -1305,6 +1307,7 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs)
1305{ 1307{
1306 unsigned long v; 1308 unsigned long v;
1307 1309
1310 exit_idle();
1308 irq_enter(); 1311 irq_enter();
1309 /* 1312 /*
1310 * Check if this really is a spurious interrupt and ACK it 1313 * Check if this really is a spurious interrupt and ACK it
@@ -1329,6 +1332,7 @@ fastcall void smp_error_interrupt(struct pt_regs *regs)
1329{ 1332{
1330 unsigned long v, v1; 1333 unsigned long v, v1;
1331 1334
1335 exit_idle();
1332 irq_enter(); 1336 irq_enter();
1333 /* First tickle the hardware, only then report what went on. -- REW */ 1337 /* First tickle the hardware, only then report what went on. -- REW */
1334 v = apic_read(APIC_ESR); 1338 v = apic_read(APIC_ESR);
@@ -1395,7 +1399,7 @@ int __init APIC_init_uniprocessor (void)
1395 if (!skip_ioapic_setup && nr_ioapics) 1399 if (!skip_ioapic_setup && nr_ioapics)
1396 setup_IO_APIC(); 1400 setup_IO_APIC();
1397#endif 1401#endif
1398 setup_boot_APIC_clock(); 1402 setup_boot_clock();
1399 1403
1400 return 0; 1404 return 0;
1401} 1405}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index db99a8948dae..f9ba0af7ee1f 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -211,6 +211,7 @@
211#include <linux/slab.h> 211#include <linux/slab.h>
212#include <linux/stat.h> 212#include <linux/stat.h>
213#include <linux/proc_fs.h> 213#include <linux/proc_fs.h>
214#include <linux/seq_file.h>
214#include <linux/miscdevice.h> 215#include <linux/miscdevice.h>
215#include <linux/apm_bios.h> 216#include <linux/apm_bios.h>
216#include <linux/init.h> 217#include <linux/init.h>
@@ -1636,9 +1637,8 @@ static int do_open(struct inode * inode, struct file * filp)
1636 return 0; 1637 return 0;
1637} 1638}
1638 1639
1639static int apm_get_info(char *buf, char **start, off_t fpos, int length) 1640static int proc_apm_show(struct seq_file *m, void *v)
1640{ 1641{
1641 char * p;
1642 unsigned short bx; 1642 unsigned short bx;
1643 unsigned short cx; 1643 unsigned short cx;
1644 unsigned short dx; 1644 unsigned short dx;
@@ -1650,8 +1650,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1650 int time_units = -1; 1650 int time_units = -1;
1651 char *units = "?"; 1651 char *units = "?";
1652 1652
1653 p = buf;
1654
1655 if ((num_online_cpus() == 1) && 1653 if ((num_online_cpus() == 1) &&
1656 !(error = apm_get_power_status(&bx, &cx, &dx))) { 1654 !(error = apm_get_power_status(&bx, &cx, &dx))) {
1657 ac_line_status = (bx >> 8) & 0xff; 1655 ac_line_status = (bx >> 8) & 0xff;
@@ -1705,7 +1703,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1705 -1: Unknown 1703 -1: Unknown
1706 8) min = minutes; sec = seconds */ 1704 8) min = minutes; sec = seconds */
1707 1705
1708 p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", 1706 seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
1709 driver_version, 1707 driver_version,
1710 (apm_info.bios.version >> 8) & 0xff, 1708 (apm_info.bios.version >> 8) & 0xff,
1711 apm_info.bios.version & 0xff, 1709 apm_info.bios.version & 0xff,
@@ -1716,10 +1714,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length)
1716 percentage, 1714 percentage,
1717 time_units, 1715 time_units,
1718 units); 1716 units);
1717 return 0;
1718}
1719 1719
1720 return p - buf; 1720static int proc_apm_open(struct inode *inode, struct file *file)
1721{
1722 return single_open(file, proc_apm_show, NULL);
1721} 1723}
1722 1724
1725static const struct file_operations apm_file_ops = {
1726 .owner = THIS_MODULE,
1727 .open = proc_apm_open,
1728 .read = seq_read,
1729 .llseek = seq_lseek,
1730 .release = single_release,
1731};
1732
1723static int apm(void *unused) 1733static int apm(void *unused)
1724{ 1734{
1725 unsigned short bx; 1735 unsigned short bx;
@@ -2341,9 +2351,9 @@ static int __init apm_init(void)
2341 set_base(gdt[APM_DS >> 3], 2351 set_base(gdt[APM_DS >> 3],
2342 __va((unsigned long)apm_info.bios.dseg << 4)); 2352 __va((unsigned long)apm_info.bios.dseg << 4));
2343 2353
2344 apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); 2354 apm_proc = create_proc_entry("apm", 0, NULL);
2345 if (apm_proc) 2355 if (apm_proc)
2346 apm_proc->owner = THIS_MODULE; 2356 apm_proc->proc_fops = &apm_file_ops;
2347 2357
2348 kapmd_task = kthread_create(apm, NULL, "kapmd"); 2358 kapmd_task = kthread_create(apm, NULL, "kapmd");
2349 if (IS_ERR(kapmd_task)) { 2359 if (IS_ERR(kapmd_task)) {
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1b2f3cd33270..c37535163bfc 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -72,7 +72,7 @@ void foo(void)
72 OFFSET(PT_EAX, pt_regs, eax); 72 OFFSET(PT_EAX, pt_regs, eax);
73 OFFSET(PT_DS, pt_regs, xds); 73 OFFSET(PT_DS, pt_regs, xds);
74 OFFSET(PT_ES, pt_regs, xes); 74 OFFSET(PT_ES, pt_regs, xes);
75 OFFSET(PT_GS, pt_regs, xgs); 75 OFFSET(PT_FS, pt_regs, xfs);
76 OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); 76 OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
77 OFFSET(PT_EIP, pt_regs, eip); 77 OFFSET(PT_EIP, pt_regs, eip);
78 OFFSET(PT_CS, pt_regs, xcs); 78 OFFSET(PT_CS, pt_regs, xcs);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 8a8bbdaaf38a..dcbbd0a8bfc2 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -605,7 +605,7 @@ void __init early_cpu_init(void)
605struct pt_regs * __devinit idle_regs(struct pt_regs *regs) 605struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
606{ 606{
607 memset(regs, 0, sizeof(struct pt_regs)); 607 memset(regs, 0, sizeof(struct pt_regs));
608 regs->xgs = __KERNEL_PDA; 608 regs->xfs = __KERNEL_PDA;
609 return regs; 609 return regs;
610} 610}
611 611
@@ -662,12 +662,12 @@ struct i386_pda boot_pda = {
662 .pcurrent = &init_task, 662 .pcurrent = &init_task,
663}; 663};
664 664
665static inline void set_kernel_gs(void) 665static inline void set_kernel_fs(void)
666{ 666{
667 /* Set %gs for this CPU's PDA. Memory clobber is to create a 667 /* Set %fs for this CPU's PDA. Memory clobber is to create a
668 barrier with respect to any PDA operations, so the compiler 668 barrier with respect to any PDA operations, so the compiler
669 doesn't move any before here. */ 669 doesn't move any before here. */
670 asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); 670 asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
671} 671}
672 672
673/* Initialize the CPU's GDT and PDA. The boot CPU does this for 673/* Initialize the CPU's GDT and PDA. The boot CPU does this for
@@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu)
718 the boot CPU, this will transition from the boot gdt+pda to 718 the boot CPU, this will transition from the boot gdt+pda to
719 the real ones). */ 719 the real ones). */
720 load_gdt(cpu_gdt_descr); 720 load_gdt(cpu_gdt_descr);
721 set_kernel_gs(); 721 set_kernel_fs();
722} 722}
723 723
724/* Common CPU init for both boot and secondary CPUs */ 724/* Common CPU init for both boot and secondary CPUs */
@@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
764 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 764 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
765#endif 765#endif
766 766
767 /* Clear %fs. */ 767 /* Clear %gs. */
768 asm volatile ("mov %0, %%fs" : : "r" (0)); 768 asm volatile ("mov %0, %%gs" : : "r" (0));
769 769
770 /* Clear all 6 debug registers: */ 770 /* Clear all 6 debug registers: */
771 set_debugreg(0, 0); 771 set_debugreg(0, 0);
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index c0c3b59de32c..de27bd07bc9c 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -6,6 +6,7 @@
6#include <asm/io.h> 6#include <asm/io.h>
7#include <asm/processor.h> 7#include <asm/processor.h>
8#include <asm/timer.h> 8#include <asm/timer.h>
9#include <asm/pci-direct.h>
9 10
10#include "cpu.h" 11#include "cpu.h"
11 12
@@ -161,19 +162,19 @@ static void __cpuinit set_cx86_inc(void)
161static void __cpuinit geode_configure(void) 162static void __cpuinit geode_configure(void)
162{ 163{
163 unsigned long flags; 164 unsigned long flags;
164 u8 ccr3, ccr4; 165 u8 ccr3;
165 local_irq_save(flags); 166 local_irq_save(flags);
166 167
167 /* Suspend on halt power saving and enable #SUSP pin */ 168 /* Suspend on halt power saving and enable #SUSP pin */
168 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); 169 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
169 170
170 ccr3 = getCx86(CX86_CCR3); 171 ccr3 = getCx86(CX86_CCR3);
171 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ 172 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
172
173 ccr4 = getCx86(CX86_CCR4);
174 ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */
175 173
176 setCx86(CX86_CCR3, ccr3); 174
175 /* FPU fast, DTE cache, Mem bypass */
176 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
177 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
177 178
178 set_cx86_memwb(); 179 set_cx86_memwb();
179 set_cx86_reorder(); 180 set_cx86_reorder();
@@ -183,14 +184,6 @@ static void __cpuinit geode_configure(void)
183} 184}
184 185
185 186
186#ifdef CONFIG_PCI
187static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
190 { },
191};
192#endif
193
194static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) 187static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
195{ 188{
196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; 189 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
@@ -258,6 +251,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
258 251
259 case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ 252 case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
260#ifdef CONFIG_PCI 253#ifdef CONFIG_PCI
254 {
255 u32 vendor, device;
261 /* It isn't really a PCI quirk directly, but the cure is the 256 /* It isn't really a PCI quirk directly, but the cure is the
262 same. The MediaGX has deep magic SMM stuff that handles the 257 same. The MediaGX has deep magic SMM stuff that handles the
263 SB emulation. It thows away the fifo on disable_dma() which 258 SB emulation. It thows away the fifo on disable_dma() which
@@ -273,22 +268,34 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
273 printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); 268 printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
274 isa_dma_bridge_buggy = 2; 269 isa_dma_bridge_buggy = 2;
275 270
271 /* We do this before the PCI layer is running. However we
272 are safe here as we know the bridge must be a Cyrix
273 companion and must be present */
274 vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID);
275 device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID);
276 276
277 /* 277 /*
278 * The 5510/5520 companion chips have a funky PIT. 278 * The 5510/5520 companion chips have a funky PIT.
279 */ 279 */
280 if (pci_dev_present(cyrix_55x0)) 280 if (vendor == PCI_VENDOR_ID_CYRIX &&
281 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
281 pit_latch_buggy = 1; 282 pit_latch_buggy = 1;
283 }
282#endif 284#endif
283 c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ 285 c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
284 286
285 /* GXm supports extended cpuid levels 'ala' AMD */ 287 /* GXm supports extended cpuid levels 'ala' AMD */
286 if (c->cpuid_level == 2) { 288 if (c->cpuid_level == 2) {
287 /* Enable cxMMX extensions (GX1 Datasheet 54) */ 289 /* Enable cxMMX extensions (GX1 Datasheet 54) */
288 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); 290 setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
289 291
290 /* GXlv/GXm/GX1 */ 292 /*
291 if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) 293 * GXm : 0x30 ... 0x5f GXm datasheet 51
294 * GXlv: 0x6x GXlv datasheet 54
295 * ? : 0x7x
296 * GX1 : 0x8x GX1 datasheet 56
297 */
298 if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f))
292 geode_configure(); 299 geode_configure();
293 get_model_name(c); /* get CPU marketing name */ 300 get_model_name(c); /* get CPU marketing name */
294 return; 301 return;
@@ -415,15 +422,14 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
415 422
416 if (dir0 == 5 || dir0 == 3) 423 if (dir0 == 5 || dir0 == 3)
417 { 424 {
418 unsigned char ccr3, ccr4; 425 unsigned char ccr3;
419 unsigned long flags; 426 unsigned long flags;
420 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); 427 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
421 local_irq_save(flags); 428 local_irq_save(flags);
422 ccr3 = getCx86(CX86_CCR3); 429 ccr3 = getCx86(CX86_CCR3);
423 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
424 ccr4 = getCx86(CX86_CCR4); 431 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */
425 setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ 432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
426 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
427 local_irq_restore(flags); 433 local_irq_restore(flags);
428 } 434 }
429 } 435 }
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index d555bec0db99..4f10c62d180c 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -12,6 +12,7 @@
12 12
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <asm/system.h> 14#include <asm/system.h>
15#include <asm/mce.h>
15 16
16#include "mce.h" 17#include "mce.h"
17 18
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h
index 84fd4cf7d0fb..81fb6e2d35f3 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.h
+++ b/arch/i386/kernel/cpu/mcheck/mce.h
@@ -1,4 +1,5 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <asm/mce.h>
2 3
3void amd_mcheck_init(struct cpuinfo_x86 *c); 4void amd_mcheck_init(struct cpuinfo_x86 *c);
4void intel_p4_mcheck_init(struct cpuinfo_x86 *c); 5void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
@@ -9,6 +10,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c);
9/* Call the installed machine check handler for this CPU setup. */ 10/* Call the installed machine check handler for this CPU setup. */
10extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); 11extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
11 12
12extern int mce_disabled;
13extern int nr_mce_banks; 13extern int nr_mce_banks;
14 14
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 504434a46011..8359c19d3a23 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -12,6 +12,7 @@
12#include <asm/system.h> 12#include <asm/system.h>
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15#include <asm/idle.h>
15 16
16#include <asm/therm_throt.h> 17#include <asm/therm_throt.h>
17 18
@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm
59 60
60fastcall void smp_thermal_interrupt(struct pt_regs *regs) 61fastcall void smp_thermal_interrupt(struct pt_regs *regs)
61{ 62{
63 exit_idle();
62 irq_enter(); 64 irq_enter();
63 vendor_thermal_interrupt(regs); 65 vendor_thermal_interrupt(regs);
64 irq_exit(); 66 irq_exit();
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index ee771f305f96..c7d8f1756745 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -211,6 +211,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
211 default: 211 default:
212 return -ENOTTY; 212 return -ENOTTY;
213 case MTRRIOC_ADD_ENTRY: 213 case MTRRIOC_ADD_ENTRY:
214#ifdef CONFIG_COMPAT
215 case MTRRIOC32_ADD_ENTRY:
216#endif
214 if (!capable(CAP_SYS_ADMIN)) 217 if (!capable(CAP_SYS_ADMIN))
215 return -EPERM; 218 return -EPERM;
216 err = 219 err =
@@ -218,21 +221,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
218 file, 0); 221 file, 0);
219 break; 222 break;
220 case MTRRIOC_SET_ENTRY: 223 case MTRRIOC_SET_ENTRY:
224#ifdef CONFIG_COMPAT
225 case MTRRIOC32_SET_ENTRY:
226#endif
221 if (!capable(CAP_SYS_ADMIN)) 227 if (!capable(CAP_SYS_ADMIN))
222 return -EPERM; 228 return -EPERM;
223 err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); 229 err = mtrr_add(sentry.base, sentry.size, sentry.type, 0);
224 break; 230 break;
225 case MTRRIOC_DEL_ENTRY: 231 case MTRRIOC_DEL_ENTRY:
232#ifdef CONFIG_COMPAT
233 case MTRRIOC32_DEL_ENTRY:
234#endif
226 if (!capable(CAP_SYS_ADMIN)) 235 if (!capable(CAP_SYS_ADMIN))
227 return -EPERM; 236 return -EPERM;
228 err = mtrr_file_del(sentry.base, sentry.size, file, 0); 237 err = mtrr_file_del(sentry.base, sentry.size, file, 0);
229 break; 238 break;
230 case MTRRIOC_KILL_ENTRY: 239 case MTRRIOC_KILL_ENTRY:
240#ifdef CONFIG_COMPAT
241 case MTRRIOC32_KILL_ENTRY:
242#endif
231 if (!capable(CAP_SYS_ADMIN)) 243 if (!capable(CAP_SYS_ADMIN))
232 return -EPERM; 244 return -EPERM;
233 err = mtrr_del(-1, sentry.base, sentry.size); 245 err = mtrr_del(-1, sentry.base, sentry.size);
234 break; 246 break;
235 case MTRRIOC_GET_ENTRY: 247 case MTRRIOC_GET_ENTRY:
248#ifdef CONFIG_COMPAT
249 case MTRRIOC32_GET_ENTRY:
250#endif
236 if (gentry.regnum >= num_var_ranges) 251 if (gentry.regnum >= num_var_ranges)
237 return -EINVAL; 252 return -EINVAL;
238 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 253 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
@@ -249,6 +264,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
249 264
250 break; 265 break;
251 case MTRRIOC_ADD_PAGE_ENTRY: 266 case MTRRIOC_ADD_PAGE_ENTRY:
267#ifdef CONFIG_COMPAT
268 case MTRRIOC32_ADD_PAGE_ENTRY:
269#endif
252 if (!capable(CAP_SYS_ADMIN)) 270 if (!capable(CAP_SYS_ADMIN))
253 return -EPERM; 271 return -EPERM;
254 err = 272 err =
@@ -256,21 +274,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
256 file, 1); 274 file, 1);
257 break; 275 break;
258 case MTRRIOC_SET_PAGE_ENTRY: 276 case MTRRIOC_SET_PAGE_ENTRY:
277#ifdef CONFIG_COMPAT
278 case MTRRIOC32_SET_PAGE_ENTRY:
279#endif
259 if (!capable(CAP_SYS_ADMIN)) 280 if (!capable(CAP_SYS_ADMIN))
260 return -EPERM; 281 return -EPERM;
261 err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); 282 err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0);
262 break; 283 break;
263 case MTRRIOC_DEL_PAGE_ENTRY: 284 case MTRRIOC_DEL_PAGE_ENTRY:
285#ifdef CONFIG_COMPAT
286 case MTRRIOC32_DEL_PAGE_ENTRY:
287#endif
264 if (!capable(CAP_SYS_ADMIN)) 288 if (!capable(CAP_SYS_ADMIN))
265 return -EPERM; 289 return -EPERM;
266 err = mtrr_file_del(sentry.base, sentry.size, file, 1); 290 err = mtrr_file_del(sentry.base, sentry.size, file, 1);
267 break; 291 break;
268 case MTRRIOC_KILL_PAGE_ENTRY: 292 case MTRRIOC_KILL_PAGE_ENTRY:
293#ifdef CONFIG_COMPAT
294 case MTRRIOC32_KILL_PAGE_ENTRY:
295#endif
269 if (!capable(CAP_SYS_ADMIN)) 296 if (!capable(CAP_SYS_ADMIN))
270 return -EPERM; 297 return -EPERM;
271 err = mtrr_del_page(-1, sentry.base, sentry.size); 298 err = mtrr_del_page(-1, sentry.base, sentry.size);
272 break; 299 break;
273 case MTRRIOC_GET_PAGE_ENTRY: 300 case MTRRIOC_GET_PAGE_ENTRY:
301#ifdef CONFIG_COMPAT
302 case MTRRIOC32_GET_PAGE_ENTRY:
303#endif
274 if (gentry.regnum >= num_var_ranges) 304 if (gentry.regnum >= num_var_ranges)
275 return -EINVAL; 305 return -EINVAL;
276 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); 306 mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 16bb7ea87145..0acfb6a5a220 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0;
50unsigned int *usage_table; 50unsigned int *usage_table;
51static DEFINE_MUTEX(mtrr_mutex); 51static DEFINE_MUTEX(mtrr_mutex);
52 52
53u32 size_or_mask, size_and_mask; 53u64 size_or_mask, size_and_mask;
54 54
55static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; 55static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
56 56
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void)
662 boot_cpu_data.x86_mask == 0x4)) 662 boot_cpu_data.x86_mask == 0x4))
663 phys_addr = 36; 663 phys_addr = 36;
664 664
665 size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); 665 size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
666 size_and_mask = ~size_or_mask & 0xfff00000; 666 size_and_mask = ~size_or_mask & 0xfffff00000ULL;
667 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && 667 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
668 boot_cpu_data.x86 == 6) { 668 boot_cpu_data.x86 == 6) {
669 /* VIA C* family have Intel style MTRRs, but 669 /* VIA C* family have Intel style MTRRs, but
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index d61ea9db6cfe..289dfe6030e3 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -84,7 +84,7 @@ void get_mtrr_state(void);
84 84
85extern void set_mtrr_ops(struct mtrr_ops * ops); 85extern void set_mtrr_ops(struct mtrr_ops * ops);
86 86
87extern u32 size_or_mask, size_and_mask; 87extern u64 size_or_mask, size_and_mask;
88extern struct mtrr_ops * mtrr_if; 88extern struct mtrr_ops * mtrr_if;
89 89
90#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 90#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 6624d8583c42..47e3ebbfb28d 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 29 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, 30 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, 31 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
32 NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", 32 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow",
33 33
34 /* Transmeta-defined */ 34 /* Transmeta-defined */
35 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, 35 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -47,7 +47,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
47 /* Intel-defined (#2) */ 47 /* Intel-defined (#2) */
48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, 50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
52 52
53 /* VIA/Cyrix/Centaur-defined */ 53 /* VIA/Cyrix/Centaur-defined */
@@ -57,8 +57,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
57 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 57 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
58 58
59 /* AMD-defined (#2) */ 59 /* AMD-defined (#2) */
60 "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, 60 "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm",
61 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 61 "sse4a", "misalignsse",
62 "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL,
62 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 }; 65 };
@@ -69,8 +70,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
69 "ttp", /* thermal trip */ 70 "ttp", /* thermal trip */
70 "tm", 71 "tm",
71 "stc", 72 "stc",
73 "100mhzsteps",
74 "hwpstate",
72 NULL, 75 NULL,
73 /* nothing */ /* constant_tsc - moved to flags */ 76 NULL, /* constant_tsc - moved to flags */
77 /* nothing */
74 }; 78 };
75 struct cpuinfo_x86 *c = v; 79 struct cpuinfo_x86 *c = v;
76 int i, n = c - cpu_data; 80 int i, n = c - cpu_data;
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 4056fb7d2cdf..5678d46863c6 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -9,7 +9,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 9{
10 unsigned int cap_mask, uk, max, dummy; 10 unsigned int cap_mask, uk, max, dummy;
11 unsigned int cms_rev1, cms_rev2; 11 unsigned int cms_rev1, cms_rev2;
12 unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; 12 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
13 char cpu_info[65]; 13 char cpu_info[65];
14 14
15 get_model_name(c); /* Same as AMD/Cyrix */ 15 get_model_name(c); /* Same as AMD/Cyrix */
@@ -72,6 +72,9 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
72 wrmsr(0x80860004, ~0, uk); 72 wrmsr(0x80860004, ~0, uk);
73 c->x86_capability[0] = cpuid_edx(0x00000001); 73 c->x86_capability[0] = cpuid_edx(0x00000001);
74 wrmsr(0x80860004, cap_mask, uk); 74 wrmsr(0x80860004, cap_mask, uk);
75
76 /* All Transmeta CPUs have a constant TSC */
77 set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
75 78
76 /* If we can run i686 user-space code, call us an i686 */ 79 /* If we can run i686 user-space code, call us an i686 */
77#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) 80#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index 4da75fa3208d..eeae0d992337 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -48,7 +48,6 @@ static struct class *cpuid_class;
48#ifdef CONFIG_SMP 48#ifdef CONFIG_SMP
49 49
50struct cpuid_command { 50struct cpuid_command {
51 int cpu;
52 u32 reg; 51 u32 reg;
53 u32 *data; 52 u32 *data;
54}; 53};
@@ -57,8 +56,7 @@ static void cpuid_smp_cpuid(void *cmd_block)
57{ 56{
58 struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; 57 struct cpuid_command *cmd = (struct cpuid_command *)cmd_block;
59 58
60 if (cmd->cpu == smp_processor_id()) 59 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
61 cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
62 &cmd->data[3]); 60 &cmd->data[3]);
63} 61}
64 62
@@ -70,11 +68,10 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data)
70 if (cpu == smp_processor_id()) { 68 if (cpu == smp_processor_id()) {
71 cpuid(reg, &data[0], &data[1], &data[2], &data[3]); 69 cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
72 } else { 70 } else {
73 cmd.cpu = cpu;
74 cmd.reg = reg; 71 cmd.reg = reg;
75 cmd.data = data; 72 cmd.data = data;
76 73
77 smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); 74 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
78 } 75 }
79 preempt_enable(); 76 preempt_enable();
80} 77}
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index f391abcf7da9..70f39560846a 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -14,6 +14,7 @@
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/page.h> 15#include <asm/page.h>
16#include <asm/e820.h> 16#include <asm/e820.h>
17#include <asm/setup.h>
17 18
18#ifdef CONFIG_EFI 19#ifdef CONFIG_EFI
19int efi_enabled = 0; 20int efi_enabled = 0;
@@ -156,21 +157,22 @@ static struct resource standard_io_resources[] = { {
156 .flags = IORESOURCE_BUSY | IORESOURCE_IO 157 .flags = IORESOURCE_BUSY | IORESOURCE_IO
157} }; 158} };
158 159
159static int romsignature(const unsigned char *x) 160#define ROMSIGNATURE 0xaa55
161
162static int __init romsignature(const unsigned char *rom)
160{ 163{
161 unsigned short sig; 164 unsigned short sig;
162 int ret = 0; 165
163 if (probe_kernel_address((const unsigned short *)x, sig) == 0) 166 return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
164 ret = (sig == 0xaa55); 167 sig == ROMSIGNATURE;
165 return ret;
166} 168}
167 169
168static int __init romchecksum(unsigned char *rom, unsigned long length) 170static int __init romchecksum(unsigned char *rom, unsigned long length)
169{ 171{
170 unsigned char *p, sum = 0; 172 unsigned char sum;
171 173
172 for (p = rom; p < rom + length; p++) 174 for (sum = 0; length; length--)
173 sum += *p; 175 sum += *rom++;
174 return sum == 0; 176 return sum == 0;
175} 177}
176 178
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683fc63a..18bddcb8e9e8 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,7 +30,7 @@
30 * 18(%esp) - %eax 30 * 18(%esp) - %eax
31 * 1C(%esp) - %ds 31 * 1C(%esp) - %ds
32 * 20(%esp) - %es 32 * 20(%esp) - %es
33 * 24(%esp) - %gs 33 * 24(%esp) - %fs
34 * 28(%esp) - orig_eax 34 * 28(%esp) - orig_eax
35 * 2C(%esp) - %eip 35 * 2C(%esp) - %eip
36 * 30(%esp) - %cs 36 * 30(%esp) - %cs
@@ -99,9 +99,9 @@ VM_MASK = 0x00020000
99 99
100#define SAVE_ALL \ 100#define SAVE_ALL \
101 cld; \ 101 cld; \
102 pushl %gs; \ 102 pushl %fs; \
103 CFI_ADJUST_CFA_OFFSET 4;\ 103 CFI_ADJUST_CFA_OFFSET 4;\
104 /*CFI_REL_OFFSET gs, 0;*/\ 104 /*CFI_REL_OFFSET fs, 0;*/\
105 pushl %es; \ 105 pushl %es; \
106 CFI_ADJUST_CFA_OFFSET 4;\ 106 CFI_ADJUST_CFA_OFFSET 4;\
107 /*CFI_REL_OFFSET es, 0;*/\ 107 /*CFI_REL_OFFSET es, 0;*/\
@@ -133,7 +133,7 @@ VM_MASK = 0x00020000
133 movl %edx, %ds; \ 133 movl %edx, %ds; \
134 movl %edx, %es; \ 134 movl %edx, %es; \
135 movl $(__KERNEL_PDA), %edx; \ 135 movl $(__KERNEL_PDA), %edx; \
136 movl %edx, %gs 136 movl %edx, %fs
137 137
138#define RESTORE_INT_REGS \ 138#define RESTORE_INT_REGS \
139 popl %ebx; \ 139 popl %ebx; \
@@ -166,9 +166,9 @@ VM_MASK = 0x00020000
1662: popl %es; \ 1662: popl %es; \
167 CFI_ADJUST_CFA_OFFSET -4;\ 167 CFI_ADJUST_CFA_OFFSET -4;\
168 /*CFI_RESTORE es;*/\ 168 /*CFI_RESTORE es;*/\
1693: popl %gs; \ 1693: popl %fs; \
170 CFI_ADJUST_CFA_OFFSET -4;\ 170 CFI_ADJUST_CFA_OFFSET -4;\
171 /*CFI_RESTORE gs;*/\ 171 /*CFI_RESTORE fs;*/\
172.pushsection .fixup,"ax"; \ 172.pushsection .fixup,"ax"; \
1734: movl $0,(%esp); \ 1734: movl $0,(%esp); \
174 jmp 1b; \ 174 jmp 1b; \
@@ -227,6 +227,7 @@ ENTRY(ret_from_fork)
227 CFI_ADJUST_CFA_OFFSET -4 227 CFI_ADJUST_CFA_OFFSET -4
228 jmp syscall_exit 228 jmp syscall_exit
229 CFI_ENDPROC 229 CFI_ENDPROC
230END(ret_from_fork)
230 231
231/* 232/*
232 * Return to user mode is not as complex as all this looks, 233 * Return to user mode is not as complex as all this looks,
@@ -258,6 +259,7 @@ ENTRY(resume_userspace)
258 # int/exception return? 259 # int/exception return?
259 jne work_pending 260 jne work_pending
260 jmp restore_all 261 jmp restore_all
262END(ret_from_exception)
261 263
262#ifdef CONFIG_PREEMPT 264#ifdef CONFIG_PREEMPT
263ENTRY(resume_kernel) 265ENTRY(resume_kernel)
@@ -272,6 +274,7 @@ need_resched:
272 jz restore_all 274 jz restore_all
273 call preempt_schedule_irq 275 call preempt_schedule_irq
274 jmp need_resched 276 jmp need_resched
277END(resume_kernel)
275#endif 278#endif
276 CFI_ENDPROC 279 CFI_ENDPROC
277 280
@@ -349,16 +352,17 @@ sysenter_past_esp:
349 movl PT_OLDESP(%esp), %ecx 352 movl PT_OLDESP(%esp), %ecx
350 xorl %ebp,%ebp 353 xorl %ebp,%ebp
351 TRACE_IRQS_ON 354 TRACE_IRQS_ON
3521: mov PT_GS(%esp), %gs 3551: mov PT_FS(%esp), %fs
353 ENABLE_INTERRUPTS_SYSEXIT 356 ENABLE_INTERRUPTS_SYSEXIT
354 CFI_ENDPROC 357 CFI_ENDPROC
355.pushsection .fixup,"ax" 358.pushsection .fixup,"ax"
3562: movl $0,PT_GS(%esp) 3592: movl $0,PT_FS(%esp)
357 jmp 1b 360 jmp 1b
358.section __ex_table,"a" 361.section __ex_table,"a"
359 .align 4 362 .align 4
360 .long 1b,2b 363 .long 1b,2b
361.popsection 364.popsection
365ENDPROC(sysenter_entry)
362 366
363 # system call handler stub 367 # system call handler stub
364ENTRY(system_call) 368ENTRY(system_call)
@@ -459,6 +463,7 @@ ldt_ss:
459 CFI_ADJUST_CFA_OFFSET -8 463 CFI_ADJUST_CFA_OFFSET -8
460 jmp restore_nocheck 464 jmp restore_nocheck
461 CFI_ENDPROC 465 CFI_ENDPROC
466ENDPROC(system_call)
462 467
463 # perform work that needs to be done immediately before resumption 468 # perform work that needs to be done immediately before resumption
464 ALIGN 469 ALIGN
@@ -504,6 +509,7 @@ work_notifysig_v86:
504 xorl %edx, %edx 509 xorl %edx, %edx
505 call do_notify_resume 510 call do_notify_resume
506 jmp resume_userspace_sig 511 jmp resume_userspace_sig
512END(work_pending)
507 513
508 # perform syscall exit tracing 514 # perform syscall exit tracing
509 ALIGN 515 ALIGN
@@ -519,6 +525,7 @@ syscall_trace_entry:
519 cmpl $(nr_syscalls), %eax 525 cmpl $(nr_syscalls), %eax
520 jnae syscall_call 526 jnae syscall_call
521 jmp syscall_exit 527 jmp syscall_exit
528END(syscall_trace_entry)
522 529
523 # perform syscall exit tracing 530 # perform syscall exit tracing
524 ALIGN 531 ALIGN
@@ -532,6 +539,7 @@ syscall_exit_work:
532 movl $1, %edx 539 movl $1, %edx
533 call do_syscall_trace 540 call do_syscall_trace
534 jmp resume_userspace 541 jmp resume_userspace
542END(syscall_exit_work)
535 CFI_ENDPROC 543 CFI_ENDPROC
536 544
537 RING0_INT_FRAME # can't unwind into user space anyway 545 RING0_INT_FRAME # can't unwind into user space anyway
@@ -542,15 +550,17 @@ syscall_fault:
542 GET_THREAD_INFO(%ebp) 550 GET_THREAD_INFO(%ebp)
543 movl $-EFAULT,PT_EAX(%esp) 551 movl $-EFAULT,PT_EAX(%esp)
544 jmp resume_userspace 552 jmp resume_userspace
553END(syscall_fault)
545 554
546syscall_badsys: 555syscall_badsys:
547 movl $-ENOSYS,PT_EAX(%esp) 556 movl $-ENOSYS,PT_EAX(%esp)
548 jmp resume_userspace 557 jmp resume_userspace
558END(syscall_badsys)
549 CFI_ENDPROC 559 CFI_ENDPROC
550 560
551#define FIXUP_ESPFIX_STACK \ 561#define FIXUP_ESPFIX_STACK \
552 /* since we are on a wrong stack, we cant make it a C code :( */ \ 562 /* since we are on a wrong stack, we cant make it a C code :( */ \
553 movl %gs:PDA_cpu, %ebx; \ 563 movl %fs:PDA_cpu, %ebx; \
554 PER_CPU(cpu_gdt_descr, %ebx); \ 564 PER_CPU(cpu_gdt_descr, %ebx); \
555 movl GDS_address(%ebx), %ebx; \ 565 movl GDS_address(%ebx), %ebx; \
556 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 566 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
@@ -581,9 +591,9 @@ syscall_badsys:
581ENTRY(interrupt) 591ENTRY(interrupt)
582.text 592.text
583 593
584vector=0
585ENTRY(irq_entries_start) 594ENTRY(irq_entries_start)
586 RING0_INT_FRAME 595 RING0_INT_FRAME
596vector=0
587.rept NR_IRQS 597.rept NR_IRQS
588 ALIGN 598 ALIGN
589 .if vector 599 .if vector
@@ -592,11 +602,16 @@ ENTRY(irq_entries_start)
5921: pushl $~(vector) 6021: pushl $~(vector)
593 CFI_ADJUST_CFA_OFFSET 4 603 CFI_ADJUST_CFA_OFFSET 4
594 jmp common_interrupt 604 jmp common_interrupt
595.data 605 .previous
596 .long 1b 606 .long 1b
597.text 607 .text
598vector=vector+1 608vector=vector+1
599.endr 609.endr
610END(irq_entries_start)
611
612.previous
613END(interrupt)
614.previous
600 615
601/* 616/*
602 * the CPU automatically disables interrupts when executing an IRQ vector, 617 * the CPU automatically disables interrupts when executing an IRQ vector,
@@ -609,6 +624,7 @@ common_interrupt:
609 movl %esp,%eax 624 movl %esp,%eax
610 call do_IRQ 625 call do_IRQ
611 jmp ret_from_intr 626 jmp ret_from_intr
627ENDPROC(common_interrupt)
612 CFI_ENDPROC 628 CFI_ENDPROC
613 629
614#define BUILD_INTERRUPT(name, nr) \ 630#define BUILD_INTERRUPT(name, nr) \
@@ -621,18 +637,24 @@ ENTRY(name) \
621 movl %esp,%eax; \ 637 movl %esp,%eax; \
622 call smp_/**/name; \ 638 call smp_/**/name; \
623 jmp ret_from_intr; \ 639 jmp ret_from_intr; \
624 CFI_ENDPROC 640 CFI_ENDPROC; \
641ENDPROC(name)
625 642
626/* The include is where all of the SMP etc. interrupts come from */ 643/* The include is where all of the SMP etc. interrupts come from */
627#include "entry_arch.h" 644#include "entry_arch.h"
628 645
646/* This alternate entry is needed because we hijack the apic LVTT */
647#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
648BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
649#endif
650
629KPROBE_ENTRY(page_fault) 651KPROBE_ENTRY(page_fault)
630 RING0_EC_FRAME 652 RING0_EC_FRAME
631 pushl $do_page_fault 653 pushl $do_page_fault
632 CFI_ADJUST_CFA_OFFSET 4 654 CFI_ADJUST_CFA_OFFSET 4
633 ALIGN 655 ALIGN
634error_code: 656error_code:
635 /* the function address is in %gs's slot on the stack */ 657 /* the function address is in %fs's slot on the stack */
636 pushl %es 658 pushl %es
637 CFI_ADJUST_CFA_OFFSET 4 659 CFI_ADJUST_CFA_OFFSET 4
638 /*CFI_REL_OFFSET es, 0*/ 660 /*CFI_REL_OFFSET es, 0*/
@@ -661,20 +683,20 @@ error_code:
661 CFI_ADJUST_CFA_OFFSET 4 683 CFI_ADJUST_CFA_OFFSET 4
662 CFI_REL_OFFSET ebx, 0 684 CFI_REL_OFFSET ebx, 0
663 cld 685 cld
664 pushl %gs 686 pushl %fs
665 CFI_ADJUST_CFA_OFFSET 4 687 CFI_ADJUST_CFA_OFFSET 4
666 /*CFI_REL_OFFSET gs, 0*/ 688 /*CFI_REL_OFFSET fs, 0*/
667 movl $(__KERNEL_PDA), %ecx 689 movl $(__KERNEL_PDA), %ecx
668 movl %ecx, %gs 690 movl %ecx, %fs
669 UNWIND_ESPFIX_STACK 691 UNWIND_ESPFIX_STACK
670 popl %ecx 692 popl %ecx
671 CFI_ADJUST_CFA_OFFSET -4 693 CFI_ADJUST_CFA_OFFSET -4
672 /*CFI_REGISTER es, ecx*/ 694 /*CFI_REGISTER es, ecx*/
673 movl PT_GS(%esp), %edi # get the function address 695 movl PT_FS(%esp), %edi # get the function address
674 movl PT_ORIG_EAX(%esp), %edx # get the error code 696 movl PT_ORIG_EAX(%esp), %edx # get the error code
675 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 697 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
676 mov %ecx, PT_GS(%esp) 698 mov %ecx, PT_FS(%esp)
677 /*CFI_REL_OFFSET gs, ES*/ 699 /*CFI_REL_OFFSET fs, ES*/
678 movl $(__USER_DS), %ecx 700 movl $(__USER_DS), %ecx
679 movl %ecx, %ds 701 movl %ecx, %ds
680 movl %ecx, %es 702 movl %ecx, %es
@@ -692,6 +714,7 @@ ENTRY(coprocessor_error)
692 CFI_ADJUST_CFA_OFFSET 4 714 CFI_ADJUST_CFA_OFFSET 4
693 jmp error_code 715 jmp error_code
694 CFI_ENDPROC 716 CFI_ENDPROC
717END(coprocessor_error)
695 718
696ENTRY(simd_coprocessor_error) 719ENTRY(simd_coprocessor_error)
697 RING0_INT_FRAME 720 RING0_INT_FRAME
@@ -701,6 +724,7 @@ ENTRY(simd_coprocessor_error)
701 CFI_ADJUST_CFA_OFFSET 4 724 CFI_ADJUST_CFA_OFFSET 4
702 jmp error_code 725 jmp error_code
703 CFI_ENDPROC 726 CFI_ENDPROC
727END(simd_coprocessor_error)
704 728
705ENTRY(device_not_available) 729ENTRY(device_not_available)
706 RING0_INT_FRAME 730 RING0_INT_FRAME
@@ -721,6 +745,7 @@ device_not_available_emulate:
721 CFI_ADJUST_CFA_OFFSET -4 745 CFI_ADJUST_CFA_OFFSET -4
722 jmp ret_from_exception 746 jmp ret_from_exception
723 CFI_ENDPROC 747 CFI_ENDPROC
748END(device_not_available)
724 749
725/* 750/*
726 * Debug traps and NMI can happen at the one SYSENTER instruction 751 * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -864,10 +889,12 @@ ENTRY(native_iret)
864 .align 4 889 .align 4
865 .long 1b,iret_exc 890 .long 1b,iret_exc
866.previous 891.previous
892END(native_iret)
867 893
868ENTRY(native_irq_enable_sysexit) 894ENTRY(native_irq_enable_sysexit)
869 sti 895 sti
870 sysexit 896 sysexit
897END(native_irq_enable_sysexit)
871#endif 898#endif
872 899
873KPROBE_ENTRY(int3) 900KPROBE_ENTRY(int3)
@@ -890,6 +917,7 @@ ENTRY(overflow)
890 CFI_ADJUST_CFA_OFFSET 4 917 CFI_ADJUST_CFA_OFFSET 4
891 jmp error_code 918 jmp error_code
892 CFI_ENDPROC 919 CFI_ENDPROC
920END(overflow)
893 921
894ENTRY(bounds) 922ENTRY(bounds)
895 RING0_INT_FRAME 923 RING0_INT_FRAME
@@ -899,6 +927,7 @@ ENTRY(bounds)
899 CFI_ADJUST_CFA_OFFSET 4 927 CFI_ADJUST_CFA_OFFSET 4
900 jmp error_code 928 jmp error_code
901 CFI_ENDPROC 929 CFI_ENDPROC
930END(bounds)
902 931
903ENTRY(invalid_op) 932ENTRY(invalid_op)
904 RING0_INT_FRAME 933 RING0_INT_FRAME
@@ -908,6 +937,7 @@ ENTRY(invalid_op)
908 CFI_ADJUST_CFA_OFFSET 4 937 CFI_ADJUST_CFA_OFFSET 4
909 jmp error_code 938 jmp error_code
910 CFI_ENDPROC 939 CFI_ENDPROC
940END(invalid_op)
911 941
912ENTRY(coprocessor_segment_overrun) 942ENTRY(coprocessor_segment_overrun)
913 RING0_INT_FRAME 943 RING0_INT_FRAME
@@ -917,6 +947,7 @@ ENTRY(coprocessor_segment_overrun)
917 CFI_ADJUST_CFA_OFFSET 4 947 CFI_ADJUST_CFA_OFFSET 4
918 jmp error_code 948 jmp error_code
919 CFI_ENDPROC 949 CFI_ENDPROC
950END(coprocessor_segment_overrun)
920 951
921ENTRY(invalid_TSS) 952ENTRY(invalid_TSS)
922 RING0_EC_FRAME 953 RING0_EC_FRAME
@@ -924,6 +955,7 @@ ENTRY(invalid_TSS)
924 CFI_ADJUST_CFA_OFFSET 4 955 CFI_ADJUST_CFA_OFFSET 4
925 jmp error_code 956 jmp error_code
926 CFI_ENDPROC 957 CFI_ENDPROC
958END(invalid_TSS)
927 959
928ENTRY(segment_not_present) 960ENTRY(segment_not_present)
929 RING0_EC_FRAME 961 RING0_EC_FRAME
@@ -931,6 +963,7 @@ ENTRY(segment_not_present)
931 CFI_ADJUST_CFA_OFFSET 4 963 CFI_ADJUST_CFA_OFFSET 4
932 jmp error_code 964 jmp error_code
933 CFI_ENDPROC 965 CFI_ENDPROC
966END(segment_not_present)
934 967
935ENTRY(stack_segment) 968ENTRY(stack_segment)
936 RING0_EC_FRAME 969 RING0_EC_FRAME
@@ -938,6 +971,7 @@ ENTRY(stack_segment)
938 CFI_ADJUST_CFA_OFFSET 4 971 CFI_ADJUST_CFA_OFFSET 4
939 jmp error_code 972 jmp error_code
940 CFI_ENDPROC 973 CFI_ENDPROC
974END(stack_segment)
941 975
942KPROBE_ENTRY(general_protection) 976KPROBE_ENTRY(general_protection)
943 RING0_EC_FRAME 977 RING0_EC_FRAME
@@ -953,6 +987,7 @@ ENTRY(alignment_check)
953 CFI_ADJUST_CFA_OFFSET 4 987 CFI_ADJUST_CFA_OFFSET 4
954 jmp error_code 988 jmp error_code
955 CFI_ENDPROC 989 CFI_ENDPROC
990END(alignment_check)
956 991
957ENTRY(divide_error) 992ENTRY(divide_error)
958 RING0_INT_FRAME 993 RING0_INT_FRAME
@@ -962,6 +997,7 @@ ENTRY(divide_error)
962 CFI_ADJUST_CFA_OFFSET 4 997 CFI_ADJUST_CFA_OFFSET 4
963 jmp error_code 998 jmp error_code
964 CFI_ENDPROC 999 CFI_ENDPROC
1000END(divide_error)
965 1001
966#ifdef CONFIG_X86_MCE 1002#ifdef CONFIG_X86_MCE
967ENTRY(machine_check) 1003ENTRY(machine_check)
@@ -972,6 +1008,7 @@ ENTRY(machine_check)
972 CFI_ADJUST_CFA_OFFSET 4 1008 CFI_ADJUST_CFA_OFFSET 4
973 jmp error_code 1009 jmp error_code
974 CFI_ENDPROC 1010 CFI_ENDPROC
1011END(machine_check)
975#endif 1012#endif
976 1013
977ENTRY(spurious_interrupt_bug) 1014ENTRY(spurious_interrupt_bug)
@@ -982,6 +1019,7 @@ ENTRY(spurious_interrupt_bug)
982 CFI_ADJUST_CFA_OFFSET 4 1019 CFI_ADJUST_CFA_OFFSET 4
983 jmp error_code 1020 jmp error_code
984 CFI_ENDPROC 1021 CFI_ENDPROC
1022END(spurious_interrupt_bug)
985 1023
986ENTRY(kernel_thread_helper) 1024ENTRY(kernel_thread_helper)
987 pushl $0 # fake return address for unwinder 1025 pushl $0 # fake return address for unwinder
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index cb9abdfced9b..3fa7f9389afe 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -53,6 +53,7 @@
53 * any particular GDT layout, because we load our own as soon as we 53 * any particular GDT layout, because we load our own as soon as we
54 * can. 54 * can.
55 */ 55 */
56.section .text.head,"ax",@progbits
56ENTRY(startup_32) 57ENTRY(startup_32)
57 58
58#ifdef CONFIG_PARAVIRT 59#ifdef CONFIG_PARAVIRT
@@ -141,16 +142,25 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
141 jb 10b 142 jb 10b
142 movl %edi,(init_pg_tables_end - __PAGE_OFFSET) 143 movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
143 144
144#ifdef CONFIG_SMP
145 xorl %ebx,%ebx /* This is the boot CPU (BSP) */ 145 xorl %ebx,%ebx /* This is the boot CPU (BSP) */
146 jmp 3f 146 jmp 3f
147
148/* 147/*
149 * Non-boot CPU entry point; entered from trampoline.S 148 * Non-boot CPU entry point; entered from trampoline.S
150 * We can't lgdt here, because lgdt itself uses a data segment, but 149 * We can't lgdt here, because lgdt itself uses a data segment, but
151 * we know the trampoline has already loaded the boot_gdt_table GDT 150 * we know the trampoline has already loaded the boot_gdt_table GDT
152 * for us. 151 * for us.
152 *
153 * If cpu hotplug is not supported then this code can go in init section
154 * which will be freed later
153 */ 155 */
156
157#ifdef CONFIG_HOTPLUG_CPU
158.section .text,"ax",@progbits
159#else
160.section .init.text,"ax",@progbits
161#endif
162
163#ifdef CONFIG_SMP
154ENTRY(startup_32_smp) 164ENTRY(startup_32_smp)
155 cld 165 cld
156 movl $(__BOOT_DS),%eax 166 movl $(__BOOT_DS),%eax
@@ -208,8 +218,8 @@ ENTRY(startup_32_smp)
208 xorl %ebx,%ebx 218 xorl %ebx,%ebx
209 incl %ebx 219 incl %ebx
210 220
2113:
212#endif /* CONFIG_SMP */ 221#endif /* CONFIG_SMP */
2223:
213 223
214/* 224/*
215 * Enable paging 225 * Enable paging
@@ -309,7 +319,7 @@ is386: movl $2,%ecx # set MP
309 319
310 call check_x87 320 call check_x87
311 call setup_pda 321 call setup_pda
312 lgdt cpu_gdt_descr 322 lgdt early_gdt_descr
313 lidt idt_descr 323 lidt idt_descr
314 ljmp $(__KERNEL_CS),$1f 324 ljmp $(__KERNEL_CS),$1f
3151: movl $(__KERNEL_DS),%eax # reload all the segment registers 3251: movl $(__KERNEL_DS),%eax # reload all the segment registers
@@ -319,12 +329,12 @@ is386: movl $2,%ecx # set MP
319 movl %eax,%ds 329 movl %eax,%ds
320 movl %eax,%es 330 movl %eax,%es
321 331
322 xorl %eax,%eax # Clear FS and LDT 332 xorl %eax,%eax # Clear GS and LDT
323 movl %eax,%fs 333 movl %eax,%gs
324 lldt %ax 334 lldt %ax
325 335
326 movl $(__KERNEL_PDA),%eax 336 movl $(__KERNEL_PDA),%eax
327 mov %eax,%gs 337 mov %eax,%fs
328 338
329 cld # gcc2 wants the direction flag cleared at all times 339 cld # gcc2 wants the direction flag cleared at all times
330 pushl $0 # fake return address for unwinder 340 pushl $0 # fake return address for unwinder
@@ -360,12 +370,12 @@ check_x87:
360 * cpu_gdt_table and boot_pda; for secondary CPUs, these will be 370 * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
361 * that CPU's GDT and PDA. 371 * that CPU's GDT and PDA.
362 */ 372 */
363setup_pda: 373ENTRY(setup_pda)
364 /* get the PDA pointer */ 374 /* get the PDA pointer */
365 movl start_pda, %eax 375 movl start_pda, %eax
366 376
367 /* slot the PDA address into the GDT */ 377 /* slot the PDA address into the GDT */
368 mov cpu_gdt_descr+2, %ecx 378 mov early_gdt_descr+2, %ecx
369 mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ 379 mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
370 shr $16, %eax 380 shr $16, %eax
371 mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ 381 mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
@@ -492,6 +502,7 @@ ignore_int:
492#endif 502#endif
493 iret 503 iret
494 504
505.section .text
495#ifdef CONFIG_PARAVIRT 506#ifdef CONFIG_PARAVIRT
496startup_paravirt: 507startup_paravirt:
497 cld 508 cld
@@ -502,10 +513,11 @@ startup_paravirt:
502 pushl %ecx 513 pushl %ecx
503 pushl %eax 514 pushl %eax
504 515
505 /* paravirt.o is last in link, and that probe fn never returns */
506 pushl $__start_paravirtprobe 516 pushl $__start_paravirtprobe
5071: 5171:
508 movl 0(%esp), %eax 518 movl 0(%esp), %eax
519 cmpl $__stop_paravirtprobe, %eax
520 je unhandled_paravirt
509 pushl (%eax) 521 pushl (%eax)
510 movl 8(%esp), %eax 522 movl 8(%esp), %eax
511 call *(%esp) 523 call *(%esp)
@@ -517,6 +529,10 @@ startup_paravirt:
517 529
518 addl $4, (%esp) 530 addl $4, (%esp)
519 jmp 1b 531 jmp 1b
532
533unhandled_paravirt:
534 /* Nothing wanted us: we're screwed. */
535 ud2
520#endif 536#endif
521 537
522/* 538/*
@@ -581,7 +597,7 @@ idt_descr:
581 597
582# boot GDT descriptor (later on used by CPU#0): 598# boot GDT descriptor (later on used by CPU#0):
583 .word 0 # 32 bit align gdt_desc.address 599 .word 0 # 32 bit align gdt_desc.address
584ENTRY(cpu_gdt_descr) 600ENTRY(early_gdt_descr)
585 .word GDT_ENTRIES*8-1 601 .word GDT_ENTRIES*8-1
586 .long cpu_gdt_table 602 .long cpu_gdt_table
587 603
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index ba8d302a0b72..e30ccedad0b9 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -1920,7 +1920,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
1920static void __init setup_ioapic_ids_from_mpc(void) { } 1920static void __init setup_ioapic_ids_from_mpc(void) { }
1921#endif 1921#endif
1922 1922
1923static int no_timer_check __initdata; 1923int no_timer_check __initdata;
1924 1924
1925static int __init notimercheck(char *s) 1925static int __init notimercheck(char *s)
1926{ 1926{
@@ -2310,7 +2310,7 @@ static inline void __init check_timer(void)
2310 2310
2311 disable_8259A_irq(0); 2311 disable_8259A_irq(0);
2312 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, 2312 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2313 "fasteio"); 2313 "fasteoi");
2314 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 2314 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2315 enable_8259A_irq(0); 2315 enable_8259A_irq(0);
2316 2316
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 3201d421090a..5785d84103a6 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -19,6 +19,8 @@
19#include <linux/cpu.h> 19#include <linux/cpu.h>
20#include <linux/delay.h> 20#include <linux/delay.h>
21 21
22#include <asm/idle.h>
23
22DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; 24DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
23EXPORT_PER_CPU_SYMBOL(irq_stat); 25EXPORT_PER_CPU_SYMBOL(irq_stat);
24 26
@@ -61,6 +63,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
61 union irq_ctx *curctx, *irqctx; 63 union irq_ctx *curctx, *irqctx;
62 u32 *isp; 64 u32 *isp;
63#endif 65#endif
66 exit_idle();
64 67
65 if (unlikely((unsigned)irq >= NR_IRQS)) { 68 if (unlikely((unsigned)irq >= NR_IRQS)) {
66 printk(KERN_EMERG "%s: cannot handle IRQ %d\n", 69 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index af1d53344993..b545bc746fce 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -363,7 +363,7 @@ no_kprobe:
363 " pushf\n" 363 " pushf\n"
364 /* skip cs, eip, orig_eax */ 364 /* skip cs, eip, orig_eax */
365 " subl $12, %esp\n" 365 " subl $12, %esp\n"
366 " pushl %gs\n" 366 " pushl %fs\n"
367 " pushl %ds\n" 367 " pushl %ds\n"
368 " pushl %es\n" 368 " pushl %es\n"
369 " pushl %eax\n" 369 " pushl %eax\n"
@@ -387,7 +387,7 @@ no_kprobe:
387 " popl %edi\n" 387 " popl %edi\n"
388 " popl %ebp\n" 388 " popl %ebp\n"
389 " popl %eax\n" 389 " popl %eax\n"
390 /* skip eip, orig_eax, es, ds, gs */ 390 /* skip eip, orig_eax, es, ds, fs */
391 " addl $20, %esp\n" 391 " addl $20, %esp\n"
392 " popf\n" 392 " popf\n"
393 " ret\n"); 393 " ret\n");
@@ -408,7 +408,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
408 spin_lock_irqsave(&kretprobe_lock, flags); 408 spin_lock_irqsave(&kretprobe_lock, flags);
409 head = kretprobe_inst_table_head(current); 409 head = kretprobe_inst_table_head(current);
410 /* fixup registers */ 410 /* fixup registers */
411 regs->xcs = __KERNEL_CS; 411 regs->xcs = __KERNEL_CS | get_kernel_rpl();
412 regs->eip = trampoline_address; 412 regs->eip = trampoline_address;
413 regs->orig_eax = 0xffffffff; 413 regs->orig_eax = 0xffffffff;
414 414
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index 381252bae3d8..b8f16633a6ec 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -384,7 +384,7 @@ static int do_microcode_update (void)
384{ 384{
385 long cursor = 0; 385 long cursor = 0;
386 int error = 0; 386 int error = 0;
387 void *new_mc; 387 void *new_mc = NULL;
388 int cpu; 388 int cpu;
389 cpumask_t old; 389 cpumask_t old;
390 390
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 4e14264f392a..bcaa6e9b6197 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -68,7 +68,6 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
68#ifdef CONFIG_SMP 68#ifdef CONFIG_SMP
69 69
70struct msr_command { 70struct msr_command {
71 int cpu;
72 int err; 71 int err;
73 u32 reg; 72 u32 reg;
74 u32 data[2]; 73 u32 data[2];
@@ -78,16 +77,14 @@ static void msr_smp_wrmsr(void *cmd_block)
78{ 77{
79 struct msr_command *cmd = (struct msr_command *)cmd_block; 78 struct msr_command *cmd = (struct msr_command *)cmd_block;
80 79
81 if (cmd->cpu == smp_processor_id()) 80 cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
82 cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
83} 81}
84 82
85static void msr_smp_rdmsr(void *cmd_block) 83static void msr_smp_rdmsr(void *cmd_block)
86{ 84{
87 struct msr_command *cmd = (struct msr_command *)cmd_block; 85 struct msr_command *cmd = (struct msr_command *)cmd_block;
88 86
89 if (cmd->cpu == smp_processor_id()) 87 cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
90 cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
91} 88}
92 89
93static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) 90static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
@@ -99,12 +96,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
99 if (cpu == smp_processor_id()) { 96 if (cpu == smp_processor_id()) {
100 ret = wrmsr_eio(reg, eax, edx); 97 ret = wrmsr_eio(reg, eax, edx);
101 } else { 98 } else {
102 cmd.cpu = cpu;
103 cmd.reg = reg; 99 cmd.reg = reg;
104 cmd.data[0] = eax; 100 cmd.data[0] = eax;
105 cmd.data[1] = edx; 101 cmd.data[1] = edx;
106 102
107 smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); 103 smp_call_function_single(cpu, msr_smp_wrmsr, &cmd, 1, 1);
108 ret = cmd.err; 104 ret = cmd.err;
109 } 105 }
110 preempt_enable(); 106 preempt_enable();
@@ -120,10 +116,9 @@ static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx)
120 if (cpu == smp_processor_id()) { 116 if (cpu == smp_processor_id()) {
121 ret = rdmsr_eio(reg, eax, edx); 117 ret = rdmsr_eio(reg, eax, edx);
122 } else { 118 } else {
123 cmd.cpu = cpu;
124 cmd.reg = reg; 119 cmd.reg = reg;
125 120
126 smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); 121 smp_call_function_single(cpu, msr_smp_rdmsr, &cmd, 1, 1);
127 122
128 *eax = cmd.data[0]; 123 *eax = cmd.data[0];
129 *edx = cmd.data[1]; 124 *edx = cmd.data[1];
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 1a6f8bb8881c..5d8a07c20281 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -185,7 +185,8 @@ static __cpuinit inline int nmi_known_cpu(void)
185{ 185{
186 switch (boot_cpu_data.x86_vendor) { 186 switch (boot_cpu_data.x86_vendor) {
187 case X86_VENDOR_AMD: 187 case X86_VENDOR_AMD:
188 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); 188 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
189 || (boot_cpu_data.x86 == 16));
189 case X86_VENDOR_INTEL: 190 case X86_VENDOR_INTEL:
190 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 191 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
191 return 1; 192 return 1;
@@ -216,6 +217,28 @@ static __init void nmi_cpu_busy(void *data)
216} 217}
217#endif 218#endif
218 219
220static unsigned int adjust_for_32bit_ctr(unsigned int hz)
221{
222 u64 counter_val;
223 unsigned int retval = hz;
224
225 /*
226 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
227 * are writable, with higher bits sign extending from bit 31.
228 * So, we can only program the counter with 31 bit values and
229 * 32nd bit should be 1, for 33.. to be 1.
230 * Find the appropriate nmi_hz
231 */
232 counter_val = (u64)cpu_khz * 1000;
233 do_div(counter_val, retval);
234 if (counter_val > 0x7fffffffULL) {
235 u64 count = (u64)cpu_khz * 1000;
236 do_div(count, 0x7fffffffUL);
237 retval = count + 1;
238 }
239 return retval;
240}
241
219static int __init check_nmi_watchdog(void) 242static int __init check_nmi_watchdog(void)
220{ 243{
221 unsigned int *prev_nmi_count; 244 unsigned int *prev_nmi_count;
@@ -281,18 +304,10 @@ static int __init check_nmi_watchdog(void)
281 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 304 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
282 305
283 nmi_hz = 1; 306 nmi_hz = 1;
284 /* 307
285 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter 308 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
286 * are writable, with higher bits sign extending from bit 31. 309 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
287 * So, we can only program the counter with 31 bit values and 310 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
288 * 32nd bit should be 1, for 33.. to be 1.
289 * Find the appropriate nmi_hz
290 */
291 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
292 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
293 u64 count = (u64)cpu_khz * 1000;
294 do_div(count, 0x7fffffffUL);
295 nmi_hz = count + 1;
296 } 311 }
297 } 312 }
298 313
@@ -369,6 +384,34 @@ void enable_timer_nmi_watchdog(void)
369 } 384 }
370} 385}
371 386
387static void __acpi_nmi_disable(void *__unused)
388{
389 apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
390}
391
392/*
393 * Disable timer based NMIs on all CPUs:
394 */
395void acpi_nmi_disable(void)
396{
397 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
398 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
399}
400
401static void __acpi_nmi_enable(void *__unused)
402{
403 apic_write_around(APIC_LVT0, APIC_DM_NMI);
404}
405
406/*
407 * Enable timer based NMIs on all CPUs:
408 */
409void acpi_nmi_enable(void)
410{
411 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
412 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
413}
414
372#ifdef CONFIG_PM 415#ifdef CONFIG_PM
373 416
374static int nmi_pm_active; /* nmi_active before suspend */ 417static int nmi_pm_active; /* nmi_active before suspend */
@@ -442,6 +485,17 @@ static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
442 wrmsrl(perfctr_msr, 0 - count); 485 wrmsrl(perfctr_msr, 0 - count);
443} 486}
444 487
488static void write_watchdog_counter32(unsigned int perfctr_msr,
489 const char *descr)
490{
491 u64 count = (u64)cpu_khz * 1000;
492
493 do_div(count, nmi_hz);
494 if(descr)
495 Dprintk("setting %s to -0x%08Lx\n", descr, count);
496 wrmsr(perfctr_msr, (u32)(-count), 0);
497}
498
445/* Note that these events don't tick when the CPU idles. This means 499/* Note that these events don't tick when the CPU idles. This means
446 the frequency varies with CPU load. */ 500 the frequency varies with CPU load. */
447 501
@@ -531,7 +585,8 @@ static int setup_p6_watchdog(void)
531 585
532 /* setup the timer */ 586 /* setup the timer */
533 wrmsr(evntsel_msr, evntsel, 0); 587 wrmsr(evntsel_msr, evntsel, 0);
534 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); 588 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
589 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
535 apic_write(APIC_LVTPC, APIC_DM_NMI); 590 apic_write(APIC_LVTPC, APIC_DM_NMI);
536 evntsel |= P6_EVNTSEL0_ENABLE; 591 evntsel |= P6_EVNTSEL0_ENABLE;
537 wrmsr(evntsel_msr, evntsel, 0); 592 wrmsr(evntsel_msr, evntsel, 0);
@@ -704,7 +759,8 @@ static int setup_intel_arch_watchdog(void)
704 759
705 /* setup the timer */ 760 /* setup the timer */
706 wrmsr(evntsel_msr, evntsel, 0); 761 wrmsr(evntsel_msr, evntsel, 0);
707 write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); 762 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
763 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
708 apic_write(APIC_LVTPC, APIC_DM_NMI); 764 apic_write(APIC_LVTPC, APIC_DM_NMI);
709 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 765 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
710 wrmsr(evntsel_msr, evntsel, 0); 766 wrmsr(evntsel_msr, evntsel, 0);
@@ -762,7 +818,8 @@ void setup_apic_nmi_watchdog (void *unused)
762 if (nmi_watchdog == NMI_LOCAL_APIC) { 818 if (nmi_watchdog == NMI_LOCAL_APIC) {
763 switch (boot_cpu_data.x86_vendor) { 819 switch (boot_cpu_data.x86_vendor) {
764 case X86_VENDOR_AMD: 820 case X86_VENDOR_AMD:
765 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) 821 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
822 boot_cpu_data.x86 != 16)
766 return; 823 return;
767 if (!setup_k7_watchdog()) 824 if (!setup_k7_watchdog())
768 return; 825 return;
@@ -956,6 +1013,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
956 dummy &= ~P4_CCCR_OVF; 1013 dummy &= ~P4_CCCR_OVF;
957 wrmsrl(wd->cccr_msr, dummy); 1014 wrmsrl(wd->cccr_msr, dummy);
958 apic_write(APIC_LVTPC, APIC_DM_NMI); 1015 apic_write(APIC_LVTPC, APIC_DM_NMI);
1016 /* start the cycle over again */
1017 write_watchdog_counter(wd->perfctr_msr, NULL);
959 } 1018 }
960 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || 1019 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
961 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { 1020 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
@@ -964,9 +1023,12 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
964 * other P6 variant. 1023 * other P6 variant.
965 * ArchPerfom/Core Duo also needs this */ 1024 * ArchPerfom/Core Duo also needs this */
966 apic_write(APIC_LVTPC, APIC_DM_NMI); 1025 apic_write(APIC_LVTPC, APIC_DM_NMI);
1026 /* P6/ARCH_PERFMON has 32 bit counter write */
1027 write_watchdog_counter32(wd->perfctr_msr, NULL);
1028 } else {
1029 /* start the cycle over again */
1030 write_watchdog_counter(wd->perfctr_msr, NULL);
967 } 1031 }
968 /* start the cycle over again */
969 write_watchdog_counter(wd->perfctr_msr, NULL);
970 rc = 1; 1032 rc = 1;
971 } else if (nmi_watchdog == NMI_IO_APIC) { 1033 } else if (nmi_watchdog == NMI_IO_APIC) {
972 /* don't know how to accurately check for this. 1034 /* don't know how to accurately check for this.
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index e55fd05da0f5..c156ecfa3872 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -92,7 +92,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
92 return insn_len; 92 return insn_len;
93} 93}
94 94
95static fastcall unsigned long native_get_debugreg(int regno) 95static unsigned long native_get_debugreg(int regno)
96{ 96{
97 unsigned long val = 0; /* Damn you, gcc! */ 97 unsigned long val = 0; /* Damn you, gcc! */
98 98
@@ -115,7 +115,7 @@ static fastcall unsigned long native_get_debugreg(int regno)
115 return val; 115 return val;
116} 116}
117 117
118static fastcall void native_set_debugreg(int regno, unsigned long value) 118static void native_set_debugreg(int regno, unsigned long value)
119{ 119{
120 switch (regno) { 120 switch (regno) {
121 case 0: 121 case 0:
@@ -146,55 +146,55 @@ void init_IRQ(void)
146 paravirt_ops.init_IRQ(); 146 paravirt_ops.init_IRQ();
147} 147}
148 148
149static fastcall void native_clts(void) 149static void native_clts(void)
150{ 150{
151 asm volatile ("clts"); 151 asm volatile ("clts");
152} 152}
153 153
154static fastcall unsigned long native_read_cr0(void) 154static unsigned long native_read_cr0(void)
155{ 155{
156 unsigned long val; 156 unsigned long val;
157 asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); 157 asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
158 return val; 158 return val;
159} 159}
160 160
161static fastcall void native_write_cr0(unsigned long val) 161static void native_write_cr0(unsigned long val)
162{ 162{
163 asm volatile("movl %0,%%cr0": :"r" (val)); 163 asm volatile("movl %0,%%cr0": :"r" (val));
164} 164}
165 165
166static fastcall unsigned long native_read_cr2(void) 166static unsigned long native_read_cr2(void)
167{ 167{
168 unsigned long val; 168 unsigned long val;
169 asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); 169 asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
170 return val; 170 return val;
171} 171}
172 172
173static fastcall void native_write_cr2(unsigned long val) 173static void native_write_cr2(unsigned long val)
174{ 174{
175 asm volatile("movl %0,%%cr2": :"r" (val)); 175 asm volatile("movl %0,%%cr2": :"r" (val));
176} 176}
177 177
178static fastcall unsigned long native_read_cr3(void) 178static unsigned long native_read_cr3(void)
179{ 179{
180 unsigned long val; 180 unsigned long val;
181 asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); 181 asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
182 return val; 182 return val;
183} 183}
184 184
185static fastcall void native_write_cr3(unsigned long val) 185static void native_write_cr3(unsigned long val)
186{ 186{
187 asm volatile("movl %0,%%cr3": :"r" (val)); 187 asm volatile("movl %0,%%cr3": :"r" (val));
188} 188}
189 189
190static fastcall unsigned long native_read_cr4(void) 190static unsigned long native_read_cr4(void)
191{ 191{
192 unsigned long val; 192 unsigned long val;
193 asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); 193 asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
194 return val; 194 return val;
195} 195}
196 196
197static fastcall unsigned long native_read_cr4_safe(void) 197static unsigned long native_read_cr4_safe(void)
198{ 198{
199 unsigned long val; 199 unsigned long val;
200 /* This could fault if %cr4 does not exist */ 200 /* This could fault if %cr4 does not exist */
@@ -207,51 +207,51 @@ static fastcall unsigned long native_read_cr4_safe(void)
207 return val; 207 return val;
208} 208}
209 209
210static fastcall void native_write_cr4(unsigned long val) 210static void native_write_cr4(unsigned long val)
211{ 211{
212 asm volatile("movl %0,%%cr4": :"r" (val)); 212 asm volatile("movl %0,%%cr4": :"r" (val));
213} 213}
214 214
215static fastcall unsigned long native_save_fl(void) 215static unsigned long native_save_fl(void)
216{ 216{
217 unsigned long f; 217 unsigned long f;
218 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); 218 asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
219 return f; 219 return f;
220} 220}
221 221
222static fastcall void native_restore_fl(unsigned long f) 222static void native_restore_fl(unsigned long f)
223{ 223{
224 asm volatile("pushl %0 ; popfl": /* no output */ 224 asm volatile("pushl %0 ; popfl": /* no output */
225 :"g" (f) 225 :"g" (f)
226 :"memory", "cc"); 226 :"memory", "cc");
227} 227}
228 228
229static fastcall void native_irq_disable(void) 229static void native_irq_disable(void)
230{ 230{
231 asm volatile("cli": : :"memory"); 231 asm volatile("cli": : :"memory");
232} 232}
233 233
234static fastcall void native_irq_enable(void) 234static void native_irq_enable(void)
235{ 235{
236 asm volatile("sti": : :"memory"); 236 asm volatile("sti": : :"memory");
237} 237}
238 238
239static fastcall void native_safe_halt(void) 239static void native_safe_halt(void)
240{ 240{
241 asm volatile("sti; hlt": : :"memory"); 241 asm volatile("sti; hlt": : :"memory");
242} 242}
243 243
244static fastcall void native_halt(void) 244static void native_halt(void)
245{ 245{
246 asm volatile("hlt": : :"memory"); 246 asm volatile("hlt": : :"memory");
247} 247}
248 248
249static fastcall void native_wbinvd(void) 249static void native_wbinvd(void)
250{ 250{
251 asm volatile("wbinvd": : :"memory"); 251 asm volatile("wbinvd": : :"memory");
252} 252}
253 253
254static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) 254static unsigned long long native_read_msr(unsigned int msr, int *err)
255{ 255{
256 unsigned long long val; 256 unsigned long long val;
257 257
@@ -270,7 +270,7 @@ static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
270 return val; 270 return val;
271} 271}
272 272
273static fastcall int native_write_msr(unsigned int msr, unsigned long long val) 273static int native_write_msr(unsigned int msr, unsigned long long val)
274{ 274{
275 int err; 275 int err;
276 asm volatile("2: wrmsr ; xorl %0,%0\n" 276 asm volatile("2: wrmsr ; xorl %0,%0\n"
@@ -288,53 +288,53 @@ static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
288 return err; 288 return err;
289} 289}
290 290
291static fastcall unsigned long long native_read_tsc(void) 291static unsigned long long native_read_tsc(void)
292{ 292{
293 unsigned long long val; 293 unsigned long long val;
294 asm volatile("rdtsc" : "=A" (val)); 294 asm volatile("rdtsc" : "=A" (val));
295 return val; 295 return val;
296} 296}
297 297
298static fastcall unsigned long long native_read_pmc(void) 298static unsigned long long native_read_pmc(void)
299{ 299{
300 unsigned long long val; 300 unsigned long long val;
301 asm volatile("rdpmc" : "=A" (val)); 301 asm volatile("rdpmc" : "=A" (val));
302 return val; 302 return val;
303} 303}
304 304
305static fastcall void native_load_tr_desc(void) 305static void native_load_tr_desc(void)
306{ 306{
307 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); 307 asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
308} 308}
309 309
310static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) 310static void native_load_gdt(const struct Xgt_desc_struct *dtr)
311{ 311{
312 asm volatile("lgdt %0"::"m" (*dtr)); 312 asm volatile("lgdt %0"::"m" (*dtr));
313} 313}
314 314
315static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) 315static void native_load_idt(const struct Xgt_desc_struct *dtr)
316{ 316{
317 asm volatile("lidt %0"::"m" (*dtr)); 317 asm volatile("lidt %0"::"m" (*dtr));
318} 318}
319 319
320static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) 320static void native_store_gdt(struct Xgt_desc_struct *dtr)
321{ 321{
322 asm ("sgdt %0":"=m" (*dtr)); 322 asm ("sgdt %0":"=m" (*dtr));
323} 323}
324 324
325static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) 325static void native_store_idt(struct Xgt_desc_struct *dtr)
326{ 326{
327 asm ("sidt %0":"=m" (*dtr)); 327 asm ("sidt %0":"=m" (*dtr));
328} 328}
329 329
330static fastcall unsigned long native_store_tr(void) 330static unsigned long native_store_tr(void)
331{ 331{
332 unsigned long tr; 332 unsigned long tr;
333 asm ("str %0":"=r" (tr)); 333 asm ("str %0":"=r" (tr));
334 return tr; 334 return tr;
335} 335}
336 336
337static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) 337static void native_load_tls(struct thread_struct *t, unsigned int cpu)
338{ 338{
339#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] 339#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
340 C(0); C(1); C(2); 340 C(0); C(1); C(2);
@@ -348,22 +348,22 @@ static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32
348 lp[1] = entry_high; 348 lp[1] = entry_high;
349} 349}
350 350
351static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) 351static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
352{ 352{
353 native_write_dt_entry(dt, entrynum, low, high); 353 native_write_dt_entry(dt, entrynum, low, high);
354} 354}
355 355
356static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) 356static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
357{ 357{
358 native_write_dt_entry(dt, entrynum, low, high); 358 native_write_dt_entry(dt, entrynum, low, high);
359} 359}
360 360
361static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) 361static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
362{ 362{
363 native_write_dt_entry(dt, entrynum, low, high); 363 native_write_dt_entry(dt, entrynum, low, high);
364} 364}
365 365
366static fastcall void native_load_esp0(struct tss_struct *tss, 366static void native_load_esp0(struct tss_struct *tss,
367 struct thread_struct *thread) 367 struct thread_struct *thread)
368{ 368{
369 tss->esp0 = thread->esp0; 369 tss->esp0 = thread->esp0;
@@ -375,12 +375,12 @@ static fastcall void native_load_esp0(struct tss_struct *tss,
375 } 375 }
376} 376}
377 377
378static fastcall void native_io_delay(void) 378static void native_io_delay(void)
379{ 379{
380 asm volatile("outb %al,$0x80"); 380 asm volatile("outb %al,$0x80");
381} 381}
382 382
383static fastcall void native_flush_tlb(void) 383static void native_flush_tlb(void)
384{ 384{
385 __native_flush_tlb(); 385 __native_flush_tlb();
386} 386}
@@ -389,49 +389,49 @@ static fastcall void native_flush_tlb(void)
389 * Global pages have to be flushed a bit differently. Not a real 389 * Global pages have to be flushed a bit differently. Not a real
390 * performance problem because this does not happen often. 390 * performance problem because this does not happen often.
391 */ 391 */
392static fastcall void native_flush_tlb_global(void) 392static void native_flush_tlb_global(void)
393{ 393{
394 __native_flush_tlb_global(); 394 __native_flush_tlb_global();
395} 395}
396 396
397static fastcall void native_flush_tlb_single(u32 addr) 397static void native_flush_tlb_single(u32 addr)
398{ 398{
399 __native_flush_tlb_single(addr); 399 __native_flush_tlb_single(addr);
400} 400}
401 401
402#ifndef CONFIG_X86_PAE 402#ifndef CONFIG_X86_PAE
403static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) 403static void native_set_pte(pte_t *ptep, pte_t pteval)
404{ 404{
405 *ptep = pteval; 405 *ptep = pteval;
406} 406}
407 407
408static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) 408static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
409{ 409{
410 *ptep = pteval; 410 *ptep = pteval;
411} 411}
412 412
413static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) 413static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
414{ 414{
415 *pmdp = pmdval; 415 *pmdp = pmdval;
416} 416}
417 417
418#else /* CONFIG_X86_PAE */ 418#else /* CONFIG_X86_PAE */
419 419
420static fastcall void native_set_pte(pte_t *ptep, pte_t pte) 420static void native_set_pte(pte_t *ptep, pte_t pte)
421{ 421{
422 ptep->pte_high = pte.pte_high; 422 ptep->pte_high = pte.pte_high;
423 smp_wmb(); 423 smp_wmb();
424 ptep->pte_low = pte.pte_low; 424 ptep->pte_low = pte.pte_low;
425} 425}
426 426
427static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) 427static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
428{ 428{
429 ptep->pte_high = pte.pte_high; 429 ptep->pte_high = pte.pte_high;
430 smp_wmb(); 430 smp_wmb();
431 ptep->pte_low = pte.pte_low; 431 ptep->pte_low = pte.pte_low;
432} 432}
433 433
434static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) 434static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
435{ 435{
436 ptep->pte_low = 0; 436 ptep->pte_low = 0;
437 smp_wmb(); 437 smp_wmb();
@@ -440,29 +440,29 @@ static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long
440 ptep->pte_low = pte.pte_low; 440 ptep->pte_low = pte.pte_low;
441} 441}
442 442
443static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) 443static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
444{ 444{
445 set_64bit((unsigned long long *)ptep,pte_val(pteval)); 445 set_64bit((unsigned long long *)ptep,pte_val(pteval));
446} 446}
447 447
448static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) 448static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
449{ 449{
450 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); 450 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
451} 451}
452 452
453static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) 453static void native_set_pud(pud_t *pudp, pud_t pudval)
454{ 454{
455 *pudp = pudval; 455 *pudp = pudval;
456} 456}
457 457
458static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 458static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
459{ 459{
460 ptep->pte_low = 0; 460 ptep->pte_low = 0;
461 smp_wmb(); 461 smp_wmb();
462 ptep->pte_high = 0; 462 ptep->pte_high = 0;
463} 463}
464 464
465static fastcall void native_pmd_clear(pmd_t *pmd) 465static void native_pmd_clear(pmd_t *pmd)
466{ 466{
467 u32 *tmp = (u32 *)pmd; 467 u32 *tmp = (u32 *)pmd;
468 *tmp = 0; 468 *tmp = 0;
@@ -472,8 +472,8 @@ static fastcall void native_pmd_clear(pmd_t *pmd)
472#endif /* CONFIG_X86_PAE */ 472#endif /* CONFIG_X86_PAE */
473 473
474/* These are in entry.S */ 474/* These are in entry.S */
475extern fastcall void native_iret(void); 475extern void native_iret(void);
476extern fastcall void native_irq_enable_sysexit(void); 476extern void native_irq_enable_sysexit(void);
477 477
478static int __init print_banner(void) 478static int __init print_banner(void)
479{ 479{
@@ -482,9 +482,6 @@ static int __init print_banner(void)
482} 482}
483core_initcall(print_banner); 483core_initcall(print_banner);
484 484
485/* We simply declare start_kernel to be the paravirt probe of last resort. */
486paravirt_probe(start_kernel);
487
488struct paravirt_ops paravirt_ops = { 485struct paravirt_ops paravirt_ops = {
489 .name = "bare hardware", 486 .name = "bare hardware",
490 .paravirt_enabled = 0, 487 .paravirt_enabled = 0,
@@ -544,12 +541,21 @@ struct paravirt_ops paravirt_ops = {
544 .apic_write = native_apic_write, 541 .apic_write = native_apic_write,
545 .apic_write_atomic = native_apic_write_atomic, 542 .apic_write_atomic = native_apic_write_atomic,
546 .apic_read = native_apic_read, 543 .apic_read = native_apic_read,
544 .setup_boot_clock = setup_boot_APIC_clock,
545 .setup_secondary_clock = setup_secondary_APIC_clock,
547#endif 546#endif
547 .set_lazy_mode = (void *)native_nop,
548 548
549 .flush_tlb_user = native_flush_tlb, 549 .flush_tlb_user = native_flush_tlb,
550 .flush_tlb_kernel = native_flush_tlb_global, 550 .flush_tlb_kernel = native_flush_tlb_global,
551 .flush_tlb_single = native_flush_tlb_single, 551 .flush_tlb_single = native_flush_tlb_single,
552 552
553 .alloc_pt = (void *)native_nop,
554 .alloc_pd = (void *)native_nop,
555 .alloc_pd_clone = (void *)native_nop,
556 .release_pt = (void *)native_nop,
557 .release_pd = (void *)native_nop,
558
553 .set_pte = native_set_pte, 559 .set_pte = native_set_pte,
554 .set_pte_at = native_set_pte_at, 560 .set_pte_at = native_set_pte_at,
555 .set_pmd = native_set_pmd, 561 .set_pmd = native_set_pmd,
@@ -565,6 +571,8 @@ struct paravirt_ops paravirt_ops = {
565 571
566 .irq_enable_sysexit = native_irq_enable_sysexit, 572 .irq_enable_sysexit = native_irq_enable_sysexit,
567 .iret = native_iret, 573 .iret = native_iret,
574
575 .startup_ipi_hook = (void *)native_nop,
568}; 576};
569 577
570/* 578/*
diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c
new file mode 100644
index 000000000000..bc1f2d3ea277
--- /dev/null
+++ b/arch/i386/kernel/pcspeaker.c
@@ -0,0 +1,20 @@
1#include <linux/platform_device.h>
2#include <linux/errno.h>
3#include <linux/init.h>
4
5static __init int add_pcspkr(void)
6{
7 struct platform_device *pd;
8 int ret;
9
10 pd = platform_device_alloc("pcspkr", -1);
11 if (!pd)
12 return -ENOMEM;
13
14 ret = platform_device_add(pd);
15 if (ret)
16 platform_device_put(pd);
17
18 return ret;
19}
20device_initcall(add_pcspkr);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index c641056233a6..7845d480c293 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -48,6 +48,7 @@
48#include <asm/i387.h> 48#include <asm/i387.h>
49#include <asm/desc.h> 49#include <asm/desc.h>
50#include <asm/vm86.h> 50#include <asm/vm86.h>
51#include <asm/idle.h>
51#ifdef CONFIG_MATH_EMULATION 52#ifdef CONFIG_MATH_EMULATION
52#include <asm/math_emu.h> 53#include <asm/math_emu.h>
53#endif 54#endif
@@ -80,6 +81,42 @@ void (*pm_idle)(void);
80EXPORT_SYMBOL(pm_idle); 81EXPORT_SYMBOL(pm_idle);
81static DEFINE_PER_CPU(unsigned int, cpu_idle_state); 82static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
82 83
84static ATOMIC_NOTIFIER_HEAD(idle_notifier);
85
86void idle_notifier_register(struct notifier_block *n)
87{
88 atomic_notifier_chain_register(&idle_notifier, n);
89}
90
91void idle_notifier_unregister(struct notifier_block *n)
92{
93 atomic_notifier_chain_unregister(&idle_notifier, n);
94}
95
96static DEFINE_PER_CPU(volatile unsigned long, idle_state);
97
98void enter_idle(void)
99{
100 /* needs to be atomic w.r.t. interrupts, not against other CPUs */
101 __set_bit(0, &__get_cpu_var(idle_state));
102 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
103}
104
105static void __exit_idle(void)
106{
107 /* needs to be atomic w.r.t. interrupts, not against other CPUs */
108 if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
109 return;
110 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
111}
112
113void exit_idle(void)
114{
115 if (current->pid)
116 return;
117 __exit_idle();
118}
119
83void disable_hlt(void) 120void disable_hlt(void)
84{ 121{
85 hlt_counter++; 122 hlt_counter++;
@@ -130,6 +167,7 @@ EXPORT_SYMBOL(default_idle);
130 */ 167 */
131static void poll_idle (void) 168static void poll_idle (void)
132{ 169{
170 local_irq_enable();
133 cpu_relax(); 171 cpu_relax();
134} 172}
135 173
@@ -189,7 +227,16 @@ void cpu_idle(void)
189 play_dead(); 227 play_dead();
190 228
191 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 229 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
230
231 /*
232 * Idle routines should keep interrupts disabled
233 * from here on, until they go to idle.
234 * Otherwise, idle callbacks can misfire.
235 */
236 local_irq_disable();
237 enter_idle();
192 idle(); 238 idle();
239 __exit_idle();
193 } 240 }
194 preempt_enable_no_resched(); 241 preempt_enable_no_resched();
195 schedule(); 242 schedule();
@@ -243,7 +290,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
243 __monitor((void *)&current_thread_info()->flags, 0, 0); 290 __monitor((void *)&current_thread_info()->flags, 0, 0);
244 smp_mb(); 291 smp_mb();
245 if (!need_resched()) 292 if (!need_resched())
246 __mwait(eax, ecx); 293 __sti_mwait(eax, ecx);
294 else
295 local_irq_enable();
296 } else {
297 local_irq_enable();
247 } 298 }
248} 299}
249 300
@@ -308,8 +359,8 @@ void show_regs(struct pt_regs * regs)
308 regs->eax,regs->ebx,regs->ecx,regs->edx); 359 regs->eax,regs->ebx,regs->ecx,regs->edx);
309 printk("ESI: %08lx EDI: %08lx EBP: %08lx", 360 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
310 regs->esi, regs->edi, regs->ebp); 361 regs->esi, regs->edi, regs->ebp);
311 printk(" DS: %04x ES: %04x GS: %04x\n", 362 printk(" DS: %04x ES: %04x FS: %04x\n",
312 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); 363 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
313 364
314 cr0 = read_cr0(); 365 cr0 = read_cr0();
315 cr2 = read_cr2(); 366 cr2 = read_cr2();
@@ -340,7 +391,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
340 391
341 regs.xds = __USER_DS; 392 regs.xds = __USER_DS;
342 regs.xes = __USER_DS; 393 regs.xes = __USER_DS;
343 regs.xgs = __KERNEL_PDA; 394 regs.xfs = __KERNEL_PDA;
344 regs.orig_eax = -1; 395 regs.orig_eax = -1;
345 regs.eip = (unsigned long) kernel_thread_helper; 396 regs.eip = (unsigned long) kernel_thread_helper;
346 regs.xcs = __KERNEL_CS | get_kernel_rpl(); 397 regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -425,7 +476,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
425 476
426 p->thread.eip = (unsigned long) ret_from_fork; 477 p->thread.eip = (unsigned long) ret_from_fork;
427 478
428 savesegment(fs,p->thread.fs); 479 savesegment(gs,p->thread.gs);
429 480
430 tsk = current; 481 tsk = current;
431 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 482 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -501,8 +552,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
501 dump->regs.eax = regs->eax; 552 dump->regs.eax = regs->eax;
502 dump->regs.ds = regs->xds; 553 dump->regs.ds = regs->xds;
503 dump->regs.es = regs->xes; 554 dump->regs.es = regs->xes;
504 savesegment(fs,dump->regs.fs); 555 dump->regs.fs = regs->xfs;
505 dump->regs.gs = regs->xgs; 556 savesegment(gs,dump->regs.gs);
506 dump->regs.orig_eax = regs->orig_eax; 557 dump->regs.orig_eax = regs->orig_eax;
507 dump->regs.eip = regs->eip; 558 dump->regs.eip = regs->eip;
508 dump->regs.cs = regs->xcs; 559 dump->regs.cs = regs->xcs;
@@ -653,7 +704,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
653 load_esp0(tss, next); 704 load_esp0(tss, next);
654 705
655 /* 706 /*
656 * Save away %fs. No need to save %gs, as it was saved on the 707 * Save away %gs. No need to save %fs, as it was saved on the
657 * stack on entry. No need to save %es and %ds, as those are 708 * stack on entry. No need to save %es and %ds, as those are
658 * always kernel segments while inside the kernel. Doing this 709 * always kernel segments while inside the kernel. Doing this
659 * before setting the new TLS descriptors avoids the situation 710 * before setting the new TLS descriptors avoids the situation
@@ -662,7 +713,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
662 * used %fs or %gs (it does not today), or if the kernel is 713 * used %fs or %gs (it does not today), or if the kernel is
663 * running inside of a hypervisor layer. 714 * running inside of a hypervisor layer.
664 */ 715 */
665 savesegment(fs, prev->fs); 716 savesegment(gs, prev->gs);
666 717
667 /* 718 /*
668 * Load the per-thread Thread-Local Storage descriptor. 719 * Load the per-thread Thread-Local Storage descriptor.
@@ -670,14 +721,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
670 load_TLS(next, cpu); 721 load_TLS(next, cpu);
671 722
672 /* 723 /*
673 * Restore %fs if needed. 724 * Restore IOPL if needed. In normal use, the flags restore
674 * 725 * in the switch assembly will handle this. But if the kernel
675 * Glibc normally makes %fs be zero. 726 * is running virtualized at a non-zero CPL, the popf will
727 * not restore flags, so it must be done in a separate step.
676 */ 728 */
677 if (unlikely(prev->fs | next->fs)) 729 if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
678 loadsegment(fs, next->fs); 730 set_iopl_mask(next->iopl);
679
680 write_pda(pcurrent, next_p);
681 731
682 /* 732 /*
683 * Now maybe handle debug registers and/or IO bitmaps 733 * Now maybe handle debug registers and/or IO bitmaps
@@ -688,6 +738,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
688 738
689 disable_tsc(prev_p, next_p); 739 disable_tsc(prev_p, next_p);
690 740
741 /*
742 * Leave lazy mode, flushing any hypercalls made here.
743 * This must be done before restoring TLS segments so
744 * the GDT and LDT are properly updated, and must be
745 * done before math_state_restore, so the TS bit is up
746 * to date.
747 */
748 arch_leave_lazy_cpu_mode();
749
691 /* If the task has used fpu the last 5 timeslices, just do a full 750 /* If the task has used fpu the last 5 timeslices, just do a full
692 * restore of the math state immediately to avoid the trap; the 751 * restore of the math state immediately to avoid the trap; the
693 * chances of needing FPU soon are obviously high now 752 * chances of needing FPU soon are obviously high now
@@ -695,6 +754,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
695 if (next_p->fpu_counter > 5) 754 if (next_p->fpu_counter > 5)
696 math_state_restore(); 755 math_state_restore();
697 756
757 /*
758 * Restore %gs if needed (which is common)
759 */
760 if (prev->gs | next->gs)
761 loadsegment(gs, next->gs);
762
763 write_pda(pcurrent, next_p);
764
698 return prev_p; 765 return prev_p;
699} 766}
700 767
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index af8aabe85800..4a8f8a259723 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child,
89 unsigned long regno, unsigned long value) 89 unsigned long regno, unsigned long value)
90{ 90{
91 switch (regno >> 2) { 91 switch (regno >> 2) {
92 case FS: 92 case GS:
93 if (value && (value & 3) != 3) 93 if (value && (value & 3) != 3)
94 return -EIO; 94 return -EIO;
95 child->thread.fs = value; 95 child->thread.gs = value;
96 return 0; 96 return 0;
97 case DS: 97 case DS:
98 case ES: 98 case ES:
99 case GS: 99 case FS:
100 if (value && (value & 3) != 3) 100 if (value && (value & 3) != 3)
101 return -EIO; 101 return -EIO;
102 value &= 0xffff; 102 value &= 0xffff;
@@ -112,7 +112,7 @@ static int putreg(struct task_struct *child,
112 value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; 112 value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
113 break; 113 break;
114 } 114 }
115 if (regno > ES*4) 115 if (regno > FS*4)
116 regno -= 1*4; 116 regno -= 1*4;
117 put_stack_long(child, regno, value); 117 put_stack_long(child, regno, value);
118 return 0; 118 return 0;
@@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child,
124 unsigned long retval = ~0UL; 124 unsigned long retval = ~0UL;
125 125
126 switch (regno >> 2) { 126 switch (regno >> 2) {
127 case FS: 127 case GS:
128 retval = child->thread.fs; 128 retval = child->thread.gs;
129 break; 129 break;
130 case DS: 130 case DS:
131 case ES: 131 case ES:
132 case GS: 132 case FS:
133 case SS: 133 case SS:
134 case CS: 134 case CS:
135 retval = 0xffff; 135 retval = 0xffff;
136 /* fall through */ 136 /* fall through */
137 default: 137 default:
138 if (regno > ES*4) 138 if (regno > FS*4)
139 regno -= 1*4; 139 regno -= 1*4;
140 retval &= get_stack_long(child, regno); 140 retval &= get_stack_long(child, regno);
141 } 141 }
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 4694ac980cd2..122623dcc6e1 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -33,7 +33,6 @@
33#include <linux/initrd.h> 33#include <linux/initrd.h>
34#include <linux/bootmem.h> 34#include <linux/bootmem.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/platform_device.h>
37#include <linux/console.h> 36#include <linux/console.h>
38#include <linux/mca.h> 37#include <linux/mca.h>
39#include <linux/root_dev.h> 38#include <linux/root_dev.h>
@@ -60,6 +59,7 @@
60#include <asm/io_apic.h> 59#include <asm/io_apic.h>
61#include <asm/ist.h> 60#include <asm/ist.h>
62#include <asm/io.h> 61#include <asm/io.h>
62#include <asm/vmi.h>
63#include <setup_arch.h> 63#include <setup_arch.h>
64#include <bios_ebda.h> 64#include <bios_ebda.h>
65 65
@@ -581,6 +581,14 @@ void __init setup_arch(char **cmdline_p)
581 581
582 max_low_pfn = setup_memory(); 582 max_low_pfn = setup_memory();
583 583
584#ifdef CONFIG_VMI
585 /*
586 * Must be after max_low_pfn is determined, and before kernel
587 * pagetables are setup.
588 */
589 vmi_init();
590#endif
591
584 /* 592 /*
585 * NOTE: before this point _nobody_ is allowed to allocate 593 * NOTE: before this point _nobody_ is allowed to allocate
586 * any memory using the bootmem allocator. Although the 594 * any memory using the bootmem allocator. Although the
@@ -651,28 +659,3 @@ void __init setup_arch(char **cmdline_p)
651#endif 659#endif
652 tsc_init(); 660 tsc_init();
653} 661}
654
655static __init int add_pcspkr(void)
656{
657 struct platform_device *pd;
658 int ret;
659
660 pd = platform_device_alloc("pcspkr", -1);
661 if (!pd)
662 return -ENOMEM;
663
664 ret = platform_device_add(pd);
665 if (ret)
666 platform_device_put(pd);
667
668 return ret;
669}
670device_initcall(add_pcspkr);
671
672/*
673 * Local Variables:
674 * mode:c
675 * c-file-style:"k&r"
676 * c-basic-offset:8
677 * End:
678 */
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 65d7620eaa09..4f99e870c986 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -21,6 +21,7 @@
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/ptrace.h> 22#include <linux/ptrace.h>
23#include <linux/elf.h> 23#include <linux/elf.h>
24#include <linux/binfmts.h>
24#include <asm/processor.h> 25#include <asm/processor.h>
25#include <asm/ucontext.h> 26#include <asm/ucontext.h>
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
@@ -128,8 +129,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
128 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ 129 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
129 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) 130 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
130 131
131 COPY_SEG(gs); 132 GET_SEG(gs);
132 GET_SEG(fs); 133 COPY_SEG(fs);
133 COPY_SEG(es); 134 COPY_SEG(es);
134 COPY_SEG(ds); 135 COPY_SEG(ds);
135 COPY(edi); 136 COPY(edi);
@@ -244,9 +245,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
244{ 245{
245 int tmp, err = 0; 246 int tmp, err = 0;
246 247
247 err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); 248 err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs);
248 savesegment(fs, tmp); 249 savesegment(gs, tmp);
249 err |= __put_user(tmp, (unsigned int __user *)&sc->fs); 250 err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
250 251
251 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); 252 err |= __put_user(regs->xes, (unsigned int __user *)&sc->es);
252 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); 253 err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds);
@@ -349,7 +350,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
349 goto give_sigsegv; 350 goto give_sigsegv;
350 } 351 }
351 352
352 restorer = (void *)VDSO_SYM(&__kernel_sigreturn); 353 if (current->binfmt->hasvdso)
354 restorer = (void *)VDSO_SYM(&__kernel_sigreturn);
355 else
356 restorer = (void *)&frame->retcode;
353 if (ka->sa.sa_flags & SA_RESTORER) 357 if (ka->sa.sa_flags & SA_RESTORER)
354 restorer = ka->sa.sa_restorer; 358 restorer = ka->sa.sa_restorer;
355 359
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 5285aff8367f..9bd9637ae692 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -23,6 +23,7 @@
23 23
24#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/tlbflush.h> 25#include <asm/tlbflush.h>
26#include <asm/idle.h>
26#include <mach_apic.h> 27#include <mach_apic.h>
27 28
28/* 29/*
@@ -374,8 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
374 /* 375 /*
375 * i'm not happy about this global shared spinlock in the 376 * i'm not happy about this global shared spinlock in the
376 * MM hot path, but we'll see how contended it is. 377 * MM hot path, but we'll see how contended it is.
377 * Temporarily this turns IRQs off, so that lockups are 378 * AK: x86-64 has a faster method that could be ported.
378 * detected by the NMI watchdog.
379 */ 379 */
380 spin_lock(&tlbstate_lock); 380 spin_lock(&tlbstate_lock);
381 381
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
400 400
401 while (!cpus_empty(flush_cpumask)) 401 while (!cpus_empty(flush_cpumask))
402 /* nothing. lockup detection does not belong here */ 402 /* nothing. lockup detection does not belong here */
403 mb(); 403 cpu_relax();
404 404
405 flush_mm = NULL; 405 flush_mm = NULL;
406 flush_va = 0; 406 flush_va = 0;
@@ -624,6 +624,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
624 /* 624 /*
625 * At this point the info structure may be out of scope unless wait==1 625 * At this point the info structure may be out of scope unless wait==1
626 */ 626 */
627 exit_idle();
627 irq_enter(); 628 irq_enter();
628 (*func)(info); 629 (*func)(info);
629 irq_exit(); 630 irq_exit();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8c6c8c52b95c..f46a4d095e6c 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -63,6 +63,7 @@
63#include <mach_apic.h> 63#include <mach_apic.h>
64#include <mach_wakecpu.h> 64#include <mach_wakecpu.h>
65#include <smpboot_hooks.h> 65#include <smpboot_hooks.h>
66#include <asm/vmi.h>
66 67
67/* Set if we find a B stepping CPU */ 68/* Set if we find a B stepping CPU */
68static int __devinitdata smp_b_stepping; 69static int __devinitdata smp_b_stepping;
@@ -545,12 +546,15 @@ static void __cpuinit start_secondary(void *unused)
545 * booting is too fragile that we want to limit the 546 * booting is too fragile that we want to limit the
546 * things done here to the most necessary things. 547 * things done here to the most necessary things.
547 */ 548 */
549#ifdef CONFIG_VMI
550 vmi_bringup();
551#endif
548 secondary_cpu_init(); 552 secondary_cpu_init();
549 preempt_disable(); 553 preempt_disable();
550 smp_callin(); 554 smp_callin();
551 while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) 555 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
552 rep_nop(); 556 rep_nop();
553 setup_secondary_APIC_clock(); 557 setup_secondary_clock();
554 if (nmi_watchdog == NMI_IO_APIC) { 558 if (nmi_watchdog == NMI_IO_APIC) {
555 disable_8259A_irq(0); 559 disable_8259A_irq(0);
556 enable_NMI_through_LVT0(NULL); 560 enable_NMI_through_LVT0(NULL);
@@ -619,7 +623,6 @@ extern struct {
619 unsigned short ss; 623 unsigned short ss;
620} stack_start; 624} stack_start;
621extern struct i386_pda *start_pda; 625extern struct i386_pda *start_pda;
622extern struct Xgt_desc_struct cpu_gdt_descr;
623 626
624#ifdef CONFIG_NUMA 627#ifdef CONFIG_NUMA
625 628
@@ -835,6 +838,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
835 num_starts = 0; 838 num_starts = 0;
836 839
837 /* 840 /*
841 * Paravirt / VMI wants a startup IPI hook here to set up the
842 * target processor state.
843 */
844 startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
845 (unsigned long) stack_start.esp);
846
847 /*
838 * Run STARTUP IPI loop. 848 * Run STARTUP IPI loop.
839 */ 849 */
840 Dprintk("#startup loops: %d.\n", num_starts); 850 Dprintk("#startup loops: %d.\n", num_starts);
@@ -1320,7 +1330,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1320 1330
1321 smpboot_setup_io_apic(); 1331 smpboot_setup_io_apic();
1322 1332
1323 setup_boot_APIC_clock(); 1333 setup_boot_clock();
1324 1334
1325 /* 1335 /*
1326 * Synchronize the TSC with the AP 1336 * Synchronize the TSC with the AP
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index bc882a2b1db6..13ca54a85a1c 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -78,7 +78,7 @@ int __init sysenter_setup(void)
78 syscall_pages[0] = virt_to_page(syscall_page); 78 syscall_pages[0] = virt_to_page(syscall_page);
79 79
80#ifdef CONFIG_COMPAT_VDSO 80#ifdef CONFIG_COMPAT_VDSO
81 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); 81 __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
82 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); 82 printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
83#endif 83#endif
84 84
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index c505b16c0990..a4f67a6e6821 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -131,15 +131,13 @@ unsigned long profile_pc(struct pt_regs *regs)
131 unsigned long pc = instruction_pointer(regs); 131 unsigned long pc = instruction_pointer(regs);
132 132
133#ifdef CONFIG_SMP 133#ifdef CONFIG_SMP
134 if (!user_mode_vm(regs) && in_lock_functions(pc)) { 134 if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) &&
135 in_lock_functions(pc)) {
135#ifdef CONFIG_FRAME_POINTER 136#ifdef CONFIG_FRAME_POINTER
136 return *(unsigned long *)(regs->ebp + 4); 137 return *(unsigned long *)(regs->ebp + 4);
137#else 138#else
138 unsigned long *sp; 139 unsigned long *sp = (unsigned long *)&regs->esp;
139 if ((regs->xcs & 3) == 0) 140
140 sp = (unsigned long *)&regs->esp;
141 else
142 sp = (unsigned long *)regs->esp;
143 /* Return address is either directly at stack pointer 141 /* Return address is either directly at stack pointer
144 or above a saved eflags. Eflags has bits 22-31 zero, 142 or above a saved eflags. Eflags has bits 22-31 zero,
145 kernel addresses don't. */ 143 kernel addresses don't. */
@@ -232,6 +230,7 @@ EXPORT_SYMBOL(get_cmos_time);
232static void sync_cmos_clock(unsigned long dummy); 230static void sync_cmos_clock(unsigned long dummy);
233 231
234static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); 232static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
233int no_sync_cmos_clock;
235 234
236static void sync_cmos_clock(unsigned long dummy) 235static void sync_cmos_clock(unsigned long dummy)
237{ 236{
@@ -275,7 +274,8 @@ static void sync_cmos_clock(unsigned long dummy)
275 274
276void notify_arch_cmos_timer(void) 275void notify_arch_cmos_timer(void)
277{ 276{
278 mod_timer(&sync_cmos_timer, jiffies + 1); 277 if (!no_sync_cmos_clock)
278 mod_timer(&sync_cmos_timer, jiffies + 1);
279} 279}
280 280
281static long clock_cmos_diff; 281static long clock_cmos_diff;
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 0efad8aeb41a..af0d3f70a817 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -94,6 +94,7 @@ asmlinkage void spurious_interrupt_bug(void);
94asmlinkage void machine_check(void); 94asmlinkage void machine_check(void);
95 95
96int kstack_depth_to_print = 24; 96int kstack_depth_to_print = 24;
97static unsigned int code_bytes = 64;
97ATOMIC_NOTIFIER_HEAD(i386die_chain); 98ATOMIC_NOTIFIER_HEAD(i386die_chain);
98 99
99int register_die_notifier(struct notifier_block *nb) 100int register_die_notifier(struct notifier_block *nb)
@@ -291,10 +292,11 @@ void show_registers(struct pt_regs *regs)
291 int i; 292 int i;
292 int in_kernel = 1; 293 int in_kernel = 1;
293 unsigned long esp; 294 unsigned long esp;
294 unsigned short ss; 295 unsigned short ss, gs;
295 296
296 esp = (unsigned long) (&regs->esp); 297 esp = (unsigned long) (&regs->esp);
297 savesegment(ss, ss); 298 savesegment(ss, ss);
299 savesegment(gs, gs);
298 if (user_mode_vm(regs)) { 300 if (user_mode_vm(regs)) {
299 in_kernel = 0; 301 in_kernel = 0;
300 esp = regs->esp; 302 esp = regs->esp;
@@ -313,8 +315,8 @@ void show_registers(struct pt_regs *regs)
313 regs->eax, regs->ebx, regs->ecx, regs->edx); 315 regs->eax, regs->ebx, regs->ecx, regs->edx);
314 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", 316 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
315 regs->esi, regs->edi, regs->ebp, esp); 317 regs->esi, regs->edi, regs->ebp, esp);
316 printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", 318 printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
317 regs->xds & 0xffff, regs->xes & 0xffff, ss); 319 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
318 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 320 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
319 TASK_COMM_LEN, current->comm, current->pid, 321 TASK_COMM_LEN, current->comm, current->pid,
320 current_thread_info(), current, current->thread_info); 322 current_thread_info(), current, current->thread_info);
@@ -324,7 +326,8 @@ void show_registers(struct pt_regs *regs)
324 */ 326 */
325 if (in_kernel) { 327 if (in_kernel) {
326 u8 *eip; 328 u8 *eip;
327 int code_bytes = 64; 329 unsigned int code_prologue = code_bytes * 43 / 64;
330 unsigned int code_len = code_bytes;
328 unsigned char c; 331 unsigned char c;
329 332
330 printk("\n" KERN_EMERG "Stack: "); 333 printk("\n" KERN_EMERG "Stack: ");
@@ -332,14 +335,14 @@ void show_registers(struct pt_regs *regs)
332 335
333 printk(KERN_EMERG "Code: "); 336 printk(KERN_EMERG "Code: ");
334 337
335 eip = (u8 *)regs->eip - 43; 338 eip = (u8 *)regs->eip - code_prologue;
336 if (eip < (u8 *)PAGE_OFFSET || 339 if (eip < (u8 *)PAGE_OFFSET ||
337 probe_kernel_address(eip, c)) { 340 probe_kernel_address(eip, c)) {
338 /* try starting at EIP */ 341 /* try starting at EIP */
339 eip = (u8 *)regs->eip; 342 eip = (u8 *)regs->eip;
340 code_bytes = 32; 343 code_len = code_len - code_prologue + 1;
341 } 344 }
342 for (i = 0; i < code_bytes; i++, eip++) { 345 for (i = 0; i < code_len; i++, eip++) {
343 if (eip < (u8 *)PAGE_OFFSET || 346 if (eip < (u8 *)PAGE_OFFSET ||
344 probe_kernel_address(eip, c)) { 347 probe_kernel_address(eip, c)) {
345 printk(" Bad EIP value."); 348 printk(" Bad EIP value.");
@@ -1191,3 +1194,13 @@ static int __init kstack_setup(char *s)
1191 return 1; 1194 return 1;
1192} 1195}
1193__setup("kstack=", kstack_setup); 1196__setup("kstack=", kstack_setup);
1197
1198static int __init code_bytes_setup(char *s)
1199{
1200 code_bytes = simple_strtoul(s, NULL, 0);
1201 if (code_bytes > 8192)
1202 code_bytes = 8192;
1203
1204 return 1;
1205}
1206__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 2cfc7b09b925..46f752a8bbf3 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -23,6 +23,7 @@
23 * an extra value to store the TSC freq 23 * an extra value to store the TSC freq
24 */ 24 */
25unsigned int tsc_khz; 25unsigned int tsc_khz;
26unsigned long long (*custom_sched_clock)(void);
26 27
27int tsc_disable; 28int tsc_disable;
28 29
@@ -107,14 +108,14 @@ unsigned long long sched_clock(void)
107{ 108{
108 unsigned long long this_offset; 109 unsigned long long this_offset;
109 110
111 if (unlikely(custom_sched_clock))
112 return (*custom_sched_clock)();
113
110 /* 114 /*
111 * in the NUMA case we dont use the TSC as they are not 115 * Fall back to jiffies if there's no TSC available:
112 * synchronized across all CPUs.
113 */ 116 */
114#ifndef CONFIG_NUMA 117 if (unlikely(tsc_disable))
115 if (!cpu_khz || check_tsc_unstable()) 118 /* No locking but a rare wrong value is not a big deal: */
116#endif
117 /* no locking but a rare wrong value is not a big deal */
118 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); 119 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
119 120
120 /* read the Time Stamp Counter: */ 121 /* read the Time Stamp Counter: */
@@ -194,13 +195,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
194void __init tsc_init(void) 195void __init tsc_init(void)
195{ 196{
196 if (!cpu_has_tsc || tsc_disable) 197 if (!cpu_has_tsc || tsc_disable)
197 return; 198 goto out_no_tsc;
198 199
199 cpu_khz = calculate_cpu_khz(); 200 cpu_khz = calculate_cpu_khz();
200 tsc_khz = cpu_khz; 201 tsc_khz = cpu_khz;
201 202
202 if (!cpu_khz) 203 if (!cpu_khz)
203 return; 204 goto out_no_tsc;
204 205
205 printk("Detected %lu.%03lu MHz processor.\n", 206 printk("Detected %lu.%03lu MHz processor.\n",
206 (unsigned long)cpu_khz / 1000, 207 (unsigned long)cpu_khz / 1000,
@@ -208,6 +209,15 @@ void __init tsc_init(void)
208 209
209 set_cyc2ns_scale(cpu_khz); 210 set_cyc2ns_scale(cpu_khz);
210 use_tsc_delay(); 211 use_tsc_delay();
212 return;
213
214out_no_tsc:
215 /*
216 * Set the tsc_disable flag if there's no TSC support, this
217 * makes it a fast flag for the kernel to see whether it
218 * should be using the TSC.
219 */
220 tsc_disable = 1;
211} 221}
212 222
213#ifdef CONFIG_CPU_FREQ 223#ifdef CONFIG_CPU_FREQ
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index be2f96e67f78..d1b8f2b7aea6 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
96{ 96{
97 int ret = 0; 97 int ret = 0;
98 98
99 /* kernel_vm86_regs is missing xfs, so copy everything up to 99 /* kernel_vm86_regs is missing xgs, so copy everything up to
100 (but not including) xgs, and then rest after xgs. */ 100 (but not including) orig_eax, and then rest including orig_eax. */
101 ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); 101 ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax));
102 ret += copy_to_user(&user->__null_gs, &regs->pt.xgs, 102 ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax,
103 sizeof(struct kernel_vm86_regs) - 103 sizeof(struct kernel_vm86_regs) -
104 offsetof(struct kernel_vm86_regs, pt.xgs)); 104 offsetof(struct kernel_vm86_regs, pt.orig_eax));
105 105
106 return ret; 106 return ret;
107} 107}
@@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
113{ 113{
114 int ret = 0; 114 int ret = 0;
115 115
116 ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); 116 /* copy eax-xfs inclusive */
117 ret += copy_from_user(&regs->pt.xgs, &user->__null_gs, 117 ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax));
118 /* copy orig_eax-__gsh+extra */
119 ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax,
118 sizeof(struct kernel_vm86_regs) - 120 sizeof(struct kernel_vm86_regs) -
119 offsetof(struct kernel_vm86_regs, pt.xgs) + 121 offsetof(struct kernel_vm86_regs, pt.orig_eax) +
120 extra); 122 extra);
121
122 return ret; 123 return ret;
123} 124}
124 125
@@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
157 158
158 ret = KVM86->regs32; 159 ret = KVM86->regs32;
159 160
160 loadsegment(fs, current->thread.saved_fs); 161 ret->xfs = current->thread.saved_fs;
161 ret->xgs = current->thread.saved_gs; 162 loadsegment(gs, current->thread.saved_gs);
162 163
163 return ret; 164 return ret;
164} 165}
@@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
285 */ 286 */
286 info->regs.pt.xds = 0; 287 info->regs.pt.xds = 0;
287 info->regs.pt.xes = 0; 288 info->regs.pt.xes = 0;
288 info->regs.pt.xgs = 0; 289 info->regs.pt.xfs = 0;
289 290
290/* we are clearing fs later just before "jmp resume_userspace", 291/* we are clearing gs later just before "jmp resume_userspace",
291 * because it is not saved/restored. 292 * because it is not saved/restored.
292 */ 293 */
293 294
@@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
321 */ 322 */
322 info->regs32->eax = 0; 323 info->regs32->eax = 0;
323 tsk->thread.saved_esp0 = tsk->thread.esp0; 324 tsk->thread.saved_esp0 = tsk->thread.esp0;
324 savesegment(fs, tsk->thread.saved_fs); 325 tsk->thread.saved_fs = info->regs32->xfs;
325 tsk->thread.saved_gs = info->regs32->xgs; 326 savesegment(gs, tsk->thread.saved_gs);
326 327
327 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
328 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
342 __asm__ __volatile__( 343 __asm__ __volatile__(
343 "movl %0,%%esp\n\t" 344 "movl %0,%%esp\n\t"
344 "movl %1,%%ebp\n\t" 345 "movl %1,%%ebp\n\t"
345 "mov %2, %%fs\n\t" 346 "mov %2, %%gs\n\t"
346 "jmp resume_userspace" 347 "jmp resume_userspace"
347 : /* no outputs */ 348 : /* no outputs */
348 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); 349 :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
new file mode 100644
index 000000000000..bb5a7abf949c
--- /dev/null
+++ b/arch/i386/kernel/vmi.c
@@ -0,0 +1,949 @@
1/*
2 * VMI specific paravirt-ops implementation
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to zach@vmware.com
22 *
23 */
24
25#include <linux/module.h>
26#include <linux/license.h>
27#include <linux/cpu.h>
28#include <linux/bootmem.h>
29#include <linux/mm.h>
30#include <asm/vmi.h>
31#include <asm/io.h>
32#include <asm/fixmap.h>
33#include <asm/apicdef.h>
34#include <asm/apic.h>
35#include <asm/processor.h>
36#include <asm/timer.h>
37#include <asm/vmi_time.h>
38
39/* Convenient for calling VMI functions indirectly in the ROM */
40typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
41typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int);
42
43#define call_vrom_func(rom,func) \
44 (((VROMFUNC *)(rom->func))())
45
46#define call_vrom_long_func(rom,func,arg) \
47 (((VROMLONGFUNC *)(rom->func)) (arg))
48
49static struct vrom_header *vmi_rom;
50static int license_gplok;
51static int disable_nodelay;
52static int disable_pge;
53static int disable_pse;
54static int disable_sep;
55static int disable_tsc;
56static int disable_mtrr;
57
58/* Cached VMI operations */
59struct {
60 void (*cpuid)(void /* non-c */);
61 void (*_set_ldt)(u32 selector);
62 void (*set_tr)(u32 selector);
63 void (*set_kernel_stack)(u32 selector, u32 esp0);
64 void (*allocate_page)(u32, u32, u32, u32, u32);
65 void (*release_page)(u32, u32);
66 void (*set_pte)(pte_t, pte_t *, unsigned);
67 void (*update_pte)(pte_t *, unsigned);
68 void (*set_linear_mapping)(int, u32, u32, u32);
69 void (*flush_tlb)(int);
70 void (*set_initial_ap_state)(int, int);
71 void (*halt)(void);
72} vmi_ops;
73
74/* XXX move this to alternative.h */
75extern struct paravirt_patch __start_parainstructions[],
76 __stop_parainstructions[];
77
78/*
79 * VMI patching routines.
80 */
81#define MNEM_CALL 0xe8
82#define MNEM_JMP 0xe9
83#define MNEM_RET 0xc3
84
85static char irq_save_disable_callout[] = {
86 MNEM_CALL, 0, 0, 0, 0,
87 MNEM_CALL, 0, 0, 0, 0,
88 MNEM_RET
89};
90#define IRQ_PATCH_INT_MASK 0
91#define IRQ_PATCH_DISABLE 5
92
93static inline void patch_offset(unsigned char *eip, unsigned char *dest)
94{
95 *(unsigned long *)(eip+1) = dest-eip-5;
96}
97
98static unsigned patch_internal(int call, unsigned len, void *insns)
99{
100 u64 reloc;
101 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
102 reloc = call_vrom_long_func(vmi_rom, get_reloc, call);
103 switch(rel->type) {
104 case VMI_RELOCATION_CALL_REL:
105 BUG_ON(len < 5);
106 *(char *)insns = MNEM_CALL;
107 patch_offset(insns, rel->eip);
108 return 5;
109
110 case VMI_RELOCATION_JUMP_REL:
111 BUG_ON(len < 5);
112 *(char *)insns = MNEM_JMP;
113 patch_offset(insns, rel->eip);
114 return 5;
115
116 case VMI_RELOCATION_NOP:
117 /* obliterate the whole thing */
118 return 0;
119
120 case VMI_RELOCATION_NONE:
121 /* leave native code in place */
122 break;
123
124 default:
125 BUG();
126 }
127 return len;
128}
129
130/*
131 * Apply patch if appropriate, return length of new instruction
132 * sequence. The callee does nop padding for us.
133 */
134static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
135{
136 switch (type) {
137 case PARAVIRT_IRQ_DISABLE:
138 return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
139 case PARAVIRT_IRQ_ENABLE:
140 return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
141 case PARAVIRT_RESTORE_FLAGS:
142 return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
143 case PARAVIRT_SAVE_FLAGS:
144 return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
145 case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE:
146 if (len >= 10) {
147 patch_internal(VMI_CALL_GetInterruptMask, len, insns);
148 patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5);
149 return 10;
150 } else {
151 /*
152 * You bastards didn't leave enough room to
153 * patch save_flags_irq_disable inline. Patch
154 * to a helper
155 */
156 BUG_ON(len < 5);
157 *(char *)insns = MNEM_CALL;
158 patch_offset(insns, irq_save_disable_callout);
159 return 5;
160 }
161 case PARAVIRT_INTERRUPT_RETURN:
162 return patch_internal(VMI_CALL_IRET, len, insns);
163 case PARAVIRT_STI_SYSEXIT:
164 return patch_internal(VMI_CALL_SYSEXIT, len, insns);
165 default:
166 break;
167 }
168 return len;
169}
170
171/* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */
172static void vmi_cpuid(unsigned int *eax, unsigned int *ebx,
173 unsigned int *ecx, unsigned int *edx)
174{
175 int override = 0;
176 if (*eax == 1)
177 override = 1;
178 asm volatile ("call *%6"
179 : "=a" (*eax),
180 "=b" (*ebx),
181 "=c" (*ecx),
182 "=d" (*edx)
183 : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid));
184 if (override) {
185 if (disable_pse)
186 *edx &= ~X86_FEATURE_PSE;
187 if (disable_pge)
188 *edx &= ~X86_FEATURE_PGE;
189 if (disable_sep)
190 *edx &= ~X86_FEATURE_SEP;
191 if (disable_tsc)
192 *edx &= ~X86_FEATURE_TSC;
193 if (disable_mtrr)
194 *edx &= ~X86_FEATURE_MTRR;
195 }
196}
197
198static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new)
199{
200 if (gdt[nr].a != new->a || gdt[nr].b != new->b)
201 write_gdt_entry(gdt, nr, new->a, new->b);
202}
203
204static void vmi_load_tls(struct thread_struct *t, unsigned int cpu)
205{
206 struct desc_struct *gdt = get_cpu_gdt_table(cpu);
207 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]);
208 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]);
209 vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]);
210}
211
212static void vmi_set_ldt(const void *addr, unsigned entries)
213{
214 unsigned cpu = smp_processor_id();
215 u32 low, high;
216
217 pack_descriptor(&low, &high, (unsigned long)addr,
218 entries * sizeof(struct desc_struct) - 1,
219 DESCTYPE_LDT, 0);
220 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high);
221 vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0);
222}
223
224static void vmi_set_tr(void)
225{
226 vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct));
227}
228
229static void vmi_load_esp0(struct tss_struct *tss,
230 struct thread_struct *thread)
231{
232 tss->esp0 = thread->esp0;
233
234 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
235 if (unlikely(tss->ss1 != thread->sysenter_cs)) {
236 tss->ss1 = thread->sysenter_cs;
237 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
238 }
239 vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0);
240}
241
242static void vmi_flush_tlb_user(void)
243{
244 vmi_ops.flush_tlb(VMI_FLUSH_TLB);
245}
246
247static void vmi_flush_tlb_kernel(void)
248{
249 vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
250}
251
252/* Stub to do nothing at all; used for delays and unimplemented calls */
253static void vmi_nop(void)
254{
255}
256
257/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
258#ifdef CONFIG_NO_IDLE_HZ
259static fastcall void vmi_safe_halt(void)
260{
261 int idle = vmi_stop_hz_timer();
262 vmi_ops.halt();
263 if (idle) {
264 local_irq_disable();
265 vmi_account_time_restart_hz_timer();
266 local_irq_enable();
267 }
268}
269#endif
270
271#ifdef CONFIG_DEBUG_PAGE_TYPE
272
273#ifdef CONFIG_X86_PAE
274#define MAX_BOOT_PTS (2048+4+1)
275#else
276#define MAX_BOOT_PTS (1024+1)
277#endif
278
279/*
280 * During boot, mem_map is not yet available in paging_init, so stash
281 * all the boot page allocations here.
282 */
283static struct {
284 u32 pfn;
285 int type;
286} boot_page_allocations[MAX_BOOT_PTS];
287static int num_boot_page_allocations;
288static int boot_allocations_applied;
289
290void vmi_apply_boot_page_allocations(void)
291{
292 int i;
293 BUG_ON(!mem_map);
294 for (i = 0; i < num_boot_page_allocations; i++) {
295 struct page *page = pfn_to_page(boot_page_allocations[i].pfn);
296 page->type = boot_page_allocations[i].type;
297 page->type = boot_page_allocations[i].type &
298 ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
299 }
300 boot_allocations_applied = 1;
301}
302
303static void record_page_type(u32 pfn, int type)
304{
305 BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS);
306 boot_page_allocations[num_boot_page_allocations].pfn = pfn;
307 boot_page_allocations[num_boot_page_allocations].type = type;
308 num_boot_page_allocations++;
309}
310
311static void check_zeroed_page(u32 pfn, int type, struct page *page)
312{
313 u32 *ptr;
314 int i;
315 int limit = PAGE_SIZE / sizeof(int);
316
317 if (page_address(page))
318 ptr = (u32 *)page_address(page);
319 else
320 ptr = (u32 *)__va(pfn << PAGE_SHIFT);
321 /*
322 * When cloning the root in non-PAE mode, only the userspace
323 * pdes need to be zeroed.
324 */
325 if (type & VMI_PAGE_CLONE)
326 limit = USER_PTRS_PER_PGD;
327 for (i = 0; i < limit; i++)
328 BUG_ON(ptr[i]);
329}
330
331/*
332 * We stash the page type into struct page so we can verify the page
333 * types are used properly.
334 */
335static void vmi_set_page_type(u32 pfn, int type)
336{
337 /* PAE can have multiple roots per page - don't track */
338 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
339 return;
340
341 if (boot_allocations_applied) {
342 struct page *page = pfn_to_page(pfn);
343 if (type != VMI_PAGE_NORMAL)
344 BUG_ON(page->type);
345 else
346 BUG_ON(page->type == VMI_PAGE_NORMAL);
347 page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
348 if (type & VMI_PAGE_ZEROED)
349 check_zeroed_page(pfn, type, page);
350 } else {
351 record_page_type(pfn, type);
352 }
353}
354
355static void vmi_check_page_type(u32 pfn, int type)
356{
357 /* PAE can have multiple roots per page - skip checks */
358 if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP))
359 return;
360
361 type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE);
362 if (boot_allocations_applied) {
363 struct page *page = pfn_to_page(pfn);
364 BUG_ON((page->type ^ type) & VMI_PAGE_PAE);
365 BUG_ON(type == VMI_PAGE_NORMAL && page->type);
366 BUG_ON((type & page->type) == 0);
367 }
368}
369#else
370#define vmi_set_page_type(p,t) do { } while (0)
371#define vmi_check_page_type(p,t) do { } while (0)
372#endif
373
374static void vmi_allocate_pt(u32 pfn)
375{
376 vmi_set_page_type(pfn, VMI_PAGE_L1);
377 vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
378}
379
380static void vmi_allocate_pd(u32 pfn)
381{
382 /*
383 * This call comes in very early, before mem_map is setup.
384 * It is called only for swapper_pg_dir, which already has
385 * data on it.
386 */
387 vmi_set_page_type(pfn, VMI_PAGE_L2);
388 vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
389}
390
391static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
392{
393 vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
394 vmi_check_page_type(clonepfn, VMI_PAGE_L2);
395 vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
396}
397
398static void vmi_release_pt(u32 pfn)
399{
400 vmi_ops.release_page(pfn, VMI_PAGE_L1);
401 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
402}
403
404static void vmi_release_pd(u32 pfn)
405{
406 vmi_ops.release_page(pfn, VMI_PAGE_L2);
407 vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
408}
409
410/*
411 * Helper macros for MMU update flags. We can defer updates until a flush
412 * or page invalidation only if the update is to the current address space
413 * (otherwise, there is no flush). We must check against init_mm, since
414 * this could be a kernel update, which usually passes init_mm, although
415 * sometimes this check can be skipped if we know the particular function
416 * is only called on user mode PTEs. We could change the kernel to pass
417 * current->active_mm here, but in particular, I was unsure if changing
418 * mm/highmem.c to do this would still be correct on other architectures.
419 */
420#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \
421 (!mustbeuser && (mm) == &init_mm))
422#define vmi_flags_addr(mm, addr, level, user) \
423 ((level) | (is_current_as(mm, user) ? \
424 (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
425#define vmi_flags_addr_defer(mm, addr, level, user) \
426 ((level) | (is_current_as(mm, user) ? \
427 (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
428
429static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep)
430{
431 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
432 vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
433}
434
435static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
436{
437 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
438 vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
439}
440
441static void vmi_set_pte(pte_t *ptep, pte_t pte)
442{
443 /* XXX because of set_pmd_pte, this can be called on PT or PD layers */
444 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
445 vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
446}
447
448static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
449{
450 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
451 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
452}
453
454static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
455{
456#ifdef CONFIG_X86_PAE
457 const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
458 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
459#else
460 const pte_t pte = { pmdval.pud.pgd.pgd };
461 vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
462#endif
463 vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
464}
465
466#ifdef CONFIG_X86_PAE
467
468static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
469{
470 /*
471 * XXX This is called from set_pmd_pte, but at both PT
472 * and PD layers so the VMI_PAGE_PT flag is wrong. But
473 * it is only called for large page mapping changes,
474 * the Xen backend, doesn't support large pages, and the
475 * ESX backend doesn't depend on the flag.
476 */
477 set_64bit((unsigned long long *)ptep,pte_val(pteval));
478 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
479}
480
481static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
482{
483 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
484 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
485}
486
487static void vmi_set_pud(pud_t *pudp, pud_t pudval)
488{
489 /* Um, eww */
490 const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
491 vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
492 vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
493}
494
495static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
496{
497 const pte_t pte = { 0 };
498 vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
499 vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
500}
501
502void vmi_pmd_clear(pmd_t *pmd)
503{
504 const pte_t pte = { 0 };
505 vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
506 vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
507}
508#endif
509
510#ifdef CONFIG_SMP
511struct vmi_ap_state ap;
512extern void setup_pda(void);
513
514static void __init /* XXX cpu hotplug */
515vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
516 unsigned long start_esp)
517{
518 /* Default everything to zero. This is fine for most GPRs. */
519 memset(&ap, 0, sizeof(struct vmi_ap_state));
520
521 ap.gdtr_limit = GDT_SIZE - 1;
522 ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);
523
524 ap.idtr_limit = IDT_ENTRIES * 8 - 1;
525 ap.idtr_base = (unsigned long) idt_table;
526
527 ap.ldtr = 0;
528
529 ap.cs = __KERNEL_CS;
530 ap.eip = (unsigned long) start_eip;
531 ap.ss = __KERNEL_DS;
532 ap.esp = (unsigned long) start_esp;
533
534 ap.ds = __USER_DS;
535 ap.es = __USER_DS;
536 ap.fs = __KERNEL_PDA;
537 ap.gs = 0;
538
539 ap.eflags = 0;
540
541 setup_pda();
542
543#ifdef CONFIG_X86_PAE
544 /* efer should match BSP efer. */
545 if (cpu_has_nx) {
546 unsigned l, h;
547 rdmsr(MSR_EFER, l, h);
548 ap.efer = (unsigned long long) h << 32 | l;
549 }
550#endif
551
552 ap.cr3 = __pa(swapper_pg_dir);
553 /* Protected mode, paging, AM, WP, NE, MP. */
554 ap.cr0 = 0x80050023;
555 ap.cr4 = mmu_cr4_features;
556 vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid);
557}
558#endif
559
560static inline int __init check_vmi_rom(struct vrom_header *rom)
561{
562 struct pci_header *pci;
563 struct pnp_header *pnp;
564 const char *manufacturer = "UNKNOWN";
565 const char *product = "UNKNOWN";
566 const char *license = "unspecified";
567
568 if (rom->rom_signature != 0xaa55)
569 return 0;
570 if (rom->vrom_signature != VMI_SIGNATURE)
571 return 0;
572 if (rom->api_version_maj != VMI_API_REV_MAJOR ||
573 rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
574 printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
575 rom->api_version_maj,
576 rom->api_version_min);
577 return 0;
578 }
579
580 /*
581 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
582 * the PCI header and device type to make sure this is really a
583 * VMI device.
584 */
585 if (!rom->pci_header_offs) {
586 printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
587 return 0;
588 }
589
590 pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
591 if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
592 pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
593 /* Allow it to run... anyways, but warn */
594 printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
595 }
596
597 if (rom->pnp_header_offs) {
598 pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
599 if (pnp->manufacturer_offset)
600 manufacturer = (const char *)rom+pnp->manufacturer_offset;
601 if (pnp->product_offset)
602 product = (const char *)rom+pnp->product_offset;
603 }
604
605 if (rom->license_offs)
606 license = (char *)rom+rom->license_offs;
607
608 printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
609 manufacturer, product,
610 rom->api_version_maj, rom->api_version_min,
611 pci->rom_version_maj, pci->rom_version_min);
612
613 license_gplok = license_is_gpl_compatible(license);
614 if (!license_gplok) {
615 printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... "
616 "inlining disabled\n",
617 license);
618 add_taint(TAINT_PROPRIETARY_MODULE);
619 }
620 return 1;
621}
622
623/*
624 * Probe for the VMI option ROM
625 */
626static inline int __init probe_vmi_rom(void)
627{
628 unsigned long base;
629
630 /* VMI ROM is in option ROM area, check signature */
631 for (base = 0xC0000; base < 0xE0000; base += 2048) {
632 struct vrom_header *romstart;
633 romstart = (struct vrom_header *)isa_bus_to_virt(base);
634 if (check_vmi_rom(romstart)) {
635 vmi_rom = romstart;
636 return 1;
637 }
638 }
639 return 0;
640}
641
642/*
643 * VMI setup common to all processors
644 */
645void vmi_bringup(void)
646{
647 /* We must establish the lowmem mapping for MMU ops to work */
648 if (vmi_rom)
649 vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
650}
651
652/*
653 * Return a pointer to the VMI function or a NOP stub
654 */
655static void *vmi_get_function(int vmicall)
656{
657 u64 reloc;
658 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
659 reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall);
660 BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);
661 if (rel->type == VMI_RELOCATION_CALL_REL)
662 return (void *)rel->eip;
663 else
664 return (void *)vmi_nop;
665}
666
667/*
668 * Helper macro for making the VMI paravirt-ops fill code readable.
669 * For unimplemented operations, fall back to default.
670 */
671#define para_fill(opname, vmicall) \
672do { \
673 reloc = call_vrom_long_func(vmi_rom, get_reloc, \
674 VMI_CALL_##vmicall); \
675 if (rel->type != VMI_RELOCATION_NONE) { \
676 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \
677 paravirt_ops.opname = (void *)rel->eip; \
678 } \
679} while (0)
680
681/*
682 * Activate the VMI interface and switch into paravirtualized mode
683 */
684static inline int __init activate_vmi(void)
685{
686 short kernel_cs;
687 u64 reloc;
688 const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;
689
690 if (call_vrom_func(vmi_rom, vmi_init) != 0) {
691 printk(KERN_ERR "VMI ROM failed to initialize!");
692 return 0;
693 }
694 savesegment(cs, kernel_cs);
695
696 paravirt_ops.paravirt_enabled = 1;
697 paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
698
699 paravirt_ops.patch = vmi_patch;
700 paravirt_ops.name = "vmi";
701
702 /*
703 * Many of these operations are ABI compatible with VMI.
704 * This means we can fill in the paravirt-ops with direct
705 * pointers into the VMI ROM. If the calling convention for
706 * these operations changes, this code needs to be updated.
707 *
708 * Exceptions
709 * CPUID paravirt-op uses pointers, not the native ISA
710 * halt has no VMI equivalent; all VMI halts are "safe"
711 * no MSR support yet - just trap and emulate. VMI uses the
712 * same ABI as the native ISA, but Linux wants exceptions
713 * from bogus MSR read / write handled
714 * rdpmc is not yet used in Linux
715 */
716
717 /* CPUID is special, so very special */
718 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID);
719 if (rel->type != VMI_RELOCATION_NONE) {
720 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
721 vmi_ops.cpuid = (void *)rel->eip;
722 paravirt_ops.cpuid = vmi_cpuid;
723 }
724
725 para_fill(clts, CLTS);
726 para_fill(get_debugreg, GetDR);
727 para_fill(set_debugreg, SetDR);
728 para_fill(read_cr0, GetCR0);
729 para_fill(read_cr2, GetCR2);
730 para_fill(read_cr3, GetCR3);
731 para_fill(read_cr4, GetCR4);
732 para_fill(write_cr0, SetCR0);
733 para_fill(write_cr2, SetCR2);
734 para_fill(write_cr3, SetCR3);
735 para_fill(write_cr4, SetCR4);
736 para_fill(save_fl, GetInterruptMask);
737 para_fill(restore_fl, SetInterruptMask);
738 para_fill(irq_disable, DisableInterrupts);
739 para_fill(irq_enable, EnableInterrupts);
740 /* irq_save_disable !!! sheer pain */
741 patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
742 (char *)paravirt_ops.save_fl);
743 patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
744 (char *)paravirt_ops.irq_disable);
745#ifndef CONFIG_NO_IDLE_HZ
746 para_fill(safe_halt, Halt);
747#else
748 vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
749 paravirt_ops.safe_halt = vmi_safe_halt;
750#endif
751 para_fill(wbinvd, WBINVD);
752 /* paravirt_ops.read_msr = vmi_rdmsr */
753 /* paravirt_ops.write_msr = vmi_wrmsr */
754 para_fill(read_tsc, RDTSC);
755 /* paravirt_ops.rdpmc = vmi_rdpmc */
756
757 /* TR interface doesn't pass TR value */
758 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR);
759 if (rel->type != VMI_RELOCATION_NONE) {
760 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
761 vmi_ops.set_tr = (void *)rel->eip;
762 paravirt_ops.load_tr_desc = vmi_set_tr;
763 }
764
765 /* LDT is special, too */
766 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT);
767 if (rel->type != VMI_RELOCATION_NONE) {
768 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
769 vmi_ops._set_ldt = (void *)rel->eip;
770 paravirt_ops.set_ldt = vmi_set_ldt;
771 }
772
773 para_fill(load_gdt, SetGDT);
774 para_fill(load_idt, SetIDT);
775 para_fill(store_gdt, GetGDT);
776 para_fill(store_idt, GetIDT);
777 para_fill(store_tr, GetTR);
778 paravirt_ops.load_tls = vmi_load_tls;
779 para_fill(write_ldt_entry, WriteLDTEntry);
780 para_fill(write_gdt_entry, WriteGDTEntry);
781 para_fill(write_idt_entry, WriteIDTEntry);
782 reloc = call_vrom_long_func(vmi_rom, get_reloc,
783 VMI_CALL_UpdateKernelStack);
784 if (rel->type != VMI_RELOCATION_NONE) {
785 BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
786 vmi_ops.set_kernel_stack = (void *)rel->eip;
787 paravirt_ops.load_esp0 = vmi_load_esp0;
788 }
789
790 para_fill(set_iopl_mask, SetIOPLMask);
791 paravirt_ops.io_delay = (void *)vmi_nop;
792 if (!disable_nodelay) {
793 paravirt_ops.const_udelay = (void *)vmi_nop;
794 }
795
796 para_fill(set_lazy_mode, SetLazyMode);
797
798 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
799 if (rel->type != VMI_RELOCATION_NONE) {
800 vmi_ops.flush_tlb = (void *)rel->eip;
801 paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
802 paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
803 }
804 para_fill(flush_tlb_single, InvalPage);
805
806 /*
807 * Until a standard flag format can be agreed on, we need to
808 * implement these as wrappers in Linux. Get the VMI ROM
809 * function pointers for the two backend calls.
810 */
811#ifdef CONFIG_X86_PAE
812 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
813 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
814#else
815 vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
816 vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
817#endif
818 vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
819 vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
820 vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
821
822 paravirt_ops.alloc_pt = vmi_allocate_pt;
823 paravirt_ops.alloc_pd = vmi_allocate_pd;
824 paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
825 paravirt_ops.release_pt = vmi_release_pt;
826 paravirt_ops.release_pd = vmi_release_pd;
827 paravirt_ops.set_pte = vmi_set_pte;
828 paravirt_ops.set_pte_at = vmi_set_pte_at;
829 paravirt_ops.set_pmd = vmi_set_pmd;
830 paravirt_ops.pte_update = vmi_update_pte;
831 paravirt_ops.pte_update_defer = vmi_update_pte_defer;
832#ifdef CONFIG_X86_PAE
833 paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
834 paravirt_ops.set_pte_present = vmi_set_pte_present;
835 paravirt_ops.set_pud = vmi_set_pud;
836 paravirt_ops.pte_clear = vmi_pte_clear;
837 paravirt_ops.pmd_clear = vmi_pmd_clear;
838#endif
839 /*
840 * These MUST always be patched. Don't support indirect jumps
841 * through these operations, as the VMI interface may use either
842 * a jump or a call to get to these operations, depending on
843 * the backend. They are performance critical anyway, so requiring
844 * a patch is not a big problem.
845 */
846 paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
847 paravirt_ops.iret = (void *)0xbadbab0;
848
849#ifdef CONFIG_SMP
850 paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
851 vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
852#endif
853
854#ifdef CONFIG_X86_LOCAL_APIC
855 paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
856 paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
857 paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
858#endif
859
860 /*
861 * Check for VMI timer functionality by probing for a cycle frequency method
862 */
863 reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
864 if (rel->type != VMI_RELOCATION_NONE) {
865 vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
866 vmi_timer_ops.get_cycle_counter =
867 vmi_get_function(VMI_CALL_GetCycleCounter);
868 vmi_timer_ops.get_wallclock =
869 vmi_get_function(VMI_CALL_GetWallclockTime);
870 vmi_timer_ops.wallclock_updated =
871 vmi_get_function(VMI_CALL_WallclockUpdated);
872 vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
873 vmi_timer_ops.cancel_alarm =
874 vmi_get_function(VMI_CALL_CancelAlarm);
875 paravirt_ops.time_init = vmi_time_init;
876 paravirt_ops.get_wallclock = vmi_get_wallclock;
877 paravirt_ops.set_wallclock = vmi_set_wallclock;
878#ifdef CONFIG_X86_LOCAL_APIC
879 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
880 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
881#endif
882 custom_sched_clock = vmi_sched_clock;
883 }
884
885 /*
886 * Alternative instruction rewriting doesn't happen soon enough
887 * to convert VMI_IRET to a call instead of a jump; so we have
888 * to do this before IRQs get reenabled. Fortunately, it is
889 * idempotent.
890 */
891 apply_paravirt(__start_parainstructions, __stop_parainstructions);
892
893 vmi_bringup();
894
895 return 1;
896}
897
898#undef para_fill
899
900void __init vmi_init(void)
901{
902 unsigned long flags;
903
904 if (!vmi_rom)
905 probe_vmi_rom();
906 else
907 check_vmi_rom(vmi_rom);
908
909 /* In case probing for or validating the ROM failed, basil */
910 if (!vmi_rom)
911 return;
912
913 reserve_top_address(-vmi_rom->virtual_top);
914
915 local_irq_save(flags);
916 activate_vmi();
917#ifdef CONFIG_SMP
918 no_timer_check = 1;
919#endif
920 local_irq_restore(flags & X86_EFLAGS_IF);
921}
922
923static int __init parse_vmi(char *arg)
924{
925 if (!arg)
926 return -EINVAL;
927
928 if (!strcmp(arg, "disable_nodelay"))
929 disable_nodelay = 1;
930 else if (!strcmp(arg, "disable_pge")) {
931 clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
932 disable_pge = 1;
933 } else if (!strcmp(arg, "disable_pse")) {
934 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
935 disable_pse = 1;
936 } else if (!strcmp(arg, "disable_sep")) {
937 clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
938 disable_sep = 1;
939 } else if (!strcmp(arg, "disable_tsc")) {
940 clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
941 disable_tsc = 1;
942 } else if (!strcmp(arg, "disable_mtrr")) {
943 clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
944 disable_mtrr = 1;
945 }
946 return 0;
947}
948
949early_param("vmi", parse_vmi);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
new file mode 100644
index 000000000000..2e2d8dbcbd68
--- /dev/null
+++ b/arch/i386/kernel/vmitime.c
@@ -0,0 +1,499 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
104#if 0
105static cycle_t read_stolen_cycles(void)
106{
107 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
108}
109#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .is_continuous = 1,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 vmi_timer_interrupt,
127 SA_INTERRUPT,
128 CPU_MASK_NONE,
129 "VMI-alarm",
130 NULL,
131 NULL
132};
133
134/* Alarm rate */
135static int __init vmi_timer_alarm_rate_setup(char* str)
136{
137 int alarm_rate;
138 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
139 alarm_hz = alarm_rate;
140 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
141 }
142 return 1;
143}
144__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
145
146
147/* Initialization */
148static void vmi_get_wallclock_ts(struct timespec *ts)
149{
150 unsigned long long wallclock;
151 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
152 ts->tv_nsec = do_div(wallclock, 1000000000);
153 ts->tv_sec = wallclock;
154}
155
156static void update_xtime_from_wallclock(void)
157{
158 struct timespec ts;
159 vmi_get_wallclock_ts(&ts);
160 do_settimeofday(&ts);
161}
162
163unsigned long vmi_get_wallclock(void)
164{
165 struct timespec ts;
166 vmi_get_wallclock_ts(&ts);
167 return ts.tv_sec;
168}
169
170int vmi_set_wallclock(unsigned long now)
171{
172 return -1;
173}
174
175unsigned long long vmi_sched_clock(void)
176{
177 return read_available_cycles();
178}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183 unsigned long flags;
184
185 local_irq_save(flags);
186 setup_irq(0, &vmi_timer_irq);
187#ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189#endif
190
191 no_sync_cmos_clock = 1;
192
193 vmi_get_wallclock_ts(&xtime);
194 set_normalized_timespec(&wall_to_monotonic,
195 -xtime.tv_sec, -xtime.tv_nsec);
196
197 real_cycles_accounted_system = read_real_cycles();
198 update_xtime_from_wallclock();
199 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
200
201 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
202
203 cycles_per_jiffy = cycles_per_sec;
204 (void)do_div(cycles_per_jiffy, HZ);
205 cycles_per_alarm = cycles_per_sec;
206 (void)do_div(cycles_per_alarm, alarm_hz);
207 cycles_per_msec = cycles_per_sec;
208 (void)do_div(cycles_per_msec, 1000);
209 cpu_khz = cycles_per_msec;
210
211 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
212 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
213 cycles_per_alarm);
214
215 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
216 clocksource_vmi.shift);
217 if (clocksource_register(&clocksource_vmi))
218 printk(KERN_WARNING "Error registering VMITIME clocksource.");
219
220 /* Disable PIT. */
221 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
222
223 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
224 * reduce the latency calling update_process_times. */
225 vmi_timer_ops.set_alarm(
226 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
227 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
228 cycles_per_alarm);
229
230 local_irq_restore(flags);
231}
232
233#ifdef CONFIG_X86_LOCAL_APIC
234
235void __init vmi_timer_setup_boot_alarm(void)
236{
237 local_irq_disable();
238
239 /* Route the interrupt to the correct vector. */
240 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
241
242 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
243 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
244 vmi_timer_ops.set_alarm(
245 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
246 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
247 cycles_per_alarm);
248 local_irq_enable();
249}
250
251/* Initialize the time accounting variables for an AP on an SMP system.
252 * Also, set the local alarm for the AP. */
253void __init vmi_timer_setup_secondary_alarm(void)
254{
255 int cpu = smp_processor_id();
256
257 /* Route the interrupt to the correct vector. */
258 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
259
260 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
261
262 vmi_timer_ops.set_alarm(
263 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
264 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
265 cycles_per_alarm);
266}
267
268#endif
269
270/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
271static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
272{
273 long long cycles_not_accounted;
274
275 write_seqlock(&xtime_lock);
276
277 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
278 while (cycles_not_accounted >= cycles_per_jiffy) {
279 /* systems wide jiffies and wallclock. */
280 do_timer(1);
281
282 cycles_not_accounted -= cycles_per_jiffy;
283 real_cycles_accounted_system += cycles_per_jiffy;
284 }
285
286 if (vmi_timer_ops.wallclock_updated())
287 update_xtime_from_wallclock();
288
289 write_sequnlock(&xtime_lock);
290}
291
292/* Update per-cpu process times. */
293static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
294 unsigned long long cur_process_times_cycles)
295{
296 long long cycles_not_accounted;
297 cycles_not_accounted = cur_process_times_cycles -
298 per_cpu(process_times_cycles_accounted_cpu, cpu);
299
300 while (cycles_not_accounted >= cycles_per_jiffy) {
301 /* Account time to the current process. This includes
302 * calling into the scheduler to decrement the timeslice
303 * and possibly reschedule.*/
304 update_process_times(user_mode(regs));
305 /* XXX handle /proc/profile multiplier. */
306 profile_tick(CPU_PROFILING);
307
308 cycles_not_accounted -= cycles_per_jiffy;
309 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
310 }
311}
312
313#ifdef CONFIG_NO_IDLE_HZ
314/* Update per-cpu idle times. Used when a no-hz halt is ended. */
315static void vmi_account_no_hz_idle_cycles(int cpu,
316 unsigned long long cur_process_times_cycles)
317{
318 long long cycles_not_accounted;
319 unsigned long no_idle_hz_jiffies = 0;
320
321 cycles_not_accounted = cur_process_times_cycles -
322 per_cpu(process_times_cycles_accounted_cpu, cpu);
323
324 while (cycles_not_accounted >= cycles_per_jiffy) {
325 no_idle_hz_jiffies++;
326 cycles_not_accounted -= cycles_per_jiffy;
327 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
328 }
329 /* Account time to the idle process. */
330 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
331}
332#endif
333
334/* Update per-cpu stolen time. */
335static void vmi_account_stolen_cycles(int cpu,
336 unsigned long long cur_real_cycles,
337 unsigned long long cur_avail_cycles)
338{
339 long long stolen_cycles_not_accounted;
340 unsigned long stolen_jiffies = 0;
341
342 if (cur_real_cycles < cur_avail_cycles)
343 return;
344
345 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
346 per_cpu(stolen_cycles_accounted_cpu, cpu);
347
348 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
349 stolen_jiffies++;
350 stolen_cycles_not_accounted -= cycles_per_jiffy;
351 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
352 }
353 /* HACK: pass NULL to force time onto cpustat->steal. */
354 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
355}
356
357/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
358 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
359static void vmi_local_timer_interrupt(int cpu)
360{
361 unsigned long long cur_real_cycles, cur_process_times_cycles;
362
363 cur_real_cycles = read_real_cycles();
364 cur_process_times_cycles = read_available_cycles();
365 /* Update system wide (real) time state (xtime, jiffies). */
366 vmi_account_real_cycles(cur_real_cycles);
367 /* Update per-cpu process times. */
368 vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
369 /* Update time stolen from this cpu by the hypervisor. */
370 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
371}
372
373#ifdef CONFIG_NO_IDLE_HZ
374
375/* Must be called only from idle loop, with interrupts disabled. */
376int vmi_stop_hz_timer(void)
377{
378 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
379
380 unsigned long seq, next;
381 unsigned long long real_cycles_expiry;
382 int cpu = smp_processor_id();
383 int idle;
384
385 BUG_ON(!irqs_disabled());
386 if (sysctl_hz_timer != 0)
387 return 0;
388
389 cpu_set(cpu, nohz_cpu_mask);
390 smp_mb();
391 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
392 (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
393 cpu_clear(cpu, nohz_cpu_mask);
394 next = jiffies;
395 idle = 0;
396 } else
397 idle = 1;
398
399 /* Convert jiffies to the real cycle counter. */
400 do {
401 seq = read_seqbegin(&xtime_lock);
402 real_cycles_expiry = real_cycles_accounted_system +
403 (long)(next - jiffies) * cycles_per_jiffy;
404 } while (read_seqretry(&xtime_lock, seq));
405
406 /* This cpu is going idle. Disable the periodic alarm. */
407 if (idle) {
408 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
409 per_cpu(idle_start_jiffies, cpu) = jiffies;
410 }
411
412 /* Set the real time alarm to expire at the next event. */
413 vmi_timer_ops.set_alarm(
414 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
415 real_cycles_expiry, 0);
416
417 return idle;
418}
419
420static void vmi_reenable_hz_timer(int cpu)
421{
422 /* For /proc/vmi/info idle_hz stat. */
423 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
424 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
425
426 /* Don't bother explicitly cancelling the one-shot alarm -- at
427 * worse we will receive a spurious timer interrupt. */
428 vmi_timer_ops.set_alarm(
429 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
430 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
431 cycles_per_alarm);
432 /* Indicate this cpu is no longer nohz idle. */
433 cpu_clear(cpu, nohz_cpu_mask);
434}
435
436/* Called from interrupt handlers when (local) HZ timer is disabled. */
437void vmi_account_time_restart_hz_timer(void)
438{
439 unsigned long long cur_real_cycles, cur_process_times_cycles;
440 int cpu = smp_processor_id();
441
442 BUG_ON(!irqs_disabled());
443 /* Account the time during which the HZ timer was disabled. */
444 cur_real_cycles = read_real_cycles();
445 cur_process_times_cycles = read_available_cycles();
446 /* Update system wide (real) time state (xtime, jiffies). */
447 vmi_account_real_cycles(cur_real_cycles);
448 /* Update per-cpu idle times. */
449 vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
450 /* Update time stolen from this cpu by the hypervisor. */
451 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
452 /* Reenable the hz timer. */
453 vmi_reenable_hz_timer(cpu);
454}
455
456#endif /* CONFIG_NO_IDLE_HZ */
457
458/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
459 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
460 * APIC setup and setup_boot_vmi_alarm() is called. */
461static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
462{
463 vmi_local_timer_interrupt(smp_processor_id());
464 return IRQ_HANDLED;
465}
466
467#ifdef CONFIG_X86_LOCAL_APIC
468
469/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
470 * Also used in UP when CONFIG_X86_LOCAL_APIC.
471 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
472void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
473{
474 struct pt_regs *old_regs = set_irq_regs(regs);
475 int cpu = smp_processor_id();
476
477 /*
478 * the NMI deadlock-detector uses this.
479 */
480 per_cpu(irq_stat,cpu).apic_timer_irqs++;
481
482 /*
483 * NOTE! We'd better ACK the irq immediately,
484 * because timer handling can be slow.
485 */
486 ack_APIC_irq();
487
488 /*
489 * update_process_times() expects us to have done irq_enter().
490 * Besides, if we don't timer interrupts ignore the global
491 * interrupt lock, which is the WrongThing (tm) to do.
492 */
493 irq_enter();
494 vmi_local_timer_interrupt(cpu);
495 irq_exit();
496 set_irq_regs(old_regs);
497}
498
499#endif /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 5038a73d554e..ca51610955df 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -37,9 +37,14 @@ SECTIONS
37{ 37{
38 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; 38 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
39 phys_startup_32 = startup_32 - LOAD_OFFSET; 39 phys_startup_32 = startup_32 - LOAD_OFFSET;
40
41 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
42 _text = .; /* Text and read-only data */
43 *(.text.head)
44 } :text = 0x9090
45
40 /* read-only */ 46 /* read-only */
41 .text : AT(ADDR(.text) - LOAD_OFFSET) { 47 .text : AT(ADDR(.text) - LOAD_OFFSET) {
42 _text = .; /* Text and read-only data */
43 *(.text) 48 *(.text)
44 SCHED_TEXT 49 SCHED_TEXT
45 LOCK_TEXT 50 LOCK_TEXT
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c
index 9819b705efa4..2e2c51a8bd3a 100644
--- a/arch/i386/math-emu/get_address.c
+++ b/arch/i386/math-emu/get_address.c
@@ -56,15 +56,14 @@ static int reg_offset_vm86[] = {
56#define VM86_REG_(x) (*(unsigned short *) \ 56#define VM86_REG_(x) (*(unsigned short *) \
57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) 57 (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info))
58 58
59/* These are dummy, fs and gs are not saved on the stack. */ 59/* This dummy, gs is not saved on the stack. */
60#define ___FS ___ds
61#define ___GS ___ds 60#define ___GS ___ds
62 61
63static int reg_offset_pm[] = { 62static int reg_offset_pm[] = {
64 offsetof(struct info,___cs), 63 offsetof(struct info,___cs),
65 offsetof(struct info,___ds), 64 offsetof(struct info,___ds),
66 offsetof(struct info,___es), 65 offsetof(struct info,___es),
67 offsetof(struct info,___FS), 66 offsetof(struct info,___fs),
68 offsetof(struct info,___GS), 67 offsetof(struct info,___GS),
69 offsetof(struct info,___ss), 68 offsetof(struct info,___ss),
70 offsetof(struct info,___ds) 69 offsetof(struct info,___ds)
@@ -169,13 +168,10 @@ static long pm_address(u_char FPU_modrm, u_char segment,
169 168
170 switch ( segment ) 169 switch ( segment )
171 { 170 {
172 /* fs and gs aren't used by the kernel, so they still have their 171 /* gs isn't used by the kernel, so it still has its
173 user-space values. */ 172 user-space value. */
174 case PREFIX_FS_-1:
175 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
176 savesegment(fs, addr->selector);
177 break;
178 case PREFIX_GS_-1: 173 case PREFIX_GS_-1:
174 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */
179 savesegment(gs, addr->selector); 175 savesegment(gs, addr->selector);
180 break; 176 break;
181 default: 177 default:
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h
index 78d7b7689dd6..59e73302aa60 100644
--- a/arch/i386/math-emu/status_w.h
+++ b/arch/i386/math-emu/status_w.h
@@ -48,9 +48,11 @@
48 48
49#define status_word() \ 49#define status_word() \
50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) 50 ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top))
51#define setcc(cc) ({ \ 51static inline void setcc(int cc)
52 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \ 52{
53 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); }) 53 partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3);
54 partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3);
55}
54 56
55#ifdef PECULIAR_486 57#ifdef PECULIAR_486
56 /* Default, this conveys no information, but an 80486 does it. */ 58 /* Default, this conveys no information, but an 80486 does it. */
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index e0c390d6ceb5..aa58720f6871 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -101,7 +101,6 @@ extern void find_max_pfn(void);
101extern void add_one_highpage_init(struct page *, int, int); 101extern void add_one_highpage_init(struct page *, int, int);
102 102
103extern struct e820map e820; 103extern struct e820map e820;
104extern unsigned long init_pg_tables_end;
105extern unsigned long highend_pfn, highstart_pfn; 104extern unsigned long highend_pfn, highstart_pfn;
106extern unsigned long max_low_pfn; 105extern unsigned long max_low_pfn;
107extern unsigned long totalram_pages; 106extern unsigned long totalram_pages;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index cba9b3894a33..b8c4e259fc8b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -46,17 +46,17 @@ int unregister_page_fault_notifier(struct notifier_block *nb)
46} 46}
47EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); 47EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
48 48
49static inline int notify_page_fault(enum die_val val, const char *str, 49static inline int notify_page_fault(struct pt_regs *regs, long err)
50 struct pt_regs *regs, long err, int trap, int sig)
51{ 50{
52 struct die_args args = { 51 struct die_args args = {
53 .regs = regs, 52 .regs = regs,
54 .str = str, 53 .str = "page fault",
55 .err = err, 54 .err = err,
56 .trapnr = trap, 55 .trapnr = 14,
57 .signr = sig 56 .signr = SIGSEGV
58 }; 57 };
59 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); 58 return atomic_notifier_call_chain(&notify_page_fault_chain,
59 DIE_PAGE_FAULT, &args);
60} 60}
61 61
62/* 62/*
@@ -327,8 +327,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
327 if (unlikely(address >= TASK_SIZE)) { 327 if (unlikely(address >= TASK_SIZE)) {
328 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) 328 if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
329 return; 329 return;
330 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 330 if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
331 SIGSEGV) == NOTIFY_STOP)
332 return; 331 return;
333 /* 332 /*
334 * Don't take the mm semaphore here. If we fixup a prefetch 333 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -337,8 +336,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
337 goto bad_area_nosemaphore; 336 goto bad_area_nosemaphore;
338 } 337 }
339 338
340 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, 339 if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
341 SIGSEGV) == NOTIFY_STOP)
342 return; 340 return;
343 341
344 /* It's safe to allow irq's after cr2 has been saved and the vmalloc 342 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index c5c5ea700cc7..ae436882af7a 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -62,6 +62,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
62 62
63#ifdef CONFIG_X86_PAE 63#ifdef CONFIG_X86_PAE
64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); 64 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
65 paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
65 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); 66 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
66 pud = pud_offset(pgd, 0); 67 pud = pud_offset(pgd, 0);
67 if (pmd_table != pmd_offset(pud, 0)) 68 if (pmd_table != pmd_offset(pud, 0))
@@ -82,6 +83,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
82{ 83{
83 if (pmd_none(*pmd)) { 84 if (pmd_none(*pmd)) {
84 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 85 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
86 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
85 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 87 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
86 if (page_table != pte_offset_kernel(pmd, 0)) 88 if (page_table != pte_offset_kernel(pmd, 0))
87 BUG(); 89 BUG();
@@ -345,6 +347,8 @@ static void __init pagetable_init (void)
345 /* Init entries of the first-level page table to the zero page */ 347 /* Init entries of the first-level page table to the zero page */
346 for (i = 0; i < PTRS_PER_PGD; i++) 348 for (i = 0; i < PTRS_PER_PGD; i++)
347 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 349 set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
350#else
351 paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
348#endif 352#endif
349 353
350 /* Enable PSE if available */ 354 /* Enable PSE if available */
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index e223b1d4981c..412ebbd8adb0 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -60,6 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
60 address = __pa(address); 60 address = __pa(address);
61 addr = address & LARGE_PAGE_MASK; 61 addr = address & LARGE_PAGE_MASK;
62 pbase = (pte_t *)page_address(base); 62 pbase = (pte_t *)page_address(base);
63 paravirt_alloc_pt(page_to_pfn(base));
63 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { 64 for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
64 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, 65 set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
65 addr == address ? prot : ref_prot)); 66 addr == address ? prot : ref_prot));
@@ -172,6 +173,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
172 if (!PageReserved(kpte_page)) { 173 if (!PageReserved(kpte_page)) {
173 if (cpu_has_pse && (page_private(kpte_page) == 0)) { 174 if (cpu_has_pse && (page_private(kpte_page) == 0)) {
174 ClearPagePrivate(kpte_page); 175 ClearPagePrivate(kpte_page);
176 paravirt_release_pt(page_to_pfn(kpte_page));
175 list_add(&kpte_page->lru, &df_list); 177 list_add(&kpte_page->lru, &df_list);
176 revert_page(kpte_page, address); 178 revert_page(kpte_page, address);
177 } 179 }
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index f349eaf450b0..fa0cfbd551e1 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -171,6 +171,8 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
171void reserve_top_address(unsigned long reserve) 171void reserve_top_address(unsigned long reserve)
172{ 172{
173 BUG_ON(fixmaps > 0); 173 BUG_ON(fixmaps > 0);
174 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
175 (int)-reserve);
174#ifdef CONFIG_COMPAT_VDSO 176#ifdef CONFIG_COMPAT_VDSO
175 BUG_ON(reserve != 0); 177 BUG_ON(reserve != 0);
176#else 178#else
@@ -248,9 +250,15 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
248 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 250 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
249 swapper_pg_dir + USER_PTRS_PER_PGD, 251 swapper_pg_dir + USER_PTRS_PER_PGD,
250 KERNEL_PGD_PTRS); 252 KERNEL_PGD_PTRS);
253
251 if (PTRS_PER_PMD > 1) 254 if (PTRS_PER_PMD > 1)
252 return; 255 return;
253 256
257 /* must happen under lock */
258 paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
259 __pa(swapper_pg_dir) >> PAGE_SHIFT,
260 USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
261
254 pgd_list_add(pgd); 262 pgd_list_add(pgd);
255 spin_unlock_irqrestore(&pgd_lock, flags); 263 spin_unlock_irqrestore(&pgd_lock, flags);
256} 264}
@@ -260,6 +268,7 @@ void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
260{ 268{
261 unsigned long flags; /* can be called from interrupt context */ 269 unsigned long flags; /* can be called from interrupt context */
262 270
271 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
263 spin_lock_irqsave(&pgd_lock, flags); 272 spin_lock_irqsave(&pgd_lock, flags);
264 pgd_list_del(pgd); 273 pgd_list_del(pgd);
265 spin_unlock_irqrestore(&pgd_lock, flags); 274 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -277,13 +286,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
277 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 286 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
278 if (!pmd) 287 if (!pmd)
279 goto out_oom; 288 goto out_oom;
289 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
280 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 290 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
281 } 291 }
282 return pgd; 292 return pgd;
283 293
284out_oom: 294out_oom:
285 for (i--; i >= 0; i--) 295 for (i--; i >= 0; i--) {
286 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 296 pgd_t pgdent = pgd[i];
297 void* pmd = (void *)__va(pgd_val(pgdent)-1);
298 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
299 kmem_cache_free(pmd_cache, pmd);
300 }
287 kmem_cache_free(pgd_cache, pgd); 301 kmem_cache_free(pgd_cache, pgd);
288 return NULL; 302 return NULL;
289} 303}
@@ -294,8 +308,12 @@ void pgd_free(pgd_t *pgd)
294 308
295 /* in the PAE case user pgd entries are overwritten before usage */ 309 /* in the PAE case user pgd entries are overwritten before usage */
296 if (PTRS_PER_PMD > 1) 310 if (PTRS_PER_PMD > 1)
297 for (i = 0; i < USER_PTRS_PER_PGD; ++i) 311 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
298 kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 312 pgd_t pgdent = pgd[i];
313 void* pmd = (void *)__va(pgd_val(pgdent)-1);
314 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
315 kmem_cache_free(pmd_cache, pmd);
316 }
299 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 317 /* in the non-PAE case, free_pgtables() clears user pgd entries */
300 kmem_cache_free(pgd_cache, pgd); 318 kmem_cache_free(pgd_cache, pgd);
301} 319}
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index ca2447e05e15..c554f52cb808 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -24,7 +24,8 @@
24 24
25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) 25#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) 26#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
27#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) 27#define CTR_32BIT_WRITE(l,msrs,c) \
28 do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
28#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) 29#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
29 30
30#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) 31#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
@@ -79,7 +80,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
79 for (i = 0; i < NUM_COUNTERS; ++i) { 80 for (i = 0; i < NUM_COUNTERS; ++i) {
80 if (unlikely(!CTR_IS_RESERVED(msrs,i))) 81 if (unlikely(!CTR_IS_RESERVED(msrs,i)))
81 continue; 82 continue;
82 CTR_WRITE(1, msrs, i); 83 CTR_32BIT_WRITE(1, msrs, i);
83 } 84 }
84 85
85 /* enable active counters */ 86 /* enable active counters */
@@ -87,7 +88,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
87 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { 88 if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
88 reset_value[i] = counter_config[i].count; 89 reset_value[i] = counter_config[i].count;
89 90
90 CTR_WRITE(counter_config[i].count, msrs, i); 91 CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
91 92
92 CTRL_READ(low, high, msrs, i); 93 CTRL_READ(low, high, msrs, i);
93 CTRL_CLEAR(low); 94 CTRL_CLEAR(low);
@@ -116,7 +117,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
116 CTR_READ(low, high, msrs, i); 117 CTR_READ(low, high, msrs, i);
117 if (CTR_OVERFLOWED(low)) { 118 if (CTR_OVERFLOWED(low)) {
118 oprofile_add_sample(regs, i); 119 oprofile_add_sample(regs, i);
119 CTR_WRITE(reset_value[i], msrs, i); 120 CTR_32BIT_WRITE(reset_value[i], msrs, i);
120 } 121 }
121 } 122 }
122 123
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile
index 1594d2f55c8f..44650e03308b 100644
--- a/arch/i386/pci/Makefile
+++ b/arch/i386/pci/Makefile
@@ -1,7 +1,7 @@
1obj-y := i386.o init.o 1obj-y := i386.o init.o
2 2
3obj-$(CONFIG_PCI_BIOS) += pcbios.o 3obj-$(CONFIG_PCI_BIOS) += pcbios.o
4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o 4obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o
5obj-$(CONFIG_PCI_DIRECT) += direct.o 5obj-$(CONFIG_PCI_DIRECT) += direct.o
6 6
7pci-y := fixup.o 7pci-y := fixup.o
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
new file mode 100644
index 000000000000..747d8c63b0c4
--- /dev/null
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -0,0 +1,264 @@
1/*
2 * mmconfig-shared.c - Low-level direct PCI config space access via
3 * MMCONFIG - common code between i386 and x86-64.
4 *
5 * This code does:
6 * - known chipset handling
7 * - ACPI decoding and validation
8 *
9 * Per-architecture code takes care of the mappings and accesses
10 * themselves.
11 */
12
13#include <linux/pci.h>
14#include <linux/init.h>
15#include <linux/acpi.h>
16#include <linux/bitmap.h>
17#include <asm/e820.h>
18
19#include "pci.h"
20
21/* aperture is up to 256MB but BIOS may reserve less */
22#define MMCONFIG_APER_MIN (2 * 1024*1024)
23#define MMCONFIG_APER_MAX (256 * 1024*1024)
24
25DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
26
27/* K8 systems have some devices (typically in the builtin northbridge)
28 that are only accessible using type1
29 Normally this can be expressed in the MCFG by not listing them
30 and assigning suitable _SEGs, but this isn't implemented in some BIOS.
31 Instead try to discover all devices on bus 0 that are unreachable using MM
32 and fallback for them. */
33static void __init unreachable_devices(void)
34{
35 int i, bus;
36 /* Use the max bus number from ACPI here? */
37 for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) {
38 for (i = 0; i < 32; i++) {
39 unsigned int devfn = PCI_DEVFN(i, 0);
40 u32 val1, val2;
41
42 pci_conf1_read(0, bus, devfn, 0, 4, &val1);
43 if (val1 == 0xffffffff)
44 continue;
45
46 if (pci_mmcfg_arch_reachable(0, bus, devfn)) {
47 raw_pci_ops->read(0, bus, devfn, 0, 4, &val2);
48 if (val1 == val2)
49 continue;
50 }
51 set_bit(i + 32 * bus, pci_mmcfg_fallback_slots);
52 printk(KERN_NOTICE "PCI: No mmconfig possible on device"
53 " %02x:%02x\n", bus, i);
54 }
55 }
56}
57
58static const char __init *pci_mmcfg_e7520(void)
59{
60 u32 win;
61 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
62
63 pci_mmcfg_config_num = 1;
64 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
65 if (!pci_mmcfg_config)
66 return NULL;
67 pci_mmcfg_config[0].address = (win & 0xf000) << 16;
68 pci_mmcfg_config[0].pci_segment = 0;
69 pci_mmcfg_config[0].start_bus_number = 0;
70 pci_mmcfg_config[0].end_bus_number = 255;
71
72 return "Intel Corporation E7520 Memory Controller Hub";
73}
74
75static const char __init *pci_mmcfg_intel_945(void)
76{
77 u32 pciexbar, mask = 0, len = 0;
78
79 pci_mmcfg_config_num = 1;
80
81 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
82
83 /* Enable bit */
84 if (!(pciexbar & 1))
85 pci_mmcfg_config_num = 0;
86
87 /* Size bits */
88 switch ((pciexbar >> 1) & 3) {
89 case 0:
90 mask = 0xf0000000U;
91 len = 0x10000000U;
92 break;
93 case 1:
94 mask = 0xf8000000U;
95 len = 0x08000000U;
96 break;
97 case 2:
98 mask = 0xfc000000U;
99 len = 0x04000000U;
100 break;
101 default:
102 pci_mmcfg_config_num = 0;
103 }
104
105 /* Errata #2, things break when not aligned on a 256Mb boundary */
106 /* Can only happen in 64M/128M mode */
107
108 if ((pciexbar & mask) & 0x0fffffffU)
109 pci_mmcfg_config_num = 0;
110
111 if (pci_mmcfg_config_num) {
112 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
113 if (!pci_mmcfg_config)
114 return NULL;
115 pci_mmcfg_config[0].address = pciexbar & mask;
116 pci_mmcfg_config[0].pci_segment = 0;
117 pci_mmcfg_config[0].start_bus_number = 0;
118 pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
119 }
120
121 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
122}
123
124struct pci_mmcfg_hostbridge_probe {
125 u32 vendor;
126 u32 device;
127 const char *(*probe)(void);
128};
129
130static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
131 { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 },
132 { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 },
133};
134
135static int __init pci_mmcfg_check_hostbridge(void)
136{
137 u32 l;
138 u16 vendor, device;
139 int i;
140 const char *name;
141
142 pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
143 vendor = l & 0xffff;
144 device = (l >> 16) & 0xffff;
145
146 pci_mmcfg_config_num = 0;
147 pci_mmcfg_config = NULL;
148 name = NULL;
149
150 for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
151 if (pci_mmcfg_probes[i].vendor == vendor &&
152 pci_mmcfg_probes[i].device == device)
153 name = pci_mmcfg_probes[i].probe();
154 }
155
156 if (name) {
157 printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n",
158 name, pci_mmcfg_config_num ? "with" : "without");
159 }
160
161 return name != NULL;
162}
163
164static void __init pci_mmcfg_insert_resources(void)
165{
166#define PCI_MMCFG_RESOURCE_NAME_LEN 19
167 int i;
168 struct resource *res;
169 char *names;
170 unsigned num_buses;
171
172 res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
173 pci_mmcfg_config_num, GFP_KERNEL);
174 if (!res) {
175 printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
176 return;
177 }
178
179 names = (void *)&res[pci_mmcfg_config_num];
180 for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
181 struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
182 num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
183 res->name = names;
184 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
185 cfg->pci_segment);
186 res->start = cfg->address;
187 res->end = res->start + (num_buses << 20) - 1;
188 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
189 insert_resource(&iomem_resource, res);
190 names += PCI_MMCFG_RESOURCE_NAME_LEN;
191 }
192}
193
194static void __init pci_mmcfg_reject_broken(int type)
195{
196 typeof(pci_mmcfg_config[0]) *cfg;
197
198 if ((pci_mmcfg_config_num == 0) ||
199 (pci_mmcfg_config == NULL) ||
200 (pci_mmcfg_config[0].address == 0))
201 return;
202
203 cfg = &pci_mmcfg_config[0];
204
205 /*
206 * Handle more broken MCFG tables on Asus etc.
207 * They only contain a single entry for bus 0-0.
208 */
209 if (pci_mmcfg_config_num == 1 &&
210 cfg->pci_segment == 0 &&
211 (cfg->start_bus_number | cfg->end_bus_number) == 0) {
212 printk(KERN_ERR "PCI: start and end of bus number is 0. "
213 "Rejected as broken MCFG.\n");
214 goto reject;
215 }
216
217 /*
218 * Only do this check when type 1 works. If it doesn't work
219 * assume we run on a Mac and always use MCFG
220 */
221 if (type == 1 && !e820_all_mapped(cfg->address,
222 cfg->address + MMCONFIG_APER_MIN,
223 E820_RESERVED)) {
224 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
225 " E820-reserved\n", cfg->address);
226 goto reject;
227 }
228 return;
229
230reject:
231 printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
232 kfree(pci_mmcfg_config);
233 pci_mmcfg_config = NULL;
234 pci_mmcfg_config_num = 0;
235}
236
237void __init pci_mmcfg_init(int type)
238{
239 int known_bridge = 0;
240
241 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
242 return;
243
244 if (type == 1 && pci_mmcfg_check_hostbridge())
245 known_bridge = 1;
246
247 if (!known_bridge) {
248 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
249 pci_mmcfg_reject_broken(type);
250 }
251
252 if ((pci_mmcfg_config_num == 0) ||
253 (pci_mmcfg_config == NULL) ||
254 (pci_mmcfg_config[0].address == 0))
255 return;
256
257 if (pci_mmcfg_arch_init()) {
258 if (type == 1)
259 unreachable_devices();
260 if (known_bridge)
261 pci_mmcfg_insert_resources();
262 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
263 }
264}
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 5700220dcf5f..bb1afd9e589d 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -15,55 +15,33 @@
15#include <asm/e820.h> 15#include <asm/e820.h>
16#include "pci.h" 16#include "pci.h"
17 17
18/* aperture is up to 256MB but BIOS may reserve less */
19#define MMCONFIG_APER_MIN (2 * 1024*1024)
20#define MMCONFIG_APER_MAX (256 * 1024*1024)
21
22/* Assume systems with more busses have correct MCFG */ 18/* Assume systems with more busses have correct MCFG */
23#define MAX_CHECK_BUS 16
24
25#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) 19#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
26 20
27/* The base address of the last MMCONFIG device accessed */ 21/* The base address of the last MMCONFIG device accessed */
28static u32 mmcfg_last_accessed_device; 22static u32 mmcfg_last_accessed_device;
29static int mmcfg_last_accessed_cpu; 23static int mmcfg_last_accessed_cpu;
30 24
31static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32);
32
33/* 25/*
34 * Functions for accessing PCI configuration space with MMCONFIG accesses 26 * Functions for accessing PCI configuration space with MMCONFIG accesses
35 */ 27 */
36static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) 28static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
37{ 29{
38 int cfg_num = -1;
39 struct acpi_mcfg_allocation *cfg; 30 struct acpi_mcfg_allocation *cfg;
31 int cfg_num;
40 32
41 if (seg == 0 && bus < MAX_CHECK_BUS && 33 if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
42 test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) 34 test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots))
43 return 0; 35 return 0;
44 36
45 while (1) { 37 for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
46 ++cfg_num;
47 if (cfg_num >= pci_mmcfg_config_num) {
48 break;
49 }
50 cfg = &pci_mmcfg_config[cfg_num]; 38 cfg = &pci_mmcfg_config[cfg_num];
51 if (cfg->pci_segment != seg) 39 if (cfg->pci_segment == seg &&
52 continue; 40 (cfg->start_bus_number <= bus) &&
53 if ((cfg->start_bus_number <= bus) &&
54 (cfg->end_bus_number >= bus)) 41 (cfg->end_bus_number >= bus))
55 return cfg->address; 42 return cfg->address;
56 } 43 }
57 44
58 /* Handle more broken MCFG tables on Asus etc.
59 They only contain a single entry for bus 0-0. Assume
60 this applies to all busses. */
61 cfg = &pci_mmcfg_config[0];
62 if (pci_mmcfg_config_num == 1 &&
63 cfg->pci_segment == 0 &&
64 (cfg->start_bus_number | cfg->end_bus_number) == 0)
65 return cfg->address;
66
67 /* Fall back to type 0 */ 45 /* Fall back to type 0 */
68 return 0; 46 return 0;
69} 47}
@@ -158,67 +136,15 @@ static struct pci_raw_ops pci_mmcfg = {
158 .write = pci_mmcfg_write, 136 .write = pci_mmcfg_write,
159}; 137};
160 138
161/* K8 systems have some devices (typically in the builtin northbridge) 139int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
162 that are only accessible using type1 140 unsigned int devfn)
163 Normally this can be expressed in the MCFG by not listing them
164 and assigning suitable _SEGs, but this isn't implemented in some BIOS.
165 Instead try to discover all devices on bus 0 that are unreachable using MM
166 and fallback for them. */
167static __init void unreachable_devices(void)
168{ 141{
169 int i, k; 142 return get_base_addr(seg, bus, devfn) != 0;
170 unsigned long flags;
171
172 for (k = 0; k < MAX_CHECK_BUS; k++) {
173 for (i = 0; i < 32; i++) {
174 u32 val1;
175 u32 addr;
176
177 pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1);
178 if (val1 == 0xffffffff)
179 continue;
180
181 /* Locking probably not needed, but safer */
182 spin_lock_irqsave(&pci_config_lock, flags);
183 addr = get_base_addr(0, k, PCI_DEVFN(i, 0));
184 if (addr != 0)
185 pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0));
186 if (addr == 0 ||
187 readl((u32 __iomem *)mmcfg_virt_addr) != val1) {
188 set_bit(i + 32*k, fallback_slots);
189 printk(KERN_NOTICE
190 "PCI: No mmconfig possible on %x:%x\n", k, i);
191 }
192 spin_unlock_irqrestore(&pci_config_lock, flags);
193 }
194 }
195} 143}
196 144
197void __init pci_mmcfg_init(int type) 145int __init pci_mmcfg_arch_init(void)
198{ 146{
199 if ((pci_probe & PCI_PROBE_MMCONF) == 0)
200 return;
201
202 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
203 if ((pci_mmcfg_config_num == 0) ||
204 (pci_mmcfg_config == NULL) ||
205 (pci_mmcfg_config[0].address == 0))
206 return;
207
208 /* Only do this check when type 1 works. If it doesn't work
209 assume we run on a Mac and always use MCFG */
210 if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].address,
211 pci_mmcfg_config[0].address + MMCONFIG_APER_MIN,
212 E820_RESERVED)) {
213 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %lx is not E820-reserved\n",
214 (unsigned long)pci_mmcfg_config[0].address);
215 printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
216 return;
217 }
218
219 printk(KERN_INFO "PCI: Using MMCONFIG\n"); 147 printk(KERN_INFO "PCI: Using MMCONFIG\n");
220 raw_pci_ops = &pci_mmcfg; 148 raw_pci_ops = &pci_mmcfg;
221 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 149 return 1;
222
223 unreachable_devices();
224} 150}
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index a0a25180b61a..e58bae2076ad 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -94,3 +94,13 @@ extern void pci_pcbios_init(void);
94extern void pci_mmcfg_init(int type); 94extern void pci_mmcfg_init(int type);
95extern void pcibios_sort(void); 95extern void pcibios_sort(void);
96 96
97/* pci-mmconfig.c */
98
99/* Verify the first 16 busses. We assume that systems with more busses
100 get MCFG right. */
101#define PCI_MMCFG_MAX_CHECK_BUS 16
102extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
103
104extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
105 unsigned int devfn);
106extern int __init pci_mmcfg_arch_init(void);