author		Ingo Molnar <mingo@elte.hu>	2009-04-07 07:34:26 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-07 07:34:42 -0400
commit		2e8844e13ab73f1107aea4317a53ff5879f2e1d7 (patch)
tree		36165371cf6fd26d674610f1c6bb5fac50e6e13f /arch/x86
parent		c78a3956b982418186e40978a51636a2b43221bc (diff)
parent		d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)
Merge branch 'linus' into tracing/hw-branch-tracing
Merge reason: update to latest tracing and ptrace APIs

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 27
-rw-r--r--  arch/x86/Kconfig.cpu | 17
-rw-r--r--  arch/x86/Kconfig.debug | 8
-rw-r--r--  arch/x86/Makefile | 29
-rw-r--r--  arch/x86/boot/Makefile | 56
-rw-r--r--  arch/x86/boot/compressed/Makefile | 1
-rw-r--r--  arch/x86/boot/header.S | 29
-rw-r--r--  arch/x86/boot/memory.c | 39
-rw-r--r--  arch/x86/boot/pm.c | 44
-rw-r--r--  arch/x86/boot/pmjump.S | 1
-rw-r--r--  arch/x86/boot/setup.ld | 3
-rw-r--r--  arch/x86/boot/tools/build.c | 9
-rw-r--r--  arch/x86/boot/video-vga.c | 22
-rw-r--r--  arch/x86/crypto/Makefile | 3
-rw-r--r--  arch/x86/crypto/aes-i586-asm_32.S | 18
-rw-r--r--  arch/x86/crypto/aes-x86_64-asm_64.S | 6
-rw-r--r--  arch/x86/crypto/aes_glue.c | 20
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 896
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 461
-rw-r--r--  arch/x86/ia32/ia32entry.S | 4
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 22
-rw-r--r--  arch/x86/include/asm/aes.h | 11
-rw-r--r--  arch/x86/include/asm/apic.h | 38
-rw-r--r--  arch/x86/include/asm/boot.h | 4
-rw-r--r--  arch/x86/include/asm/cacheflush.h | 3
-rwxr-xr-x  arch/x86/include/asm/cpu_debug.h | 226
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 1
-rw-r--r--  arch/x86/include/asm/cpumask.h | 18
-rw-r--r--  arch/x86/include/asm/desc.h | 3
-rw-r--r--  arch/x86/include/asm/device.h | 2
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 188
-rw-r--r--  arch/x86/include/asm/dmi.h | 19
-rw-r--r--  arch/x86/include/asm/e820.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 7
-rw-r--r--  arch/x86/include/asm/highmem.h | 1
-rw-r--r--  arch/x86/include/asm/ia32.h | 7
-rw-r--r--  arch/x86/include/asm/io_apic.h | 12
-rw-r--r--  arch/x86/include/asm/iommu.h | 2
-rw-r--r--  arch/x86/include/asm/irq_remapping.h | 2
-rw-r--r--  arch/x86/include/asm/kexec.h | 13
-rw-r--r--  arch/x86/include/asm/kvm.h | 24
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 61
-rw-r--r--  arch/x86/include/asm/lguest_hcall.h | 24
-rw-r--r--  arch/x86/include/asm/linkage.h | 13
-rw-r--r--  arch/x86/include/asm/msidef.h | 1
-rw-r--r--  arch/x86/include/asm/msr-index.h | 9
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt.h | 19
-rw-r--r--  arch/x86/include/asm/pci.h | 44
-rw-r--r--  arch/x86/include/asm/pci_32.h | 34
-rw-r--r--  arch/x86/include/asm/pci_64.h | 22
-rw-r--r--  arch/x86/include/asm/percpu.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h | 7
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 17
-rw-r--r--  arch/x86/include/asm/pgtable.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 3
-rw-r--r--  arch/x86/include/asm/processor.h | 8
-rw-r--r--  arch/x86/include/asm/sections.h | 7
-rw-r--r--  arch/x86/include/asm/setup.h | 39
-rw-r--r--  arch/x86/include/asm/smp.h | 13
-rw-r--r--  arch/x86/include/asm/socket.h | 3
-rw-r--r--  arch/x86/include/asm/spinlock.h | 3
-rw-r--r--  arch/x86/include/asm/suspend_32.h | 24
-rw-r--r--  arch/x86/include/asm/svm.h | 4
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 2
-rw-r--r--  arch/x86/include/asm/timer.h | 2
-rw-r--r--  arch/x86/include/asm/topology.h | 99
-rw-r--r--  arch/x86/include/asm/unistd_32.h | 2
-rw-r--r--  arch/x86/include/asm/unistd_64.h | 4
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 14
-rw-r--r--  arch/x86/include/asm/uv/uv_mmrs.h | 153
-rw-r--r--  arch/x86/include/asm/virtext.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 5
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 7
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 65
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 33
-rw-r--r--  arch/x86/kernel/apic/apic.c | 82
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 18
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 16
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 17
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 412
-rw-r--r--  arch/x86/kernel/apic/nmi.c | 11
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 11
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 3
-rw-r--r--  arch/x86/kernel/apic/probe_64.c | 7
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 21
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 6
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 6
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 9
-rw-r--r--  arch/x86/kernel/apm_32.c | 263
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 1
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 1
-rw-r--r--  arch/x86/kernel/check.c | 8
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 5
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 36
-rw-r--r--  arch/x86/kernel/cpu/centaur_64.c | 37
-rw-r--r--  arch/x86/kernel/cpu/common.c | 405
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 25
-rwxr-xr-x  arch/x86/kernel/cpu/cpu_debug.c | 901
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig | 19
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Makefile | 8
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 48
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 54
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 21
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c | 6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 105
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.c | 199
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longhaul.h | 12
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/longrun.c | 25
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 74
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 44
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 239
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 399
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 5
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 30
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 72
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 163
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 18
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 166
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 16
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 7
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 32
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 46
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mtrr/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 1101
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 199
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 1069
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 4
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 6
-rw-r--r--  arch/x86/kernel/cpu/transmeta.c | 2
-rw-r--r--  arch/x86/kernel/cpu/umc.c | 2
-rw-r--r--  arch/x86/kernel/e820.c | 142
-rw-r--r--  arch/x86/kernel/early_printk.c | 20
-rw-r--r--  arch/x86/kernel/entry_32.S | 18
-rw-r--r--  arch/x86/kernel/entry_64.S | 4
-rw-r--r--  arch/x86/kernel/ftrace.c | 132
-rw-r--r--  arch/x86/kernel/head32.c | 5
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 76
-rw-r--r--  arch/x86/kernel/hpet.c | 80
-rw-r--r--  arch/x86/kernel/i8253.c | 68
-rw-r--r--  arch/x86/kernel/io_delay.c | 27
-rw-r--r--  arch/x86/kernel/irq.c | 54
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 2
-rw-r--r--  arch/x86/kernel/irqinit_64.c | 1
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 82
-rw-r--r--  arch/x86/kernel/kprobes.c | 20
-rw-r--r--  arch/x86/kernel/kvm.c | 7
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 17
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 99
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 1
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 43
-rw-r--r--  arch/x86/kernel/microcode_core.c | 160
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 83
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 375
-rw-r--r--  arch/x86/kernel/paravirt.c | 1
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 38
-rw-r--r--  arch/x86/kernel/pci-dma.c | 17
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 34
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 39
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c (renamed from arch/x86/kernel/pci-swiotlb_64.c) | 19
-rw-r--r--  arch/x86/kernel/process.c | 25
-rw-r--r--  arch/x86/kernel/process_32.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/ptrace.c | 5
-rw-r--r--  arch/x86/kernel/quirks.c | 6
-rw-r--r--  arch/x86/kernel/relocate_kernel_32.S | 24
-rw-r--r--  arch/x86/kernel/relocate_kernel_64.S | 189
-rw-r--r--  arch/x86/kernel/rtc.c | 20
-rw-r--r--  arch/x86/kernel/setup.c | 55
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 79
-rw-r--r--  arch/x86/kernel/signal.c | 48
-rw-r--r--  arch/x86/kernel/smpboot.c | 70
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 2
-rw-r--r--  arch/x86/kernel/time_64.c | 2
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 12
-rw-r--r--  arch/x86/kernel/topology.c | 14
-rw-r--r--  arch/x86/kernel/tsc.c | 122
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 2
-rw-r--r--  arch/x86/kernel/vmi_32.c | 6
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 1
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S | 21
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S | 101
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 12
-rw-r--r--  arch/x86/kvm/Kconfig | 4
-rw-r--r--  arch/x86/kvm/i8254.c | 21
-rw-r--r--  arch/x86/kvm/i8254.h | 2
-rw-r--r--  arch/x86/kvm/i8259.c | 25
-rw-r--r--  arch/x86/kvm/irq.h | 2
-rw-r--r--  arch/x86/kvm/kvm_svm.h | 16
-rw-r--r--  arch/x86/kvm/mmu.c | 237
-rw-r--r--  arch/x86/kvm/mmu.h | 2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 219
-rw-r--r--  arch/x86/kvm/svm.c | 916
-rw-r--r--  arch/x86/kvm/vmx.c | 393
-rw-r--r--  arch/x86/kvm/x86.c | 432
-rw-r--r--  arch/x86/kvm/x86_emulate.c | 56
-rw-r--r--  arch/x86/lguest/boot.c | 94
-rw-r--r--  arch/x86/lguest/i386_head.S | 4
-rw-r--r--  arch/x86/lib/memcpy_64.S | 143
-rw-r--r--  arch/x86/mm/Makefile | 2
-rw-r--r--  arch/x86/mm/highmem_32.c | 60
-rw-r--r--  arch/x86/mm/init.c | 6
-rw-r--r--  arch/x86/mm/iomap_32.c | 30
-rw-r--r--  arch/x86/mm/ioremap.c | 19
-rw-r--r--  arch/x86/mm/kmmio.c | 2
-rw-r--r--  arch/x86/mm/mmio-mod.c | 19
-rw-r--r--  arch/x86/mm/numa.c | 67
-rw-r--r--  arch/x86/mm/numa_64.c | 111
-rw-r--r--  arch/x86/mm/pageattr.c | 147
-rw-r--r--  arch/x86/mm/pat.c | 5
-rw-r--r--  arch/x86/mm/pgtable_32.c | 2
-rw-r--r--  arch/x86/mm/srat_64.c | 30
-rw-r--r--  arch/x86/mm/tlb.c | 5
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 2
-rw-r--r--  arch/x86/pci/common.c | 4
-rw-r--r--  arch/x86/pci/early.c | 19
-rw-r--r--  arch/x86/pci/fixup.c | 24
-rw-r--r--  arch/x86/pci/i386.c | 43
-rw-r--r--  arch/x86/pci/legacy.c | 3
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 227
-rw-r--r--  arch/x86/pci/mmconfig_64.c | 17
-rw-r--r--  arch/x86/power/Makefile | 5
-rw-r--r--  arch/x86/power/cpu_32.c | 1
-rw-r--r--  arch/x86/power/cpu_64.c | 1
-rw-r--r--  arch/x86/power/hibernate_64.c | 1
-rw-r--r--  arch/x86/xen/mmu.c | 7
-rw-r--r--  arch/x86/xen/smp.c | 6
235 files changed, 10110 insertions, 5393 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0a638b4199a..4b3408206091 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -42,6 +42,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
@@ -166,11 +167,17 @@ config AUDIT_ARCH
 config ARCH_SUPPORTS_OPTIMIZED_INLINING
 	def_bool y
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
 # Use the generic interrupt handling code in kernel/irq/:
 config GENERIC_HARDIRQS
 	bool
 	default y
 
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	def_bool y
+
 config GENERIC_IRQ_PROBE
 	bool
 	default y
@@ -246,6 +253,7 @@ config SMP
 config X86_X2APIC
 	bool "Support x2apic"
 	depends on X86_LOCAL_APIC && X86_64
+	select INTR_REMAP
 	---help---
 	  This enables x2apic support on CPUs that have this feature.
 
@@ -933,6 +941,12 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
+config X86_CPU_DEBUG
+	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
+	---help---
+	  If you select this option, this will provide various x86 CPUs
+	  information through debugfs.
+
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
@@ -1125,7 +1139,7 @@ config NUMA_EMU
 
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-	range 1 9 if X86_64
+	range 1 9
 	default "9" if MAXSMP
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
@@ -1133,7 +1147,7 @@ config NODES_SHIFT
 	depends on NEED_MULTIPLE_NODES
 	---help---
 	  Specify the maximum number of NUMA Nodes available on the target
-	  system. Increases memory reserved to accomodate various tables.
+	  system. Increases memory reserved to accommodate various tables.
 
 config HAVE_ARCH_BOOTMEM
 	def_bool y
@@ -1311,7 +1325,7 @@ config MTRR_SANITIZER
 	  add writeback entries.
 
 	  Can be disabled with disable_mtrr_cleanup on the kernel command line.
-	  The largest mtrr entry size for a continous block can be set with
+	  The largest mtrr entry size for a continuous block can be set with
 	  mtrr_chunk_size.
 
 	  If unsure, say Y.
@@ -1433,7 +1447,7 @@ config CRASH_DUMP
 config KEXEC_JUMP
 	bool "kexec jump (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on KEXEC && HIBERNATION && X86_32
+	depends on KEXEC && HIBERNATION
 	---help---
 	  Jump between original kernel and kexeced kernel and invoke
 	  code in physical address mode via KEXEC
@@ -1826,8 +1840,8 @@ config PCI_MMCONFIG
 
 config DMAR
 	bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
-	depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL
-	---help---
+	depends on PCI_MSI && ACPI && EXPERIMENTAL
+	help
 	  DMA remapping (DMAR) devices support enables independent address
 	  translations for Direct Memory Access (DMA) from devices.
 	  These DMA remapping devices are reported via ACPI tables
@@ -1868,7 +1882,6 @@ config DMAR_FLOPPY_WA
 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
 	depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
-	select X86_X2APIC
 	---help---
 	  Supports Interrupt remapping for IO-APIC and MSI devices.
 	  To use x2apic mode in the CPU's which support x2APIC enhancements or
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a95eaf0e582a..924e156a85ab 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -456,24 +456,9 @@ config CPU_SUP_AMD
 
 	  If unsure, say N.
 
-config CPU_SUP_CENTAUR_32
+config CPU_SUP_CENTAUR
 	default y
 	bool "Support Centaur processors" if PROCESSOR_SELECT
-	depends on !64BIT
-	---help---
-	  This enables detection, tunings and quirks for Centaur processors
-
-	  You need this enabled if you want your kernel to run on a
-	  Centaur CPU. Disabling this option on other types of CPUs
-	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
-	  CPU might render the kernel unbootable.
-
-	  If unsure, say N.
-
-config CPU_SUP_CENTAUR_64
-	default y
-	bool "Support Centaur processors" if PROCESSOR_SELECT
-	depends on 64BIT
 	---help---
 	  This enables detection, tunings and quirks for Centaur processors
 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index dfd74abc03f8..22b752e09487 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -72,14 +72,6 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
-config DEBUG_PAGEALLOC
-	bool "Debug page memory allocations"
-	depends on DEBUG_KERNEL
-	---help---
-	  Unmap pages from the kernel linear mapping after free_pages().
-	  This results in a large slowdown, but helps to find certain types
-	  of memory corruptions.
-
 config DEBUG_PER_CPU_MAPS
 	bool "Debug access to per_cpu maps"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1836191839ee..f05d8c91d9e5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -153,34 +153,23 @@ endif
 
 boot := arch/x86/boot
 
-PHONY += zImage bzImage compressed zlilo bzlilo \
-	 zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
+BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install
+
+PHONY += bzImage $(BOOT_TARGETS)
 
 # Default kernel to build
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
-                    KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
+KBUILD_IMAGE := $(boot)/bzImage
 
-zImage bzImage: vmlinux
+bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
 	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
 	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
 
-compressed: zImage
-
-zlilo bzlilo: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
-
-zdisk bzdisk: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
-
-fdimage fdimage144 fdimage288 isoimage: vmlinux
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
-
-install:
-	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+$(BOOT_TARGETS): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
 
 PHONY += vdso_install
 vdso_install:
@@ -205,7 +194,3 @@ define archhelp
 	echo  '  FDARGS="..."  arguments for the booted kernel'
 	echo  '  FDINITRD=file initrd for the booted kernel'
 endef
-
-CLEAN_FILES += arch/x86/boot/fdimage \
-	       arch/x86/boot/image.iso \
-	       arch/x86/boot/mtools.conf
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c70eff69a1fb..6633b6e7505a 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -6,26 +6,24 @@
 # for more details.
 #
 # Copyright (C) 1994 by Linus Torvalds
+# Changed by many, many contributors over the years.
 #
 
 # ROOT_DEV specifies the default root-device when making the image.
 # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
 # the default of FLOPPY is used by 'build'.
 
 ROOT_DEV := CURRENT
 
 # If you want to preset the SVGA mode, uncomment the next line and
 # set SVGA_MODE to whatever number you want.
 # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
 # The number is the same as you would ordinarily press at bootup.
 
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 
-# If you want the RAM disk device, define this to be the size in blocks.
-
-#RAMDISK := -DRAMDISK=512
-
-targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
+targets		:= vmlinux.bin setup.bin setup.elf bzImage
+targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -59,6 +57,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
 # How to compile the 16-bit code.  Note we always compile for -march=i386,
 # that way we can complain to the user if the CPU is insufficient.
 KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
+		   -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes \
 		   -march=i386 -mregparm=3 \
 		   -include $(srctree)/$(src)/code16gcc.h \
@@ -68,20 +67,16 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 		   $(call cc-option, -fno-unit-at-a-time)) \
 		   $(call cc-option, -fno-stack-protector) \
 		   $(call cc-option, -mpreferred-stack-boundary=2)
-KBUILD_CFLAGS	+= $(call cc-option,-m32)
+KBUILD_CFLAGS	+= $(call cc-option, -m32)
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 
-$(obj)/zImage:  asflags-y := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
-$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
-$(obj)/bzImage: BUILDFLAGS := -b
+$(obj)/bzImage: asflags-y := $(SVGA_MODE)
 
 quiet_cmd_image = BUILD   $@
-cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
-	    $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+	    $(ROOT_DEV) > $@
 
-$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
-			      $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
@@ -116,9 +111,11 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
 $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
-# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
+# Set this if you want to pass append arguments to the
+# bzdisk/fdimage/isoimage kernel
 FDARGS =
-# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
+# Set this if you want an initrd included with the
+# bzdisk/fdimage/isoimage kernel
 FDINITRD =
 
 image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
@@ -127,7 +124,7 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in
 	sed -e 's|@OBJ@|$(obj)|g' < $< > $@
 
 # This requires write access to /dev/fd0
-zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
+bzdisk: $(obj)/bzImage $(obj)/mtools.conf
 	MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
 	syslinux /dev/fd0 ; sync
 	echo '$(image_cmdline)' | \
@@ -135,10 +132,10 @@ zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
 	if [ -f '$(FDINITRD)' ] ; then \
 		MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
 	fi
-	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync
 
 # These require being root or having syslinux 2.02 or higher installed
-fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
+fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
 	dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
 	MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
 	syslinux $(obj)/fdimage ; sync
@@ -147,9 +144,9 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
 	if [ -f '$(FDINITRD)' ] ; then \
 		MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
 	fi
-	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
 
-fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
+fdimage288: $(obj)/bzImage $(obj)/mtools.conf
 	dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
 	MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
 	syslinux $(obj)/fdimage ; sync
@@ -158,9 +155,9 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
 	if [ -f '$(FDINITRD)' ] ; then \
 		MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
 	fi
-	MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
+	MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
 
-isoimage: $(BOOTIMAGE)
+isoimage: $(obj)/bzImage
 	-rm -rf $(obj)/isoimage
 	mkdir $(obj)/isoimage
 	for i in lib lib64 share end ; do \
@@ -170,7 +167,7 @@ isoimage: $(BOOTIMAGE)
 		fi ; \
 		if [ $$i = end ] ; then exit 1 ; fi ; \
 	done
-	cp $(BOOTIMAGE) $(obj)/isoimage/linux
+	cp $(obj)/bzImage $(obj)/isoimage/linux
 	echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
 	if [ -f '$(FDINITRD)' ] ; then \
 		cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
@@ -181,12 +178,13 @@ isoimage: $(BOOTIMAGE)
 	isohybrid $(obj)/image.iso 2>/dev/null || true
 	rm -rf $(obj)/isoimage
 
-zlilo: $(BOOTIMAGE)
+bzlilo: $(obj)/bzImage
 	if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
 	if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
-	cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz
+	cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
 	cp System.map $(INSTALL_PATH)/
 	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
 
 install:
-	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
+	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+		System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3ca4c194b8e5..65551c9f8571 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -8,6 +8,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma h
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 cflags-$(CONFIG_X86_64) := -mcmodel=small
 KBUILD_CFLAGS += $(cflags-y)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 7ccff4884a23..5d84d1c74e4c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -24,12 +24,8 @@
 #include "boot.h"
 #include "offsets.h"
 
-SETUPSECTS	= 4			/* default nr of setup-sectors */
 BOOTSEG		= 0x07C0		/* original address of boot-sector */
-SYSSEG		= DEF_SYSSEG		/* system loaded at 0x10000 (65536) */
-SYSSIZE		= DEF_SYSSIZE		/* system size: # of 16-byte clicks */
-					/* to be loaded */
-ROOT_DEV	= 0			/* ROOT_DEV is now written by "build" */
+SYSSEG		= 0x1000		/* historical load address >> 4 */
 
 #ifndef SVGA_MODE
 #define SVGA_MODE ASK_VGA
@@ -97,12 +93,12 @@ bugger_off_msg:
 	.section ".header", "a"
 	.globl	hdr
 hdr:
-setup_sects:	.byte SETUPSECTS
+setup_sects:	.byte 0			/* Filled in by build.c */
 root_flags:	.word ROOT_RDONLY
-syssize:	.long SYSSIZE
-ram_size:	.word RAMDISK
+syssize:	.long 0			/* Filled in by build.c */
+ram_size:	.word 0			/* Obsolete */
 vid_mode:	.word SVGA_MODE
-root_dev:	.word ROOT_DEV
+root_dev:	.word 0			/* Filled in by build.c */
 boot_flag:	.word 0xAA55
 
 	# offset 512, entry point
@@ -123,14 +119,15 @@ _start:
 	# or else old loadlin-1.5 will fail)
 		.globl	realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
-start_sys_seg:	.word	SYSSEG
+start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
+					# in case something decided to "use" it
 		.word	kernel_version-512 # pointing to kernel version string
 		# above section of header is compatible
 		# with loadlin-1.5 (header v1.5). Don't
 		# change it.
 
-type_of_loader:	.byte	0		# = 0, old one (LILO, Loadlin,
-					#      Bootlin, SYSLX, bootsect...)
+type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
+					# bootloaders know to change this.
 		# See Documentation/i386/boot.txt for
 		# assigned ids
 
@@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set
 					# space behind setup.S can be used for
 					# heap purposes.
 					# Only the loader knows what is free
-#ifndef __BIG_KERNEL__
-		.byte	0
-#else
 		.byte	LOADED_HIGH
-#endif
 
 setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not
 
 code32_start:				# here loaders can put a different
 					# start address for 32-bit code.
-#ifndef __BIG_KERNEL__
-		.long	0x1000		# 0x1000 = default for zImage
-#else
 		.long	0x100000	# 0x100000 = default for big kernel
-#endif
 
 ramdisk_image:	.long	0		# address of loaded ramdisk image
 					# Here the loader puts the 32-bit
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 8c3c25f35578..5054c2ddd1a0 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -2,6 +2,7 @@
  *
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright 2007 rPath, Inc. - All Rights Reserved
+ * Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  * This file is part of the Linux kernel, and is made available under
  * the terms of the GNU General Public License version 2.
@@ -16,24 +17,38 @@
 
 #define SMAP	0x534d4150	/* ASCII "SMAP" */
 
+struct e820_ext_entry {
+	struct e820entry std;
+	u32 ext_flags;
+} __attribute__((packed));
+
 static int detect_memory_e820(void)
 {
 	int count = 0;
 	u32 next = 0;
-	u32 size, id;
+	u32 size, id, edi;
 	u8 err;
 	struct e820entry *desc = boot_params.e820_map;
+	static struct e820_ext_entry buf; /* static so it is zeroed */
+
+	/*
+	 * Set this here so that if the BIOS doesn't change this field
+	 * but still doesn't change %ecx, we're still okay...
+	 */
+	buf.ext_flags = 1;
 
 	do {
-		size = sizeof(struct e820entry);
+		size = sizeof buf;
 
-		/* Important: %edx is clobbered by some BIOSes,
-		   so it must be either used for the error output
-		   or explicitly marked clobbered. */
-		asm("int $0x15; setc %0"
+		/* Important: %edx and %esi are clobbered by some BIOSes,
+		   so they must be either used for the error output
+		   or explicitly marked clobbered.  Given that, assume there
+		   is something out there clobbering %ebp and %edi, too. */
+		asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
 		    : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
-		      "=m" (*desc)
-		    : "D" (desc), "d" (SMAP), "a" (0xe820));
+		      "=D" (edi), "+m" (buf)
+		    : "D" (&buf), "d" (SMAP), "a" (0xe820)
+		    : "esi");
 
 		/* BIOSes which terminate the chain with CF = 1 as opposed
 		   to %ebx = 0 don't always report the SMAP signature on
@@ -51,8 +66,14 @@ static int detect_memory_e820(void)
 			break;
 		}
 
+		/* ACPI 3.0 added the extended flags support.  If bit 0
+		   in the extended flags is zero, we're supposed to simply
+		   ignore the entry -- a backwards incompatible change! */
+		if (size > 20 && !(buf.ext_flags & 1))
+			continue;
+
+		*desc++ = buf.std;
 		count++;
-		desc++;
 	} while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
 	return boot_params.e820_entries = count;
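A standalone sketch (not the kernel code itself) of the ACPI 3.0 rule this hunk introduces: when the BIOS returns the 24-byte extended entry format, bit 0 of ext_flags marks the entry as valid, and a clear bit means the entry must be skipped rather than copied out. Type names mirror the patch; everything else here is illustrative.

	#include <stdint.h>

	struct e820entry {
		uint64_t addr;
		uint64_t size;
		uint32_t type;
	} __attribute__((packed));

	struct e820_ext_entry {
		struct e820entry std;
		uint32_t ext_flags;
	} __attribute__((packed));

	/* Returns nonzero if the entry should be kept.  "returned_size" is
	 * the byte count the BIOS wrote back in %ecx; legacy BIOSes return
	 * 20 and never touch ext_flags, which the caller pre-seeds to 1. */
	static int keep_e820_entry(const struct e820_ext_entry *buf,
				   uint32_t returned_size)
	{
		if (returned_size > 20 && !(buf->ext_flags & 1))
			return 0;	/* ACPI 3.0: ignore the entry */
		return 1;
	}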
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 85a1cd8a8ff8..8062f8915250 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -33,47 +33,6 @@ static void realmode_switch_hook(void)
 }
 
 /*
- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
- * A bzImage kernel is loaded and runs at 0x100000.
- */
-static void move_kernel_around(void)
-{
-	/* Note: rely on the compile-time option here rather than
-	   the LOADED_HIGH flag.  The Qemu kernel loader unconditionally
-	   sets the loadflags to zero. */
-#ifndef __BIG_KERNEL__
-	u16 dst_seg, src_seg;
-	u32 syssize;
-
-	dst_seg =  0x1000 >> 4;
-	src_seg = 0x10000 >> 4;
-	syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
-
-	while (syssize) {
-		int paras  = (syssize >= 0x1000) ? 0x1000 : syssize;
-		int dwords = paras << 2;
-
-		asm volatile("pushw %%es ; "
-			     "pushw %%ds ; "
-			     "movw %1,%%es ; "
-			     "movw %2,%%ds ; "
-			     "xorw %%di,%%di ; "
-			     "xorw %%si,%%si ; "
-			     "rep;movsl ; "
-			     "popw %%ds ; "
-			     "popw %%es"
-			     : "+c" (dwords)
-			     : "r" (dst_seg), "r" (src_seg)
-			     : "esi", "edi");
-
-		syssize -= paras;
-		dst_seg += paras;
-		src_seg += paras;
-	}
-#endif
-}
-
-/*
  * Disable all interrupts at the legacy PIC.
  */
 static void mask_all_interrupts(void)
@@ -147,9 +106,6 @@ void go_to_protected_mode(void)
 	/* Hook before leaving real mode, also disables interrupts */
 	realmode_switch_hook();
 
-	/* Move the kernel/setup to their final resting places */
-	move_kernel_around();
-
 	/* Enable the A20 gate */
 	if (enable_a20()) {
 		puts("A20 gate not responding, unable to boot...\n");
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 019c17a75851..3e0edc6d2a20 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -47,6 +47,7 @@ GLOBAL(protected_mode_jump)
 ENDPROC(protected_mode_jump)
 
 	.code32
+	.section ".text32","ax"
 GLOBAL(in_pm32)
 	# Set up data segments for flat 32-bit mode
 	movl	%ecx, %ds
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index df9234b3a5e0..bb8dc2de7969 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -17,7 +17,8 @@ SECTIONS
 	.header		: { *(.header) }
 	.inittext	: { *(.inittext) }
 	.initdata	: { *(.initdata) }
-	.text		: { *(.text*) }
+	.text		: { *(.text) }
+	.text32		: { *(.text32) }
 
 	. = ALIGN(16);
 	.rodata		: { *(.rodata*) }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 44dc1923c0e3..ee3a4ea923ac 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
 
 static void usage(void)
 {
-	die("Usage: build [-b] setup system [rootdev] [> image]");
+	die("Usage: build setup system [rootdev] [> image]");
 }
 
 int main(int argc, char ** argv)
@@ -145,11 +145,6 @@ int main(int argc, char ** argv)
 	void *kernel;
 	u32 crc = 0xffffffffUL;
 
-	if (argc > 2 && !strcmp(argv[1], "-b"))
-	  {
-		is_big_kernel = 1;
-		argc--, argv++;
-	  }
 	if ((argc < 3) || (argc > 4))
 		usage();
 	if (argc > 3) {
@@ -216,8 +211,6 @@ int main(int argc, char ** argv)
 		die("Unable to mmap '%s': %m", argv[2]);
 	/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
 	sys_size = (sz + 15 + 4) / 16;
-	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
-		die("System is too big. Try using bzImage or modules.");
 
 	/* Patch the setup code with the appropriate size parameters */
 	buf[0x1f1] = setup_sectors-1;
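For context, the header fields that header.S now leaves as zero ("Filled in by build.c") are patched into the on-disk image by build.c at fixed boot-protocol offsets. A hedged sketch of that patching (offsets follow Documentation/i386/boot.txt; the helper name is hypothetical, build.c does this inline around the hunk above):

	static void patch_setup_header(unsigned char *buf, int setup_sectors,
				       unsigned int sys_size,
				       unsigned short root_dev)
	{
		buf[0x1f1] = setup_sectors - 1;	/* setup_sects */
		buf[0x1f4] = sys_size;		/* syssize, little endian */
		buf[0x1f5] = sys_size >> 8;
		buf[0x1f6] = sys_size >> 16;
		buf[0x1f7] = sys_size >> 24;
		buf[0x1fc] = root_dev;		/* root_dev */
		buf[0x1fd] = root_dev >> 8;
	}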
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 5d4742ed4aa2..95d86ce0421c 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -129,41 +129,45 @@ u16 vga_crtc(void)
 	return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
 }
 
-static void vga_set_480_scanlines(int end)
+static void vga_set_480_scanlines(int lines)
 {
-	u16 crtc;
-	u8  csel;
+	u16 crtc;			/* CRTC base address */
+	u8  csel;			/* CRTC miscellaneous output register */
+	u8  ovfw;			/* CRTC overflow register */
+	int end = lines-1;
 
 	crtc = vga_crtc();
 
+	ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
+
 	out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
 	out_idx(0x0b, crtc, 0x06); /* Vertical total */
-	out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
+	out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
 	out_idx(0xea, crtc, 0x10); /* Vertical sync start */
 	out_idx(end, crtc, 0x12); /* Vertical display end */
 	out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
 	out_idx(0x04, crtc, 0x16); /* Vertical blank end */
 	csel = inb(0x3cc);
 	csel &= 0x0d;
 	csel |= 0xe2;
-	outb(csel, 0x3cc);
+	outb(csel, 0x3c2);
 }
 
 static void vga_set_80x30(void)
 {
-	vga_set_480_scanlines(0xdf);
+	vga_set_480_scanlines(30*16);
 }
 
 static void vga_set_80x34(void)
 {
 	vga_set_14font();
-	vga_set_480_scanlines(0xdb);
+	vga_set_480_scanlines(34*14);
 }
 
 static void vga_set_80x60(void)
 {
 	vga_set_8font();
-	vga_set_480_scanlines(0xdf);
+	vga_set_480_scanlines(60*8);
 }
 
 static int vga_set_mode(struct mode_info *mode)
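The ovfw computation above packs the high bits of the 10-bit "vertical display end" value into the CRTC overflow register: bit 8 lands in register bit 1 (hence the shift by 8-1) and bit 9 in bit 6 (shift by 9-6), with 0x3c supplying the register's fixed bits. A quick user-space check (nothing beyond the arithmetic itself is assumed) shows the computed values reproduce the magic constants the patch removes:

	#include <stdio.h>

	int main(void)
	{
		int lines[] = { 30 * 16, 34 * 14, 60 * 8 };	/* 480, 476, 480 */
		int i;

		for (i = 0; i < 3; i++) {
			int end = lines[i] - 1;
			unsigned char ovfw = 0x3c | ((end >> (8 - 1)) & 0x02)
					  | ((end >> (9 - 6)) & 0x40);
			/* prints ovfw=0x3e each time, and low bytes
			   0xdf/0xdb/0xdf, matching the old hard-coded
			   out_idx() arguments */
			printf("lines=%d end=0x%03x ovfw=0x%02x low=0x%02x\n",
			       lines[i], end, ovfw, end & 0xff);
		}
		return 0;
	}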
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa5094..ebe7deedd5b4 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 
@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index e41b147f4509..b949ec2f9af4 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -41,14 +41,14 @@
 #define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
 
 /* offsets to parameters with one register pushed onto stack */
-#define tfm 8
+#define ctx 8
 #define out_blk 12
 #define in_blk 16
 
-/* offsets in crypto_tfm structure */
-#define klen (crypto_tfm_ctx_offset + 0)
-#define ekey (crypto_tfm_ctx_offset + 4)
-#define dkey (crypto_tfm_ctx_offset + 244)
+/* offsets in crypto_aes_ctx structure */
+#define klen (480)
+#define ekey (0)
+#define dkey (240)
 
 // register mapping for encrypt and decrypt subroutines
 
@@ -217,7 +217,7 @@
 	do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
 
 // AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_enc_blk
 
@@ -228,7 +228,7 @@
 
 aes_enc_blk:
 	push    %ebp
-	mov     tfm(%esp),%ebp
+	mov     ctx(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns
 // rely on the register mappings
@@ -292,7 +292,7 @@ aes_enc_blk:
 	ret
 
 // AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
+/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
 .global  aes_dec_blk
 
@@ -303,7 +303,7 @@ aes_enc_blk:
 
 aes_dec_blk:
 	push    %ebp
-	mov     tfm(%esp),%ebp
+	mov     ctx(%esp),%ebp
 
 // CAUTION: the order and the values used in these assigns
 // rely on the register mappings
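The hard-coded offsets klen(480), ekey(0) and dkey(240) assume the crypto_aes_ctx layout of this kernel generation: two 240-byte key schedules (15 round keys of 16 bytes each) followed by the key length. A compile-time sketch of that assumed layout (field sizes are assumptions drawn from the offsets in the diff, not quoted from <crypto/aes.h>):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	struct crypto_aes_ctx {
		uint32_t key_enc[60];	/* 240 bytes -> ekey offset 0   */
		uint32_t key_dec[60];	/* 240 bytes -> dkey offset 240 */
		uint32_t key_length;	/*           -> klen offset 480 */
	};

	int main(void)
	{
		assert(offsetof(struct crypto_aes_ctx, key_enc) == 0);
		assert(offsetof(struct crypto_aes_ctx, key_dec) == 240);
		assert(offsetof(struct crypto_aes_ctx, key_length) == 480);
		return 0;
	}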
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index a120f526c3df..5b577d5a059b 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -17,8 +17,6 @@
 
 #include <asm/asm-offsets.h>
 
-#define BASE crypto_tfm_ctx_offset
-
 #define R1	%rax
 #define R1E	%eax
 #define R1X	%ax
@@ -56,13 +54,13 @@
 	.align	8;			\
 FUNC:	movq	r1,r2;			\
 	movq	r3,r4;			\
-	leaq	BASE+KEY+48+4(r8),r9;	\
+	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	4(r7),r1 ## E;		\
 	movl	8(r7),r6 ## E;		\
 	movl	12(r7),r7 ## E;		\
-	movl	BASE+0(r8),r10 ## E;	\
+	movl	480(r8),r10 ## E;	\
 	xorl	-48(r9),r5 ## E;	\
 	xorl	-44(r9),r1 ## E;	\
 	xorl	-40(r9),r6 ## E;	\
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 71f457827116..49ae9fe32b22 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,17 +5,29 @@
 
 #include <crypto/aes.h>
 
-asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
+
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_enc_blk(tfm, dst, src);
+	aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_dec_blk(tfm, dst, src);
+	aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static struct crypto_alg aes_alg = {
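A hedged usage sketch for the newly exported helpers: a caller (for example, the AES-NI glue's software fallback path) would expand the key into a crypto_aes_ctx first and then hand that context straight to the assembler routines. crypto_aes_expand_key() is assumed here to be the generic key-expansion helper of this kernel generation; error handling is abbreviated.

	#include <crypto/aes.h>

	/* Sketch only: encrypt one 16-byte block with the exported helper. */
	static int sketch_encrypt_block(const u8 *key, unsigned int key_len,
					u8 *dst, const u8 *src)
	{
		struct crypto_aes_ctx ctx;
		int err;

		err = crypto_aes_expand_key(&ctx, key, key_len); /* assumed API */
		if (err)
			return err;
		crypto_aes_encrypt_x86(&ctx, dst, src);
		return 0;
	}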
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 000000000000..caba99601703
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
1/*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
10 * Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include <linux/linkage.h>
19
20.text
21
22#define STATE1 %xmm0
23#define STATE2 %xmm4
24#define STATE3 %xmm5
25#define STATE4 %xmm6
26#define STATE STATE1
27#define IN1 %xmm1
28#define IN2 %xmm7
29#define IN3 %xmm8
30#define IN4 %xmm9
31#define IN IN1
32#define KEY %xmm2
33#define IV %xmm3
34
35#define KEYP %rdi
36#define OUTP %rsi
37#define INP %rdx
38#define LEN %rcx
39#define IVP %r8
40#define KLEN %r9d
41#define T1 %r10
42#define TKEYP T1
43#define T2 %r11
44
45_key_expansion_128:
46_key_expansion_256a:
47 pshufd $0b11111111, %xmm1, %xmm1
48 shufps $0b00010000, %xmm0, %xmm4
49 pxor %xmm4, %xmm0
50 shufps $0b10001100, %xmm0, %xmm4
51 pxor %xmm4, %xmm0
52 pxor %xmm1, %xmm0
53 movaps %xmm0, (%rcx)
54 add $0x10, %rcx
55 ret
56
57_key_expansion_192a:
58 pshufd $0b01010101, %xmm1, %xmm1
59 shufps $0b00010000, %xmm0, %xmm4
60 pxor %xmm4, %xmm0
61 shufps $0b10001100, %xmm0, %xmm4
62 pxor %xmm4, %xmm0
63 pxor %xmm1, %xmm0
64
65 movaps %xmm2, %xmm5
66 movaps %xmm2, %xmm6
67 pslldq $4, %xmm5
68 pshufd $0b11111111, %xmm0, %xmm3
69 pxor %xmm3, %xmm2
70 pxor %xmm5, %xmm2
71
72 movaps %xmm0, %xmm1
73 shufps $0b01000100, %xmm0, %xmm6
74 movaps %xmm6, (%rcx)
75 shufps $0b01001110, %xmm2, %xmm1
76 movaps %xmm1, 16(%rcx)
77 add $0x20, %rcx
78 ret
79
80_key_expansion_192b:
81 pshufd $0b01010101, %xmm1, %xmm1
82 shufps $0b00010000, %xmm0, %xmm4
83 pxor %xmm4, %xmm0
84 shufps $0b10001100, %xmm0, %xmm4
85 pxor %xmm4, %xmm0
86 pxor %xmm1, %xmm0
87
88 movaps %xmm2, %xmm5
89 pslldq $4, %xmm5
90 pshufd $0b11111111, %xmm0, %xmm3
91 pxor %xmm3, %xmm2
92 pxor %xmm5, %xmm2
93
94 movaps %xmm0, (%rcx)
95 add $0x10, %rcx
96 ret
97
98_key_expansion_256b:
99 pshufd $0b10101010, %xmm1, %xmm1
100 shufps $0b00010000, %xmm2, %xmm4
101 pxor %xmm4, %xmm2
102 shufps $0b10001100, %xmm2, %xmm4
103 pxor %xmm4, %xmm2
104 pxor %xmm1, %xmm2
105 movaps %xmm2, (%rcx)
106 add $0x10, %rcx
107 ret
108
109/*
110 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
111 * unsigned int key_len)
112 */
113ENTRY(aesni_set_key)
114 movups (%rsi), %xmm0 # user key (first 16 bytes)
115 movaps %xmm0, (%rdi)
116 lea 0x10(%rdi), %rcx # key addr
117 movl %edx, 480(%rdi)
118 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
119 cmp $24, %dl
120 jb .Lenc_key128
121 je .Lenc_key192
122 movups 0x10(%rsi), %xmm2 # other user key
123 movaps %xmm2, (%rcx)
124 add $0x10, %rcx
125 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
126 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
127 call _key_expansion_256a
128 # aeskeygenassist $0x1, %xmm0, %xmm1
129 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
130 call _key_expansion_256b
131 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
132 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
133 call _key_expansion_256a
134 # aeskeygenassist $0x2, %xmm0, %xmm1
135 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
136 call _key_expansion_256b
137 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
138 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
139 call _key_expansion_256a
140 # aeskeygenassist $0x4, %xmm0, %xmm1
141 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
142 call _key_expansion_256b
143 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
144 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
145 call _key_expansion_256a
146 # aeskeygenassist $0x8, %xmm0, %xmm1
147 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
148 call _key_expansion_256b
149 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
150 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
151 call _key_expansion_256a
152 # aeskeygenassist $0x10, %xmm0, %xmm1
153 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
154 call _key_expansion_256b
155 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
156 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
157 call _key_expansion_256a
158 # aeskeygenassist $0x20, %xmm0, %xmm1
159 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
160 call _key_expansion_256b
161 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
162 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
163 call _key_expansion_256a
164 jmp .Ldec_key
165.Lenc_key192:
166 movq 0x10(%rsi), %xmm2 # other user key
167 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
168 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
169 call _key_expansion_192a
170 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
171 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
172 call _key_expansion_192b
173 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
174 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
175 call _key_expansion_192a
176 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
177 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
178 call _key_expansion_192b
179 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
180 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
181 call _key_expansion_192a
182 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
183 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
184 call _key_expansion_192b
185 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
186 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
187 call _key_expansion_192a
188 # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
189 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
190 call _key_expansion_192b
191 jmp .Ldec_key
192.Lenc_key128:
193 # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
194 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
195 call _key_expansion_128
196 # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
197 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
198 call _key_expansion_128
199 # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
200 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
201 call _key_expansion_128
202 # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
203 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
204 call _key_expansion_128
205 # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
206 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
207 call _key_expansion_128
208 # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
209 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
210 call _key_expansion_128
211 # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
212 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
213 call _key_expansion_128
214 # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
215 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
216 call _key_expansion_128
217 # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
218 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
219 call _key_expansion_128
220 # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
221 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
222 call _key_expansion_128
223.Ldec_key:
224 sub $0x10, %rcx
225 movaps (%rdi), %xmm0
226 movaps (%rcx), %xmm1
227 movaps %xmm0, 240(%rcx)
228 movaps %xmm1, 240(%rdi)
229 add $0x10, %rdi
230 lea 240-16(%rcx), %rsi
231.align 4
232.Ldec_key_loop:
233 movaps (%rdi), %xmm0
234 # aesimc %xmm0, %xmm1
235 .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
236 movaps %xmm1, (%rsi)
237 add $0x10, %rdi
238 sub $0x10, %rsi
239 cmp %rcx, %rdi
240 jb .Ldec_key_loop
241 xor %rax, %rax
242 ret
243
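The .Ldec_key tail above derives the decryption schedule for the equivalent inverse cipher: the first and last encryption round keys swap positions unchanged, and every middle round key is run through AESIMC (InvMixColumns). A hedged intrinsics sketch of the same derivation (names are illustrative):

#include <wmmintrin.h>

/*
 * Build the decryption round keys from the encryption schedule, as the
 * .Ldec_key loop does: reverse the order and apply InvMixColumns to
 * everything except the first and last round keys.
 */
static void build_dec_schedule(const __m128i *enc, __m128i *dec, int nrounds)
{
	int i;

	dec[0] = enc[nrounds];		/* last enc round key, as-is */
	for (i = 1; i < nrounds; i++)
		dec[i] = _mm_aesimc_si128(enc[nrounds - i]);
	dec[nrounds] = enc[0];		/* round-0 key, as-is */
}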
244/*
245 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
246 */
247ENTRY(aesni_enc)
248 movl 480(KEYP), KLEN # key length
249 movups (INP), STATE # input
250 call _aesni_enc1
251 movups STATE, (OUTP) # output
252 ret
253
254/*
255 * _aesni_enc1: internal ABI
256 * input:
257 * KEYP: key struct pointer
258 * KLEN: key length
259 * STATE: initial state (input)
260 * output:
261 * STATE: final state (output)
262 * changed:
263 * KEY
264 * TKEYP (T1)
265 */
266_aesni_enc1:
267 movaps (KEYP), KEY # key
268 mov KEYP, TKEYP
269 pxor KEY, STATE # round 0
270 add $0x30, TKEYP
271 cmp $24, KLEN
272 jb .Lenc128
273 lea 0x20(TKEYP), TKEYP
274 je .Lenc192
275 add $0x20, TKEYP
276 movaps -0x60(TKEYP), KEY
277 # aesenc KEY, STATE
278 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
279 movaps -0x50(TKEYP), KEY
280 # aesenc KEY, STATE
281 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
282.align 4
283.Lenc192:
284 movaps -0x40(TKEYP), KEY
285 # aesenc KEY, STATE
286 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
287 movaps -0x30(TKEYP), KEY
288 # aesenc KEY, STATE
289 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
290.align 4
291.Lenc128:
292 movaps -0x20(TKEYP), KEY
293 # aesenc KEY, STATE
294 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
295 movaps -0x10(TKEYP), KEY
296 # aesenc KEY, STATE
297 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
298 movaps (TKEYP), KEY
299 # aesenc KEY, STATE
300 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
301 movaps 0x10(TKEYP), KEY
302 # aesenc KEY, STATE
303 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
304 movaps 0x20(TKEYP), KEY
305 # aesenc KEY, STATE
306 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
307 movaps 0x30(TKEYP), KEY
308 # aesenc KEY, STATE
309 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
310 movaps 0x40(TKEYP), KEY
311 # aesenc KEY, STATE
312 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
313 movaps 0x50(TKEYP), KEY
314 # aesenc KEY, STATE
315 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
316 movaps 0x60(TKEYP), KEY
317 # aesenc KEY, STATE
318 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
319 movaps 0x70(TKEYP), KEY
320 # aesenclast KEY, STATE # last round
321 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
322 ret
323
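_aesni_enc1 fully unrolls the rounds and branches on key length: the cmp $24/jb/je above selects 10 rounds for AES-128, 12 for AES-192 and 14 for AES-256 (nrounds = key_len/4 + 6). Rolled back up into C intrinsics, the same walk is roughly (a sketch; rk and nrounds are illustrative names):

#include <wmmintrin.h>

/* One-block AES encryption over an expanded schedule rk[0..nrounds]. */
static __m128i encrypt_block(const __m128i *rk, int nrounds, __m128i state)
{
	int i;

	state = _mm_xor_si128(state, rk[0]);		/* round 0: pxor */
	for (i = 1; i < nrounds; i++)
		state = _mm_aesenc_si128(state, rk[i]);	/* middle rounds */
	return _mm_aesenclast_si128(state, rk[nrounds]); /* final round */
}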
324/*
325 * _aesni_enc4: internal ABI
326 * input:
327 * KEYP: key struct pointer
328 * KLEN: key length
329 * STATE1: initial state (input)
330 * STATE2
331 * STATE3
332 * STATE4
333 * output:
334 * STATE1: final state (output)
335 * STATE2
336 * STATE3
337 * STATE4
338 * changed:
339 * KEY
340 * TKEYP (T1)
341 */
342_aesni_enc4:
343 movaps (KEYP), KEY # key
344 mov KEYP, TKEYP
345 pxor KEY, STATE1 # round 0
346 pxor KEY, STATE2
347 pxor KEY, STATE3
348 pxor KEY, STATE4
349 add $0x30, TKEYP
350 cmp $24, KLEN
351 jb .L4enc128
352 lea 0x20(TKEYP), TKEYP
353 je .L4enc192
354 add $0x20, TKEYP
355 movaps -0x60(TKEYP), KEY
356 # aesenc KEY, STATE1
357 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
358 # aesenc KEY, STATE2
359 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
360 # aesenc KEY, STATE3
361 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
362 # aesenc KEY, STATE4
363 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
364 movaps -0x50(TKEYP), KEY
365 # aesenc KEY, STATE1
366 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
367 # aesenc KEY, STATE2
368 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
369 # aesenc KEY, STATE3
370 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
371 # aesenc KEY, STATE4
372 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
373#.align 4
374.L4enc192:
375 movaps -0x40(TKEYP), KEY
376 # aesenc KEY, STATE1
377 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
378 # aesenc KEY, STATE2
379 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
380 # aesenc KEY, STATE3
381 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
382 # aesenc KEY, STATE4
383 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
384 movaps -0x30(TKEYP), KEY
385 # aesenc KEY, STATE1
386 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
387 # aesenc KEY, STATE2
388 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
389 # aesenc KEY, STATE3
390 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
391 # aesenc KEY, STATE4
392 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
393#.align 4
394.L4enc128:
395 movaps -0x20(TKEYP), KEY
396 # aesenc KEY, STATE1
397 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
398 # aesenc KEY, STATE2
399 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
400 # aesenc KEY, STATE3
401 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
402 # aesenc KEY, STATE4
403 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
404 movaps -0x10(TKEYP), KEY
405 # aesenc KEY, STATE1
406 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
407 # aesenc KEY, STATE2
408 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
409 # aesenc KEY, STATE3
410 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
411 # aesenc KEY, STATE4
412 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
413 movaps (TKEYP), KEY
414 # aesenc KEY, STATE1
415 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
416 # aesenc KEY, STATE2
417 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
418 # aesenc KEY, STATE3
419 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
420 # aesenc KEY, STATE4
421 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
422 movaps 0x10(TKEYP), KEY
423 # aesenc KEY, STATE1
424 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
425 # aesenc KEY, STATE2
426 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
427 # aesenc KEY, STATE3
428 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
429 # aesenc KEY, STATE4
430 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
431 movaps 0x20(TKEYP), KEY
432 # aesenc KEY, STATE1
433 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
434 # aesenc KEY, STATE2
435 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
436 # aesenc KEY, STATE3
437 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
438 # aesenc KEY, STATE4
439 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
440 movaps 0x30(TKEYP), KEY
441 # aesenc KEY, STATE1
442 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
443 # aesenc KEY, STATE2
444 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
445 # aesenc KEY, STATE3
446 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
447 # aesenc KEY, STATE4
448 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
449 movaps 0x40(TKEYP), KEY
450 # aesenc KEY, STATE1
451 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
452 # aesenc KEY, STATE2
453 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
454 # aesenc KEY, STATE3
455 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
456 # aesenc KEY, STATE4
457 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
458 movaps 0x50(TKEYP), KEY
459 # aesenc KEY, STATE1
460 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
461 # aesenc KEY, STATE2
462 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
463 # aesenc KEY, STATE3
464 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
465 # aesenc KEY, STATE4
466 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
467 movaps 0x60(TKEYP), KEY
468 # aesenc KEY, STATE1
469 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
470 # aesenc KEY, STATE2
471 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
472 # aesenc KEY, STATE3
473 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
474 # aesenc KEY, STATE4
475 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
476 movaps 0x70(TKEYP), KEY
477 # aesenclast KEY, STATE1 # last round
478 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
479 # aesenclast KEY, STATE2
480 .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
481 # aesenclast KEY, STATE3
482 .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
483 # aesenclast KEY, STATE4
484 .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
485 ret
486
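The point of _aesni_enc4 is latency hiding: aesenc has multi-cycle latency but pipelines well, so issuing each round key against four independent states keeps the AES unit busy. A sketch of the same schedule in C (illustrative only; a compiler keeps the four states in registers once this is unrolled):

#include <wmmintrin.h>

/* Four independent blocks per round key: the four aesenc operations
 * carry no data dependency on one another, so they overlap in flight. */
static void encrypt4_blocks(const __m128i *rk, int nrounds, __m128i s[4])
{
	int i, b;

	for (b = 0; b < 4; b++)
		s[b] = _mm_xor_si128(s[b], rk[0]);	/* round 0 */
	for (i = 1; i < nrounds; i++)
		for (b = 0; b < 4; b++)
			s[b] = _mm_aesenc_si128(s[b], rk[i]);
	for (b = 0; b < 4; b++)
		s[b] = _mm_aesenclast_si128(s[b], rk[nrounds]);	/* last */
}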
487/*
488 * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
489 */
490ENTRY(aesni_dec)
491 mov 480(KEYP), KLEN # key length
492 add $240, KEYP
493 movups (INP), STATE # input
494 call _aesni_dec1
495 movups STATE, (OUTP) # output
496 ret
497
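aesni_dec locates the decryption schedule by adding 240 to the context pointer, and all entry points read the key length at offset 480. The asm therefore relies on a context layout along these lines (a sketch of the offsets only; the real definition is struct crypto_aes_ctx in <crypto/aes.h>):

struct aes_ctx_layout {			/* offsets assumed by the asm */
	unsigned char key_enc[15 * 16];	/* 0..239: encryption round keys */
	unsigned char key_dec[15 * 16];	/* 240..479: decryption round keys */
	unsigned int key_length;	/* 480: 16, 24 or 32 (bytes) */
};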
498/*
499 * _aesni_dec1: internal ABI
500 * input:
501 * KEYP: key struct pointer
502 * KLEN: key length
503 * STATE: initial state (input)
504 * output:
505 * STATE: final state (output)
506 * changed:
507 * KEY
508 * TKEYP (T1)
509 */
510_aesni_dec1:
511 movaps (KEYP), KEY # key
512 mov KEYP, TKEYP
513 pxor KEY, STATE # round 0
514 add $0x30, TKEYP
515 cmp $24, KLEN
516 jb .Ldec128
517 lea 0x20(TKEYP), TKEYP
518 je .Ldec192
519 add $0x20, TKEYP
520 movaps -0x60(TKEYP), KEY
521 # aesdec KEY, STATE
522 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
523 movaps -0x50(TKEYP), KEY
524 # aesdec KEY, STATE
525 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
526.align 4
527.Ldec192:
528 movaps -0x40(TKEYP), KEY
529 # aesdec KEY, STATE
530 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
531 movaps -0x30(TKEYP), KEY
532 # aesdec KEY, STATE
533 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
534.align 4
535.Ldec128:
536 movaps -0x20(TKEYP), KEY
537 # aesdec KEY, STATE
538 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
539 movaps -0x10(TKEYP), KEY
540 # aesdec KEY, STATE
541 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
542 movaps (TKEYP), KEY
543 # aesdec KEY, STATE
544 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
545 movaps 0x10(TKEYP), KEY
546 # aesdec KEY, STATE
547 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
548 movaps 0x20(TKEYP), KEY
549 # aesdec KEY, STATE
550 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
551 movaps 0x30(TKEYP), KEY
552 # aesdec KEY, STATE
553 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
554 movaps 0x40(TKEYP), KEY
555 # aesdec KEY, STATE
556 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
557 movaps 0x50(TKEYP), KEY
558 # aesdec KEY, STATE
559 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
560 movaps 0x60(TKEYP), KEY
561 # aesdec KEY, STATE
562 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
563 movaps 0x70(TKEYP), KEY
564 # aesdeclast KEY, STATE # last round
565 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
566 ret
567
568/*
569 * _aesni_dec4: internal ABI
570 * input:
571 * KEYP: key struct pointer
572 * KLEN: key length
573 * STATE1: initial state (input)
574 * STATE2
575 * STATE3
576 * STATE4
577 * output:
578 * STATE1: final state (output)
579 * STATE2
580 * STATE3
581 * STATE4
582 * changed:
583 * KEY
584 * TKEYP (T1)
585 */
586_aesni_dec4:
587 movaps (KEYP), KEY # key
588 mov KEYP, TKEYP
589 pxor KEY, STATE1 # round 0
590 pxor KEY, STATE2
591 pxor KEY, STATE3
592 pxor KEY, STATE4
593 add $0x30, TKEYP
594 cmp $24, KLEN
595 jb .L4dec128
596 lea 0x20(TKEYP), TKEYP
597 je .L4dec192
598 add $0x20, TKEYP
599 movaps -0x60(TKEYP), KEY
600 # aesdec KEY, STATE1
601 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
602 # aesdec KEY, STATE2
603 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
604 # aesdec KEY, STATE3
605 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
606 # aesdec KEY, STATE4
607 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
608 movaps -0x50(TKEYP), KEY
609 # aesdec KEY, STATE1
610 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
611 # aesdec KEY, STATE2
612 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
613 # aesdec KEY, STATE3
614 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
615 # aesdec KEY, STATE4
616 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
617.align 4
618.L4dec192:
619 movaps -0x40(TKEYP), KEY
620 # aesdec KEY, STATE1
621 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
622 # aesdec KEY, STATE2
623 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
624 # aesdec KEY, STATE3
625 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
626 # aesdec KEY, STATE4
627 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
628 movaps -0x30(TKEYP), KEY
629 # aesdec KEY, STATE1
630 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
631 # aesdec KEY, STATE2
632 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
633 # aesdec KEY, STATE3
634 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
635 # aesdec KEY, STATE4
636 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
637.align 4
638.L4dec128:
639 movaps -0x20(TKEYP), KEY
640 # aesdec KEY, STATE1
641 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
642 # aesdec KEY, STATE2
643 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
644 # aesdec KEY, STATE3
645 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
646 # aesdec KEY, STATE4
647 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
648 movaps -0x10(TKEYP), KEY
649 # aesdec KEY, STATE1
650 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
651 # aesdec KEY, STATE2
652 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
653 # aesdec KEY, STATE3
654 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
655 # aesdec KEY, STATE4
656 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
657 movaps (TKEYP), KEY
658 # aesdec KEY, STATE1
659 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
660 # aesdec KEY, STATE2
661 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
662 # aesdec KEY, STATE3
663 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
664 # aesdec KEY, STATE4
665 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
666 movaps 0x10(TKEYP), KEY
667 # aesdec KEY, STATE1
668 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
669 # aesdec KEY, STATE2
670 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
671 # aesdec KEY, STATE3
672 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
673 # aesdec KEY, STATE4
674 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
675 movaps 0x20(TKEYP), KEY
676 # aesdec KEY, STATE1
677 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
678 # aesdec KEY, STATE2
679 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
680 # aesdec KEY, STATE3
681 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
682 # aesdec KEY, STATE4
683 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
684 movaps 0x30(TKEYP), KEY
685 # aesdec KEY, STATE1
686 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
687 # aesdec KEY, STATE2
688 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
689 # aesdec KEY, STATE3
690 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
691 # aesdec KEY, STATE4
692 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
693 movaps 0x40(TKEYP), KEY
694 # aesdec KEY, STATE1
695 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
696 # aesdec KEY, STATE2
697 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
698 # aesdec KEY, STATE3
699 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
700 # aesdec KEY, STATE4
701 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
702 movaps 0x50(TKEYP), KEY
703 # aesdec KEY, STATE1
704 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
705 # aesdec KEY, STATE2
706 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
707 # aesdec KEY, STATE3
708 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
709 # aesdec KEY, STATE4
710 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
711 movaps 0x60(TKEYP), KEY
712 # aesdec KEY, STATE1
713 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
714 # aesdec KEY, STATE2
715 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2
716 # aesdec KEY, STATE3
717 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
718 # aesdec KEY, STATE4
719 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
720 movaps 0x70(TKEYP), KEY
721 # aesdeclast KEY, STATE1 # last round
722 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
723 # aesdeclast KEY, STATE2
724 .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
725 # aesdeclast KEY, STATE3
726 .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
727 # aesdeclast KEY, STATE4
728 .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
729 ret
730
731/*
732 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
733 * unsigned int len)
734 */
735ENTRY(aesni_ecb_enc)
736 test LEN, LEN # check length
737 jz .Lecb_enc_ret
738 mov 480(KEYP), KLEN
739 cmp $16, LEN
740 jb .Lecb_enc_ret
741 cmp $64, LEN
742 jb .Lecb_enc_loop1
743.align 4
744.Lecb_enc_loop4:
745 movups (INP), STATE1
746 movups 0x10(INP), STATE2
747 movups 0x20(INP), STATE3
748 movups 0x30(INP), STATE4
749 call _aesni_enc4
750 movups STATE1, (OUTP)
751 movups STATE2, 0x10(OUTP)
752 movups STATE3, 0x20(OUTP)
753 movups STATE4, 0x30(OUTP)
754 sub $64, LEN
755 add $64, INP
756 add $64, OUTP
757 cmp $64, LEN
758 jge .Lecb_enc_loop4
759 cmp $16, LEN
760 jb .Lecb_enc_ret
761.align 4
762.Lecb_enc_loop1:
763 movups (INP), STATE1
764 call _aesni_enc1
765 movups STATE1, (OUTP)
766 sub $16, LEN
767 add $16, INP
768 add $16, OUTP
769 cmp $16, LEN
770 jge .Lecb_enc_loop1
771.Lecb_enc_ret:
772 ret
773
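The driver-loop shape, a 64-byte four-block fast path followed by a 16-byte single-block tail, recurs in all four ECB/CBC entry points. Using the helpers sketched earlier, aesni_ecb_enc's control flow is roughly as follows (unaligned loads/stores mirror the movups instructions; sub-block residue is the caller's problem):

#include <string.h>
#include <wmmintrin.h>

static void ecb_enc(const __m128i *rk, int nrounds, unsigned char *out,
		    const unsigned char *in, size_t len)
{
	while (len >= 64) {			/* 4 blocks per iteration */
		__m128i s[4];

		memcpy(s, in, 64);
		encrypt4_blocks(rk, nrounds, s);
		memcpy(out, s, 64);
		in += 64; out += 64; len -= 64;
	}
	while (len >= 16) {			/* single-block tail */
		__m128i s = _mm_loadu_si128((const __m128i *)in);

		s = encrypt_block(rk, nrounds, s);
		_mm_storeu_si128((__m128i *)out, s);
		in += 16; out += 16; len -= 16;
	}
}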
774/*
775 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
776 * unsigned int len);
777 */
778ENTRY(aesni_ecb_dec)
779 test LEN, LEN
780 jz .Lecb_dec_ret
781 mov 480(KEYP), KLEN
782 add $240, KEYP
783 cmp $16, LEN
784 jb .Lecb_dec_ret
785 cmp $64, LEN
786 jb .Lecb_dec_loop1
787.align 4
788.Lecb_dec_loop4:
789 movups (INP), STATE1
790 movups 0x10(INP), STATE2
791 movups 0x20(INP), STATE3
792 movups 0x30(INP), STATE4
793 call _aesni_dec4
794 movups STATE1, (OUTP)
795 movups STATE2, 0x10(OUTP)
796 movups STATE3, 0x20(OUTP)
797 movups STATE4, 0x30(OUTP)
798 sub $64, LEN
799 add $64, INP
800 add $64, OUTP
801 cmp $64, LEN
802 jge .Lecb_dec_loop4
803 cmp $16, LEN
804 jb .Lecb_dec_ret
805.align 4
806.Lecb_dec_loop1:
807 movups (INP), STATE1
808 call _aesni_dec1
809 movups STATE1, (OUTP)
810 sub $16, LEN
811 add $16, INP
812 add $16, OUTP
813 cmp $16, LEN
814 jge .Lecb_dec_loop1
815.Lecb_dec_ret:
816 ret
817
818/*
819 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
820 * unsigned int len, u8 *iv)
821 */
822ENTRY(aesni_cbc_enc)
823 cmp $16, LEN
824 jb .Lcbc_enc_ret
825 mov 480(KEYP), KLEN
826 movups (IVP), STATE # load iv as initial state
827.align 4
828.Lcbc_enc_loop:
829 movups (INP), IN # load input
830 pxor IN, STATE
831 call _aesni_enc1
832 movups STATE, (OUTP) # store output
833 sub $16, LEN
834 add $16, INP
835 add $16, OUTP
836 cmp $16, LEN
837 jge .Lcbc_enc_loop
838 movups STATE, (IVP)
839.Lcbc_enc_ret:
840 ret
841
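CBC encryption cannot use the four-block helper: each plaintext block is XORed with the previous ciphertext block before encryption, so the chain is inherently serial. Note the final movups STATE, (IVP), which writes the last ciphertext block back as the next call's IV. A sketch using the single-block helper from above:

#include <wmmintrin.h>

/* Serial CBC encryption: C[i] = E(P[i] ^ C[i-1]), with C[-1] = IV. */
static void cbc_enc(const __m128i *rk, int nrounds, unsigned char *out,
		    const unsigned char *in, size_t len, unsigned char *ivp)
{
	__m128i iv = _mm_loadu_si128((const __m128i *)ivp);

	while (len >= 16) {
		__m128i p = _mm_loadu_si128((const __m128i *)in);

		iv = encrypt_block(rk, nrounds, _mm_xor_si128(p, iv));
		_mm_storeu_si128((__m128i *)out, iv);
		in += 16; out += 16; len -= 16;
	}
	_mm_storeu_si128((__m128i *)ivp, iv);	/* chaining value out */
}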
842/*
843 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
844 * unsigned int len, u8 *iv)
845 */
846ENTRY(aesni_cbc_dec)
847 cmp $16, LEN
848 jb .Lcbc_dec_ret
849 mov 480(KEYP), KLEN
850 add $240, KEYP
851 movups (IVP), IV
852 cmp $64, LEN
853 jb .Lcbc_dec_loop1
854.align 4
855.Lcbc_dec_loop4:
856 movups (INP), IN1
857 movaps IN1, STATE1
858 movups 0x10(INP), IN2
859 movaps IN2, STATE2
860 movups 0x20(INP), IN3
861 movaps IN3, STATE3
862 movups 0x30(INP), IN4
863 movaps IN4, STATE4
864 call _aesni_dec4
865 pxor IV, STATE1
866 pxor IN1, STATE2
867 pxor IN2, STATE3
868 pxor IN3, STATE4
869 movaps IN4, IV
870 movups STATE1, (OUTP)
871 movups STATE2, 0x10(OUTP)
872 movups STATE3, 0x20(OUTP)
873 movups STATE4, 0x30(OUTP)
874 sub $64, LEN
875 add $64, INP
876 add $64, OUTP
877 cmp $64, LEN
878 jge .Lcbc_dec_loop4
879 cmp $16, LEN
880 jb .Lcbc_dec_ret
881.align 4
882.Lcbc_dec_loop1:
883 movups (INP), IN
884 movaps IN, STATE
885 call _aesni_dec1
886 pxor IV, STATE
887 movups STATE, (OUTP)
888 movaps IN, IV
889 sub $16, LEN
890 add $16, INP
891 add $16, OUTP
892 cmp $16, LEN
893 jge .Lcbc_dec_loop1
894 movups IV, (IVP)
895.Lcbc_dec_ret:
896 ret
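CBC decryption, unlike encryption, does parallelize: each output is D(C[i]) ^ C[i-1] and all the C[i] are already in hand, which is why the loop above snapshots the ciphertext into IN1..IN4 before calling _aesni_dec4. (Also worth noting: the jb .Lcbc_dec_ret exit from the four-block loop bypasses the movups IV, (IVP) store, so the IV is only written back when the single-block tail runs.) A sketch of one four-block group, assuming a decrypt4_blocks() that mirrors encrypt4_blocks() with aesdec/aesdeclast:

#include <string.h>
#include <wmmintrin.h>

/* Returns the chaining value for the next group (the last ciphertext
 * block), playing the role of "movaps IN4, IV" in the asm. */
static __m128i cbc_dec4(const __m128i *rk, int nrounds, unsigned char *out,
			const unsigned char *in, __m128i iv)
{
	__m128i c[4], s[4];

	memcpy(c, in, 64);		/* keep ciphertext for the XORs */
	memcpy(s, c, 64);
	decrypt4_blocks(rk, nrounds, s);	/* hypothetical helper */
	s[0] = _mm_xor_si128(s[0], iv);
	s[1] = _mm_xor_si128(s[1], c[0]);
	s[2] = _mm_xor_si128(s[2], c[1]);
	s[3] = _mm_xor_si128(s[3], c[2]);
	memcpy(out, s, 64);
	return c[3];
}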
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 000000000000..02af0af65497
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
1/*
2 * Support for Intel AES-NI instructions. This file contains glue
3 * code; the real AES implementation is in aesni-intel_asm.S.
4 *
5 * Copyright (C) 2008, Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/hardirq.h>
15#include <linux/types.h>
16#include <linux/crypto.h>
17#include <linux/err.h>
18#include <crypto/algapi.h>
19#include <crypto/aes.h>
20#include <crypto/cryptd.h>
21#include <asm/i387.h>
22#include <asm/aes.h>
23
24struct async_aes_ctx {
25 struct cryptd_ablkcipher *cryptd_tfm;
26};
27
28#define AESNI_ALIGN 16
29#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
30
31asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
32 unsigned int key_len);
33asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
34 const u8 *in);
35asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
36 const u8 *in);
37asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
38 const u8 *in, unsigned int len);
39asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
40 const u8 *in, unsigned int len);
41asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
42 const u8 *in, unsigned int len, u8 *iv);
43asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
44 const u8 *in, unsigned int len, u8 *iv);
45
46static inline int kernel_fpu_using(void)
47{
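	/*
	 * In interrupt context with CR0.TS clear, the interrupted task's
	 * FPU/SSE state is live in the registers; touching XMM state for
	 * AES-NI here would clobber it, so callers fall back to the
	 * table-based implementation instead.
	 */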
48 if (in_interrupt() && !(read_cr0() & X86_CR0_TS))
49 return 1;
50 return 0;
51}
52
53static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
54{
55 unsigned long addr = (unsigned long)raw_ctx;
56 unsigned long align = AESNI_ALIGN;
57
58 if (align <= crypto_tfm_ctx_alignment())
59 align = 1;
60 return (struct crypto_aes_ctx *)ALIGN(addr, align);
61}
62
63static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
64 const u8 *in_key, unsigned int key_len)
65{
66 struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
67 u32 *flags = &tfm->crt_flags;
68 int err;
69
70 if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
71 key_len != AES_KEYSIZE_256) {
72 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
73 return -EINVAL;
74 }
75
76 if (kernel_fpu_using())
77 err = crypto_aes_expand_key(ctx, in_key, key_len);
78 else {
79 kernel_fpu_begin();
80 err = aesni_set_key(ctx, in_key, key_len);
81 kernel_fpu_end();
82 }
83
84 return err;
85}
86
87static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
88 unsigned int key_len)
89{
90 return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
91}
92
93static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
94{
95 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
96
97 if (kernel_fpu_using())
98 crypto_aes_encrypt_x86(ctx, dst, src);
99 else {
100 kernel_fpu_begin();
101 aesni_enc(ctx, dst, src);
102 kernel_fpu_end();
103 }
104}
105
106static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
107{
108 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
109
110 if (kernel_fpu_using())
111 crypto_aes_decrypt_x86(ctx, dst, src);
112 else {
113 kernel_fpu_begin();
114 aesni_dec(ctx, dst, src);
115 kernel_fpu_end();
116 }
117}
118
119static struct crypto_alg aesni_alg = {
120 .cra_name = "aes",
121 .cra_driver_name = "aes-aesni",
122 .cra_priority = 300,
123 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
124 .cra_blocksize = AES_BLOCK_SIZE,
125 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
126 .cra_alignmask = 0,
127 .cra_module = THIS_MODULE,
128 .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
129 .cra_u = {
130 .cipher = {
131 .cia_min_keysize = AES_MIN_KEY_SIZE,
132 .cia_max_keysize = AES_MAX_KEY_SIZE,
133 .cia_setkey = aes_set_key,
134 .cia_encrypt = aes_encrypt,
135 .cia_decrypt = aes_decrypt
136 }
137 }
138};
139
140static int ecb_encrypt(struct blkcipher_desc *desc,
141 struct scatterlist *dst, struct scatterlist *src,
142 unsigned int nbytes)
143{
144 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
145 struct blkcipher_walk walk;
146 int err;
147
148 blkcipher_walk_init(&walk, dst, src, nbytes);
149 err = blkcipher_walk_virt(desc, &walk);
150
151 kernel_fpu_begin();
152 while ((nbytes = walk.nbytes)) {
153 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
154 nbytes & AES_BLOCK_MASK);
155 nbytes &= AES_BLOCK_SIZE - 1;
156 err = blkcipher_walk_done(desc, &walk, nbytes);
157 }
158 kernel_fpu_end();
159
160 return err;
161}
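Each walk step may hand over a byte count that is not a multiple of the block size; nbytes & AES_BLOCK_MASK rounds down to whole blocks and the remainder goes back through blkcipher_walk_done() into the next step. A concrete instance of the mask arithmetic:

/* AES_BLOCK_SIZE = 16, AES_BLOCK_MASK = ~15 */
unsigned int nbytes = 70;				/* example length */
unsigned int whole = nbytes & AES_BLOCK_MASK;		/* 64: 4 blocks ciphered now */
unsigned int rest = nbytes & (AES_BLOCK_SIZE - 1);	/* 6: returned to the walk */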
162
163static int ecb_decrypt(struct blkcipher_desc *desc,
164 struct scatterlist *dst, struct scatterlist *src,
165 unsigned int nbytes)
166{
167 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
168 struct blkcipher_walk walk;
169 int err;
170
171 blkcipher_walk_init(&walk, dst, src, nbytes);
172 err = blkcipher_walk_virt(desc, &walk);
173
174 kernel_fpu_begin();
175 while ((nbytes = walk.nbytes)) {
176 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
177 nbytes & AES_BLOCK_MASK);
178 nbytes &= AES_BLOCK_SIZE - 1;
179 err = blkcipher_walk_done(desc, &walk, nbytes);
180 }
181 kernel_fpu_end();
182
183 return err;
184}
185
186static struct crypto_alg blk_ecb_alg = {
187 .cra_name = "__ecb-aes-aesni",
188 .cra_driver_name = "__driver-ecb-aes-aesni",
189 .cra_priority = 0,
190 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
191 .cra_blocksize = AES_BLOCK_SIZE,
192 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
193 .cra_alignmask = 0,
194 .cra_type = &crypto_blkcipher_type,
195 .cra_module = THIS_MODULE,
196 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
197 .cra_u = {
198 .blkcipher = {
199 .min_keysize = AES_MIN_KEY_SIZE,
200 .max_keysize = AES_MAX_KEY_SIZE,
201 .setkey = aes_set_key,
202 .encrypt = ecb_encrypt,
203 .decrypt = ecb_decrypt,
204 },
205 },
206};
207
208static int cbc_encrypt(struct blkcipher_desc *desc,
209 struct scatterlist *dst, struct scatterlist *src,
210 unsigned int nbytes)
211{
212 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
213 struct blkcipher_walk walk;
214 int err;
215
216 blkcipher_walk_init(&walk, dst, src, nbytes);
217 err = blkcipher_walk_virt(desc, &walk);
218
219 kernel_fpu_begin();
220 while ((nbytes = walk.nbytes)) {
221 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
222 nbytes & AES_BLOCK_MASK, walk.iv);
223 nbytes &= AES_BLOCK_SIZE - 1;
224 err = blkcipher_walk_done(desc, &walk, nbytes);
225 }
226 kernel_fpu_end();
227
228 return err;
229}
230
231static int cbc_decrypt(struct blkcipher_desc *desc,
232 struct scatterlist *dst, struct scatterlist *src,
233 unsigned int nbytes)
234{
235 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
236 struct blkcipher_walk walk;
237 int err;
238
239 blkcipher_walk_init(&walk, dst, src, nbytes);
240 err = blkcipher_walk_virt(desc, &walk);
241
242 kernel_fpu_begin();
243 while ((nbytes = walk.nbytes)) {
244 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
245 nbytes & AES_BLOCK_MASK, walk.iv);
246 nbytes &= AES_BLOCK_SIZE - 1;
247 err = blkcipher_walk_done(desc, &walk, nbytes);
248 }
249 kernel_fpu_end();
250
251 return err;
252}
253
254static struct crypto_alg blk_cbc_alg = {
255 .cra_name = "__cbc-aes-aesni",
256 .cra_driver_name = "__driver-cbc-aes-aesni",
257 .cra_priority = 0,
258 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
259 .cra_blocksize = AES_BLOCK_SIZE,
260 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
261 .cra_alignmask = 0,
262 .cra_type = &crypto_blkcipher_type,
263 .cra_module = THIS_MODULE,
264 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
265 .cra_u = {
266 .blkcipher = {
267 .min_keysize = AES_MIN_KEY_SIZE,
268 .max_keysize = AES_MAX_KEY_SIZE,
269 .setkey = aes_set_key,
270 .encrypt = cbc_encrypt,
271 .decrypt = cbc_decrypt,
272 },
273 },
274};
275
276static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
277 unsigned int key_len)
278{
279 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
280
281 return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len);
282}
283
284static int ablk_encrypt(struct ablkcipher_request *req)
285{
286 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
287 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
288
289 if (kernel_fpu_using()) {
290 struct ablkcipher_request *cryptd_req =
291 ablkcipher_request_ctx(req);
292 memcpy(cryptd_req, req, sizeof(*req));
293 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
294 return crypto_ablkcipher_encrypt(cryptd_req);
295 } else {
296 struct blkcipher_desc desc;
297 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
298 desc.info = req->info;
299 desc.flags = 0;
300 return crypto_blkcipher_crt(desc.tfm)->encrypt(
301 &desc, req->dst, req->src, req->nbytes);
302 }
303}
304
305static int ablk_decrypt(struct ablkcipher_request *req)
306{
307 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
308 struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
309
310 if (kernel_fpu_using()) {
311 struct ablkcipher_request *cryptd_req =
312 ablkcipher_request_ctx(req);
313 memcpy(cryptd_req, req, sizeof(*req));
314 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
315 return crypto_ablkcipher_decrypt(cryptd_req);
316 } else {
317 struct blkcipher_desc desc;
318 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
319 desc.info = req->info;
320 desc.flags = 0;
321 return crypto_blkcipher_crt(desc.tfm)->decrypt(
322 &desc, req->dst, req->src, req->nbytes);
323 }
324}
325
326static void ablk_exit(struct crypto_tfm *tfm)
327{
328 struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
329
330 cryptd_free_ablkcipher(ctx->cryptd_tfm);
331}
332
333static void ablk_init_common(struct crypto_tfm *tfm,
334 struct cryptd_ablkcipher *cryptd_tfm)
335{
336 struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
337
338 ctx->cryptd_tfm = cryptd_tfm;
339 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
340 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
341}
342
343static int ablk_ecb_init(struct crypto_tfm *tfm)
344{
345 struct cryptd_ablkcipher *cryptd_tfm;
346
347 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
348 if (IS_ERR(cryptd_tfm))
349 return PTR_ERR(cryptd_tfm);
350 ablk_init_common(tfm, cryptd_tfm);
351 return 0;
352}
353
354static struct crypto_alg ablk_ecb_alg = {
355 .cra_name = "ecb(aes)",
356 .cra_driver_name = "ecb-aes-aesni",
357 .cra_priority = 400,
358 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
359 .cra_blocksize = AES_BLOCK_SIZE,
360 .cra_ctxsize = sizeof(struct async_aes_ctx),
361 .cra_alignmask = 0,
362 .cra_type = &crypto_ablkcipher_type,
363 .cra_module = THIS_MODULE,
364 .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
365 .cra_init = ablk_ecb_init,
366 .cra_exit = ablk_exit,
367 .cra_u = {
368 .ablkcipher = {
369 .min_keysize = AES_MIN_KEY_SIZE,
370 .max_keysize = AES_MAX_KEY_SIZE,
371 .setkey = ablk_set_key,
372 .encrypt = ablk_encrypt,
373 .decrypt = ablk_decrypt,
374 },
375 },
376};
377
378static int ablk_cbc_init(struct crypto_tfm *tfm)
379{
380 struct cryptd_ablkcipher *cryptd_tfm;
381
382 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
383 if (IS_ERR(cryptd_tfm))
384 return PTR_ERR(cryptd_tfm);
385 ablk_init_common(tfm, cryptd_tfm);
386 return 0;
387}
388
389static struct crypto_alg ablk_cbc_alg = {
390 .cra_name = "cbc(aes)",
391 .cra_driver_name = "cbc-aes-aesni",
392 .cra_priority = 400,
393 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
394 .cra_blocksize = AES_BLOCK_SIZE,
395 .cra_ctxsize = sizeof(struct async_aes_ctx),
396 .cra_alignmask = 0,
397 .cra_type = &crypto_ablkcipher_type,
398 .cra_module = THIS_MODULE,
399 .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
400 .cra_init = ablk_cbc_init,
401 .cra_exit = ablk_exit,
402 .cra_u = {
403 .ablkcipher = {
404 .min_keysize = AES_MIN_KEY_SIZE,
405 .max_keysize = AES_MAX_KEY_SIZE,
406 .ivsize = AES_BLOCK_SIZE,
407 .setkey = ablk_set_key,
408 .encrypt = ablk_encrypt,
409 .decrypt = ablk_decrypt,
410 },
411 },
412};
413
414static int __init aesni_init(void)
415{
416 int err;
417
418 if (!cpu_has_aes) {
419 printk(KERN_ERR "Intel AES-NI instructions are not detected.\n");
420 return -ENODEV;
421 }
422 if ((err = crypto_register_alg(&aesni_alg)))
423 goto aes_err;
424 if ((err = crypto_register_alg(&blk_ecb_alg)))
425 goto blk_ecb_err;
426 if ((err = crypto_register_alg(&blk_cbc_alg)))
427 goto blk_cbc_err;
428 if ((err = crypto_register_alg(&ablk_ecb_alg)))
429 goto ablk_ecb_err;
430 if ((err = crypto_register_alg(&ablk_cbc_alg)))
431 goto ablk_cbc_err;
432
433 return err;
434
435ablk_cbc_err:
436 crypto_unregister_alg(&ablk_ecb_alg);
437ablk_ecb_err:
438 crypto_unregister_alg(&blk_cbc_alg);
439blk_cbc_err:
440 crypto_unregister_alg(&blk_ecb_alg);
441blk_ecb_err:
442 crypto_unregister_alg(&aesni_alg);
443aes_err:
444 return err;
445}
446
447static void __exit aesni_exit(void)
448{
449 crypto_unregister_alg(&ablk_cbc_alg);
450 crypto_unregister_alg(&ablk_ecb_alg);
451 crypto_unregister_alg(&blk_cbc_alg);
452 crypto_unregister_alg(&blk_ecb_alg);
453 crypto_unregister_alg(&aesni_alg);
454}
455
456module_init(aesni_init);
457module_exit(aesni_exit);
458
459MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
460MODULE_LICENSE("GPL");
461MODULE_ALIAS("aes");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 097a6b64c24d..a505202086e8 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -557,7 +557,7 @@ ia32_sys_call_table:
557 .quad sys32_olduname 557 .quad sys32_olduname
558 .quad sys_umask /* 60 */ 558 .quad sys_umask /* 60 */
559 .quad sys_chroot 559 .quad sys_chroot
560 .quad sys32_ustat 560 .quad compat_sys_ustat
561 .quad sys_dup2 561 .quad sys_dup2
562 .quad sys_getppid 562 .quad sys_getppid
563 .quad sys_getpgrp /* 65 */ 563 .quad sys_getpgrp /* 65 */
@@ -828,4 +828,6 @@ ia32_sys_call_table:
828 .quad sys_dup3 /* 330 */ 828 .quad sys_dup3 /* 330 */
829 .quad sys_pipe2 829 .quad sys_pipe2
830 .quad sys_inotify_init1 830 .quad sys_inotify_init1
831 .quad compat_sys_preadv
832 .quad compat_sys_pwritev
831ia32_syscall_end: 833ia32_syscall_end:
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 6c0d7f6231af..efac92fd1efb 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -638,28 +638,6 @@ long sys32_uname(struct old_utsname __user *name)
638 return err ? -EFAULT : 0; 638 return err ? -EFAULT : 0;
639} 639}
640 640
641long sys32_ustat(unsigned dev, struct ustat32 __user *u32p)
642{
643 struct ustat u;
644 mm_segment_t seg;
645 int ret;
646
647 seg = get_fs();
648 set_fs(KERNEL_DS);
649 ret = sys_ustat(dev, (struct ustat __user *)&u);
650 set_fs(seg);
651 if (ret < 0)
652 return ret;
653
654 if (!access_ok(VERIFY_WRITE, u32p, sizeof(struct ustat32)) ||
655 __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
656 __put_user((__u32) u.f_tinode, &u32p->f_tfree) ||
657 __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
658 __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
659 ret = -EFAULT;
660 return ret;
661}
662
663asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, 641asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
664 compat_uptr_t __user *envp, struct pt_regs *regs) 642 compat_uptr_t __user *envp, struct pt_regs *regs)
665{ 643{
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h
new file mode 100644
index 000000000000..80545a1cbe39
--- /dev/null
+++ b/arch/x86/include/asm/aes.h
@@ -0,0 +1,11 @@
1#ifndef ASM_X86_AES_H
2#define ASM_X86_AES_H
3
4#include <linux/crypto.h>
5#include <crypto/aes.h>
6
7void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
8 const u8 *src);
9void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
10 const u8 *src);
11#endif
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 394d177d721b..42f2f8377422 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -75,7 +75,7 @@ static inline void default_inquire_remote_apic(int apicid)
75#define setup_secondary_clock setup_secondary_APIC_clock 75#define setup_secondary_clock setup_secondary_APIC_clock
76#endif 76#endif
77 77
78#ifdef CONFIG_X86_VSMP 78#ifdef CONFIG_X86_64
79extern int is_vsmp_box(void); 79extern int is_vsmp_box(void);
80#else 80#else
81static inline int is_vsmp_box(void) 81static inline int is_vsmp_box(void)
@@ -107,7 +107,20 @@ extern u32 native_safe_apic_wait_icr_idle(void);
107extern void native_apic_icr_write(u32 low, u32 id); 107extern void native_apic_icr_write(u32 low, u32 id);
108extern u64 native_apic_icr_read(void); 108extern u64 native_apic_icr_read(void);
109 109
110#define EIM_8BIT_APIC_ID 0
111#define EIM_32BIT_APIC_ID 1
112
110#ifdef CONFIG_X86_X2APIC 113#ifdef CONFIG_X86_X2APIC
114/*
115 * Make previous memory operations globally visible before
116 * sending the IPI through x2apic wrmsr. We need a serializing instruction or
117 * mfence for this.
118 */
119static inline void x2apic_wrmsr_fence(void)
120{
121 asm volatile("mfence" : : : "memory");
122}
123
111static inline void native_apic_msr_write(u32 reg, u32 v) 124static inline void native_apic_msr_write(u32 reg, u32 v)
112{ 125{
113 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || 126 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
@@ -184,6 +197,9 @@ static inline int x2apic_enabled(void)
184{ 197{
185 return 0; 198 return 0;
186} 199}
200
201#define x2apic 0
202
187#endif 203#endif
188 204
189extern int get_physical_broadcast(void); 205extern int get_physical_broadcast(void);
@@ -476,10 +492,19 @@ static inline int default_apic_id_registered(void)
476 return physid_isset(read_apic_id(), phys_cpu_present_map); 492 return physid_isset(read_apic_id(), phys_cpu_present_map);
477} 493}
478 494
495static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
496{
497 return cpuid_apic >> index_msb;
498}
499
500extern int default_apicid_to_node(int logical_apicid);
501
502#endif
503
479static inline unsigned int 504static inline unsigned int
480default_cpu_mask_to_apicid(const struct cpumask *cpumask) 505default_cpu_mask_to_apicid(const struct cpumask *cpumask)
481{ 506{
482 return cpumask_bits(cpumask)[0]; 507 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
483} 508}
484 509
485static inline unsigned int 510static inline unsigned int
@@ -493,15 +518,6 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
493 return (unsigned int)(mask1 & mask2 & mask3); 518 return (unsigned int)(mask1 & mask2 & mask3);
494} 519}
495 520
496static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
497{
498 return cpuid_apic >> index_msb;
499}
500
501extern int default_apicid_to_node(int logical_apicid);
502
503#endif
504
505static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) 521static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
506{ 522{
507 return physid_isset(apicid, bitmap); 523 return physid_isset(apicid, bitmap);
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6526cf08b0e4..6ba23dd9fc92 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,10 +1,6 @@
1#ifndef _ASM_X86_BOOT_H 1#ifndef _ASM_X86_BOOT_H
2#define _ASM_X86_BOOT_H 2#define _ASM_X86_BOOT_H
3 3
4/* Don't touch these, unless you really know what you're doing. */
5#define DEF_SYSSEG 0x1000
6#define DEF_SYSSIZE 0x7F00
7
8/* Internal svga startup constants */ 4/* Internal svga startup constants */
9#define NORMAL_VGA 0xffff /* 80x25 mode */ 5#define NORMAL_VGA 0xffff /* 80x25 mode */
10#define EXTENDED_VGA 0xfffe /* 80x50 mode */ 6#define EXTENDED_VGA 0xfffe /* 80x50 mode */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index eb2221d5add2..e55dfc1ad453 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -90,6 +90,9 @@ int set_memory_4k(unsigned long addr, int numpages);
90int set_memory_array_uc(unsigned long *addr, int addrinarray); 90int set_memory_array_uc(unsigned long *addr, int addrinarray);
91int set_memory_array_wb(unsigned long *addr, int addrinarray); 91int set_memory_array_wb(unsigned long *addr, int addrinarray);
92 92
93int set_pages_array_uc(struct page **pages, int addrinarray);
94int set_pages_array_wb(struct page **pages, int addrinarray);
95
93/* 96/*
94 * For legacy compatibility with the old APIs, a few functions 97 * For legacy compatibility with the old APIs, a few functions
95 * are provided that work on a "struct page". 98 * are provided that work on a "struct page".
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
new file mode 100755
index 000000000000..222802029fa6
--- /dev/null
+++ b/arch/x86/include/asm/cpu_debug.h
@@ -0,0 +1,226 @@
1#ifndef _ASM_X86_CPU_DEBUG_H
2#define _ASM_X86_CPU_DEBUG_H
3
4/*
5 * CPU x86 architecture debug
6 *
7 * Copyright (C) 2009 Jaswinder Singh Rajput
8 */
9
10/* Register flags */
11enum cpu_debug_bit {
12/* Model Specific Registers (MSRs) */
13 CPU_MC_BIT, /* Machine Check */
14 CPU_MONITOR_BIT, /* Monitor */
15 CPU_TIME_BIT, /* Time */
16 CPU_PMC_BIT, /* Performance Monitor */
17 CPU_PLATFORM_BIT, /* Platform */
18 CPU_APIC_BIT, /* APIC */
19 CPU_POWERON_BIT, /* Power-on */
20 CPU_CONTROL_BIT, /* Control */
21 CPU_FEATURES_BIT, /* Features control */
22 CPU_LBRANCH_BIT, /* Last Branch */
23 CPU_BIOS_BIT, /* BIOS */
24 CPU_FREQ_BIT, /* Frequency */
25 CPU_MTTR_BIT, /* MTRR */
26 CPU_PERF_BIT, /* Performance */
27 CPU_CACHE_BIT, /* Cache */
28 CPU_SYSENTER_BIT, /* Sysenter */
29 CPU_THERM_BIT, /* Thermal */
30 CPU_MISC_BIT, /* Miscellaneous */
31 CPU_DEBUG_BIT, /* Debug */
32 CPU_PAT_BIT, /* PAT */
33 CPU_VMX_BIT, /* VMX */
34 CPU_CALL_BIT, /* System Call */
35 CPU_BASE_BIT, /* BASE Address */
36 CPU_VER_BIT, /* Version ID */
37 CPU_CONF_BIT, /* Configuration */
38 CPU_SMM_BIT, /* System mgmt mode */
39 CPU_SVM_BIT, /* Secure Virtual Machine */
40 CPU_OSVM_BIT, /* OS-Visible Workaround */
41/* Standard Registers */
42 CPU_TSS_BIT, /* Task Stack Segment */
43 CPU_CR_BIT, /* Control Registers */
44 CPU_DT_BIT, /* Descriptor Table */
45/* End of Registers flags */
46 CPU_REG_ALL_BIT, /* Select all Registers */
47};
48
49#define CPU_REG_ALL (~0) /* Select all Registers */
50
51#define CPU_MC (1 << CPU_MC_BIT)
52#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
53#define CPU_TIME (1 << CPU_TIME_BIT)
54#define CPU_PMC (1 << CPU_PMC_BIT)
55#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
56#define CPU_APIC (1 << CPU_APIC_BIT)
57#define CPU_POWERON (1 << CPU_POWERON_BIT)
58#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
59#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
60#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
61#define CPU_BIOS (1 << CPU_BIOS_BIT)
62#define CPU_FREQ (1 << CPU_FREQ_BIT)
63#define CPU_MTRR (1 << CPU_MTTR_BIT)
64#define CPU_PERF (1 << CPU_PERF_BIT)
65#define CPU_CACHE (1 << CPU_CACHE_BIT)
66#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
67#define CPU_THERM (1 << CPU_THERM_BIT)
68#define CPU_MISC (1 << CPU_MISC_BIT)
69#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
70#define CPU_PAT (1 << CPU_PAT_BIT)
71#define CPU_VMX (1 << CPU_VMX_BIT)
72#define CPU_CALL (1 << CPU_CALL_BIT)
73#define CPU_BASE (1 << CPU_BASE_BIT)
74#define CPU_VER (1 << CPU_VER_BIT)
75#define CPU_CONF (1 << CPU_CONF_BIT)
76#define CPU_SMM (1 << CPU_SMM_BIT)
77#define CPU_SVM (1 << CPU_SVM_BIT)
78#define CPU_OSVM (1 << CPU_OSVM_BIT)
79#define CPU_TSS (1 << CPU_TSS_BIT)
80#define CPU_CR (1 << CPU_CR_BIT)
81#define CPU_DT (1 << CPU_DT_BIT)
82
83/* Register file flags */
84enum cpu_file_bit {
85 CPU_INDEX_BIT, /* index */
86 CPU_VALUE_BIT, /* value */
87};
88
89#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
90
91/*
92 * DisplayFamily_DisplayModel Processor Families/Processor Number Series
93 * -------------------------- ------------------------------------------
94 * 05_01, 05_02, 05_04 Pentium, Pentium with MMX
95 *
96 * 06_01 Pentium Pro
97 * 06_03, 06_05 Pentium II Xeon, Pentium II
98 * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentium III
99 *
100 * 06_09, 06_0D Pentium M
101 *
102 * 06_0E Core Duo, Core Solo
103 *
104 * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series,
105 * Core 2 Quad, Core 2 Extreme, Core 2 Duo,
106 * Pentium dual-core
107 * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650
108 *
109 * 06_1C Atom
110 *
111 * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4
112 * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D
113 *
114 * 0F_06 Xeon 7100, 5000 Series, Xeon MP,
115 * Pentium 4, Pentium D
116 */
117
118/* Register processors bits */
119enum cpu_processor_bit {
120 CPU_NONE,
121/* Intel */
122 CPU_INTEL_PENTIUM_BIT,
123 CPU_INTEL_P6_BIT,
124 CPU_INTEL_PENTIUM_M_BIT,
125 CPU_INTEL_CORE_BIT,
126 CPU_INTEL_CORE2_BIT,
127 CPU_INTEL_ATOM_BIT,
128 CPU_INTEL_XEON_P4_BIT,
129 CPU_INTEL_XEON_MP_BIT,
130/* AMD */
131 CPU_AMD_K6_BIT,
132 CPU_AMD_K7_BIT,
133 CPU_AMD_K8_BIT,
134 CPU_AMD_0F_BIT,
135 CPU_AMD_10_BIT,
136 CPU_AMD_11_BIT,
137};
138
139#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT)
140#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT)
141#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT)
142#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT)
143#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT)
144#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT)
145#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT)
146#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT)
147
148#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
149#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2)
150#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
151#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM)
152#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
153#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM)
154#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON)
155#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON)
156#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
157#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON)
158#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON)
159#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT)
160#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX)
161#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE)
162#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
163#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
164#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
165#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT)
166#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE)
167
168/* Select all supported Intel CPUs */
169#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
170
171#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT)
172#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT)
173#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT)
174#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT)
175#define CPU_AMD_10 (1 << CPU_AMD_10_BIT)
176#define CPU_AMD_11 (1 << CPU_AMD_11_BIT)
177
178#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11)
179#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS)
180#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS)
181#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS)
182
183/* Select all supported AMD CPUs */
184#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS)
185
186/* Select all supported CPUs */
187#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL)
188
189#define MAX_CPU_FILES 512
190
191struct cpu_private {
192 unsigned cpu;
193 unsigned type;
194 unsigned reg;
195 unsigned file;
196};
197
198struct cpu_debug_base {
199 char *name; /* Register name */
200 unsigned flag; /* Register flag */
201 unsigned write; /* Register write flag */
202};
203
204/*
205 * Currently this looks similar to cpu_debug_base, but once more files
206 * are added, cpu_file_base will go in a different direction
207 */
208struct cpu_file_base {
209 char *name; /* Register file name */
210 unsigned flag; /* Register file flag */
211 unsigned write; /* Register write flag */
212};
213
214struct cpu_cpuX_base {
215 struct dentry *dentry; /* Register dentry */
216 int init; /* Register index file */
217};
218
219struct cpu_debug_range {
220 unsigned min; /* Register range min */
221 unsigned max; /* Register range max */
222 unsigned flag; /* Supported flags */
223 unsigned model; /* Supported models */
224};
225
226#endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7301e60dc4a8..0beba0d1468d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32];
213#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) 213#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
214#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) 214#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
215#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) 215#define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3)
216#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
216#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) 217#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
217#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) 218#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
218#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) 219#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index a7f3c75f8ad7..61c852fa346b 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -3,8 +3,6 @@
3#ifndef __ASSEMBLY__ 3#ifndef __ASSEMBLY__
4#include <linux/cpumask.h> 4#include <linux/cpumask.h>
5 5
6#ifdef CONFIG_X86_64
7
8extern cpumask_var_t cpu_callin_mask; 6extern cpumask_var_t cpu_callin_mask;
9extern cpumask_var_t cpu_callout_mask; 7extern cpumask_var_t cpu_callout_mask;
10extern cpumask_var_t cpu_initialized_mask; 8extern cpumask_var_t cpu_initialized_mask;
@@ -12,21 +10,5 @@ extern cpumask_var_t cpu_sibling_setup_mask;
12 10
13extern void setup_cpu_local_masks(void); 11extern void setup_cpu_local_masks(void);
14 12
15#else /* CONFIG_X86_32 */
16
17extern cpumask_t cpu_callin_map;
18extern cpumask_t cpu_callout_map;
19extern cpumask_t cpu_initialized;
20extern cpumask_t cpu_sibling_setup_map;
21
22#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
23#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
24#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
25#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
26
27static inline void setup_cpu_local_masks(void) { }
28
29#endif /* CONFIG_X86_32 */
30
31#endif /* __ASSEMBLY__ */ 13#endif /* __ASSEMBLY__ */
32#endif /* _ASM_X86_CPUMASK_H */ 14#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index dc27705f5443..5623c50d67b2 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr)
91#define store_gdt(dtr) native_store_gdt(dtr) 91#define store_gdt(dtr) native_store_gdt(dtr)
92#define store_idt(dtr) native_store_idt(dtr) 92#define store_idt(dtr) native_store_idt(dtr)
93#define store_tr(tr) (tr = native_store_tr()) 93#define store_tr(tr) (tr = native_store_tr())
94#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
95 94
96#define load_TLS(t, cpu) native_load_tls(t, cpu) 95#define load_TLS(t, cpu) native_load_tls(t, cpu)
97#define set_ldt native_set_ldt 96#define set_ldt native_set_ldt
@@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
112} 111}
113#endif /* CONFIG_PARAVIRT */ 112#endif /* CONFIG_PARAVIRT */
114 113
114#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
115
115static inline void native_write_idt_entry(gate_desc *idt, int entry, 116static inline void native_write_idt_entry(gate_desc *idt, int entry,
116 const gate_desc *gate) 117 const gate_desc *gate)
117{ 118{
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 3c034f48fdb0..4994a20acbcb 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,7 +6,7 @@ struct dev_archdata {
6 void *acpi_handle; 6 void *acpi_handle;
7#endif 7#endif
8#ifdef CONFIG_X86_64 8#ifdef CONFIG_X86_64
9struct dma_mapping_ops *dma_ops; 9struct dma_map_ops *dma_ops;
10#endif 10#endif
11#ifdef CONFIG_DMAR 11#ifdef CONFIG_DMAR
12 void *iommu; /* hook for IOMMU specific extension */ 12 void *iommu; /* hook for IOMMU specific extension */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 132a134d12f2..cea7b74963e9 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -7,6 +7,8 @@
7 */ 7 */
8 8
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
10#include <linux/dma-debug.h>
11#include <linux/dma-attrs.h>
10#include <asm/io.h> 12#include <asm/io.h>
11#include <asm/swiotlb.h> 13#include <asm/swiotlb.h>
12#include <asm-generic/dma-coherent.h> 14#include <asm-generic/dma-coherent.h>
@@ -16,47 +18,9 @@ extern int iommu_merge;
16extern struct device x86_dma_fallback_dev; 18extern struct device x86_dma_fallback_dev;
17extern int panic_on_overflow; 19extern int panic_on_overflow;
18 20
19struct dma_mapping_ops { 21extern struct dma_map_ops *dma_ops;
20 int (*mapping_error)(struct device *dev, 22
21 dma_addr_t dma_addr); 23static inline struct dma_map_ops *get_dma_ops(struct device *dev)
22 void* (*alloc_coherent)(struct device *dev, size_t size,
23 dma_addr_t *dma_handle, gfp_t gfp);
24 void (*free_coherent)(struct device *dev, size_t size,
25 void *vaddr, dma_addr_t dma_handle);
26 dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
27 size_t size, int direction);
28 void (*unmap_single)(struct device *dev, dma_addr_t addr,
29 size_t size, int direction);
30 void (*sync_single_for_cpu)(struct device *hwdev,
31 dma_addr_t dma_handle, size_t size,
32 int direction);
33 void (*sync_single_for_device)(struct device *hwdev,
34 dma_addr_t dma_handle, size_t size,
35 int direction);
36 void (*sync_single_range_for_cpu)(struct device *hwdev,
37 dma_addr_t dma_handle, unsigned long offset,
38 size_t size, int direction);
39 void (*sync_single_range_for_device)(struct device *hwdev,
40 dma_addr_t dma_handle, unsigned long offset,
41 size_t size, int direction);
42 void (*sync_sg_for_cpu)(struct device *hwdev,
43 struct scatterlist *sg, int nelems,
44 int direction);
45 void (*sync_sg_for_device)(struct device *hwdev,
46 struct scatterlist *sg, int nelems,
47 int direction);
48 int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
49 int nents, int direction);
50 void (*unmap_sg)(struct device *hwdev,
51 struct scatterlist *sg, int nents,
52 int direction);
53 int (*dma_supported)(struct device *hwdev, u64 mask);
54 int is_phys;
55};
56
57extern struct dma_mapping_ops *dma_ops;
58
59static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
60{ 24{
61#ifdef CONFIG_X86_32 25#ifdef CONFIG_X86_32
62 return dma_ops; 26 return dma_ops;
@@ -71,7 +35,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
71/* Make sure we keep the same behaviour */ 35/* Make sure we keep the same behaviour */
72static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 36static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
73{ 37{
74 struct dma_mapping_ops *ops = get_dma_ops(dev); 38 struct dma_map_ops *ops = get_dma_ops(dev);
75 if (ops->mapping_error) 39 if (ops->mapping_error)
76 return ops->mapping_error(dev, dma_addr); 40 return ops->mapping_error(dev, dma_addr);
77 41
@@ -90,137 +54,167 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
90 54
91static inline dma_addr_t 55static inline dma_addr_t
92dma_map_single(struct device *hwdev, void *ptr, size_t size, 56dma_map_single(struct device *hwdev, void *ptr, size_t size,
93 int direction) 57 enum dma_data_direction dir)
94{ 58{
95 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 59 struct dma_map_ops *ops = get_dma_ops(hwdev);
96 60 dma_addr_t addr;
97 BUG_ON(!valid_dma_direction(direction)); 61
98 return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); 62 BUG_ON(!valid_dma_direction(dir));
63 addr = ops->map_page(hwdev, virt_to_page(ptr),
64 (unsigned long)ptr & ~PAGE_MASK, size,
65 dir, NULL);
66 debug_dma_map_page(hwdev, virt_to_page(ptr),
67 (unsigned long)ptr & ~PAGE_MASK, size,
68 dir, addr, true);
69 return addr;
99} 70}
100 71
101static inline void 72static inline void
102dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 73dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
103 int direction) 74 enum dma_data_direction dir)
104{ 75{
105 struct dma_mapping_ops *ops = get_dma_ops(dev); 76 struct dma_map_ops *ops = get_dma_ops(dev);
106 77
107 BUG_ON(!valid_dma_direction(direction)); 78 BUG_ON(!valid_dma_direction(dir));
108 if (ops->unmap_single) 79 if (ops->unmap_page)
109 ops->unmap_single(dev, addr, size, direction); 80 ops->unmap_page(dev, addr, size, dir, NULL);
81 debug_dma_unmap_page(dev, addr, size, dir, true);
110} 82}
111 83
112static inline int 84static inline int
113dma_map_sg(struct device *hwdev, struct scatterlist *sg, 85dma_map_sg(struct device *hwdev, struct scatterlist *sg,
114 int nents, int direction) 86 int nents, enum dma_data_direction dir)
115{ 87{
116 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 88 struct dma_map_ops *ops = get_dma_ops(hwdev);
89 int ents;
90
91 BUG_ON(!valid_dma_direction(dir));
92 ents = ops->map_sg(hwdev, sg, nents, dir, NULL);
93 debug_dma_map_sg(hwdev, sg, nents, ents, dir);
117 94
118 BUG_ON(!valid_dma_direction(direction)); 95 return ents;
119 return ops->map_sg(hwdev, sg, nents, direction);
120} 96}
121 97
122static inline void 98static inline void
123dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, 99dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
124 int direction) 100 enum dma_data_direction dir)
125{ 101{
126 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 102 struct dma_map_ops *ops = get_dma_ops(hwdev);
127 103
128 BUG_ON(!valid_dma_direction(direction)); 104 BUG_ON(!valid_dma_direction(dir));
105 debug_dma_unmap_sg(hwdev, sg, nents, dir);
129 if (ops->unmap_sg) 106 if (ops->unmap_sg)
130 ops->unmap_sg(hwdev, sg, nents, direction); 107 ops->unmap_sg(hwdev, sg, nents, dir, NULL);
131} 108}
132 109
133static inline void 110static inline void
134dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 111dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
135 size_t size, int direction) 112 size_t size, enum dma_data_direction dir)
136{ 113{
137 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 114 struct dma_map_ops *ops = get_dma_ops(hwdev);
138 115
139 BUG_ON(!valid_dma_direction(direction)); 116 BUG_ON(!valid_dma_direction(dir));
140 if (ops->sync_single_for_cpu) 117 if (ops->sync_single_for_cpu)
141 ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); 118 ops->sync_single_for_cpu(hwdev, dma_handle, size, dir);
119 debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir);
142 flush_write_buffers(); 120 flush_write_buffers();
143} 121}
144 122
145static inline void 123static inline void
146dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, 124dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
147 size_t size, int direction) 125 size_t size, enum dma_data_direction dir)
148{ 126{
149 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 127 struct dma_map_ops *ops = get_dma_ops(hwdev);
150 128
151 BUG_ON(!valid_dma_direction(direction)); 129 BUG_ON(!valid_dma_direction(dir));
152 if (ops->sync_single_for_device) 130 if (ops->sync_single_for_device)
153 ops->sync_single_for_device(hwdev, dma_handle, size, direction); 131 ops->sync_single_for_device(hwdev, dma_handle, size, dir);
132 debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir);
154 flush_write_buffers(); 133 flush_write_buffers();
155} 134}
156 135
157static inline void 136static inline void
158dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, 137dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
159 unsigned long offset, size_t size, int direction) 138 unsigned long offset, size_t size,
139 enum dma_data_direction dir)
160{ 140{
161 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 141 struct dma_map_ops *ops = get_dma_ops(hwdev);
162 142
163 BUG_ON(!valid_dma_direction(direction)); 143 BUG_ON(!valid_dma_direction(dir));
164 if (ops->sync_single_range_for_cpu) 144 if (ops->sync_single_range_for_cpu)
165 ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, 145 ops->sync_single_range_for_cpu(hwdev, dma_handle, offset,
166 size, direction); 146 size, dir);
147 debug_dma_sync_single_range_for_cpu(hwdev, dma_handle,
148 offset, size, dir);
167 flush_write_buffers(); 149 flush_write_buffers();
168} 150}
169 151
170static inline void 152static inline void
171dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, 153dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
172 unsigned long offset, size_t size, 154 unsigned long offset, size_t size,
173 int direction) 155 enum dma_data_direction dir)
174{ 156{
175 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 157 struct dma_map_ops *ops = get_dma_ops(hwdev);
176 158
177 BUG_ON(!valid_dma_direction(direction)); 159 BUG_ON(!valid_dma_direction(dir));
178 if (ops->sync_single_range_for_device) 160 if (ops->sync_single_range_for_device)
179 ops->sync_single_range_for_device(hwdev, dma_handle, 161 ops->sync_single_range_for_device(hwdev, dma_handle,
180 offset, size, direction); 162 offset, size, dir);
163 debug_dma_sync_single_range_for_device(hwdev, dma_handle,
164 offset, size, dir);
181 flush_write_buffers(); 165 flush_write_buffers();
182} 166}
183 167
184static inline void 168static inline void
185dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, 169dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
186 int nelems, int direction) 170 int nelems, enum dma_data_direction dir)
187{ 171{
188 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 172 struct dma_map_ops *ops = get_dma_ops(hwdev);
189 173
190 BUG_ON(!valid_dma_direction(direction)); 174 BUG_ON(!valid_dma_direction(dir));
191 if (ops->sync_sg_for_cpu) 175 if (ops->sync_sg_for_cpu)
192 ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); 176 ops->sync_sg_for_cpu(hwdev, sg, nelems, dir);
177 debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir);
193 flush_write_buffers(); 178 flush_write_buffers();
194} 179}
195 180
196static inline void 181static inline void
197dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, 182dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
198 int nelems, int direction) 183 int nelems, enum dma_data_direction dir)
199{ 184{
200 struct dma_mapping_ops *ops = get_dma_ops(hwdev); 185 struct dma_map_ops *ops = get_dma_ops(hwdev);
201 186
202 BUG_ON(!valid_dma_direction(direction)); 187 BUG_ON(!valid_dma_direction(dir));
203 if (ops->sync_sg_for_device) 188 if (ops->sync_sg_for_device)
204 ops->sync_sg_for_device(hwdev, sg, nelems, direction); 189 ops->sync_sg_for_device(hwdev, sg, nelems, dir);
190 debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir);
205 191
206 flush_write_buffers(); 192 flush_write_buffers();
207} 193}
208 194
209static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, 195static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
210 size_t offset, size_t size, 196 size_t offset, size_t size,
211 int direction) 197 enum dma_data_direction dir)
212{ 198{
213 struct dma_mapping_ops *ops = get_dma_ops(dev); 199 struct dma_map_ops *ops = get_dma_ops(dev);
200 dma_addr_t addr;
214 201
215 BUG_ON(!valid_dma_direction(direction)); 202 BUG_ON(!valid_dma_direction(dir));
216 return ops->map_single(dev, page_to_phys(page) + offset, 203 addr = ops->map_page(dev, page, offset, size, dir, NULL);
217 size, direction); 204 debug_dma_map_page(dev, page, offset, size, dir, addr, false);
205
206 return addr;
218} 207}
219 208
220static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, 209static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
221 size_t size, int direction) 210 size_t size, enum dma_data_direction dir)
222{ 211{
223 dma_unmap_single(dev, addr, size, direction); 212 struct dma_map_ops *ops = get_dma_ops(dev);
213
214 BUG_ON(!valid_dma_direction(dir));
215 if (ops->unmap_page)
216 ops->unmap_page(dev, addr, size, dir, NULL);
217 debug_dma_unmap_page(dev, addr, size, dir, false);
224} 218}
225 219
226static inline void 220static inline void
@@ -266,7 +260,7 @@ static inline void *
266dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, 260dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
267 gfp_t gfp) 261 gfp_t gfp)
268{ 262{
269 struct dma_mapping_ops *ops = get_dma_ops(dev); 263 struct dma_map_ops *ops = get_dma_ops(dev);
270 void *memory; 264 void *memory;
271 265
272 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); 266 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -285,20 +279,24 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
285 if (!ops->alloc_coherent) 279 if (!ops->alloc_coherent)
286 return NULL; 280 return NULL;
287 281
288 return ops->alloc_coherent(dev, size, dma_handle, 282 memory = ops->alloc_coherent(dev, size, dma_handle,
289 dma_alloc_coherent_gfp_flags(dev, gfp)); 283 dma_alloc_coherent_gfp_flags(dev, gfp));
284 debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
285
286 return memory;
290} 287}
291 288
292static inline void dma_free_coherent(struct device *dev, size_t size, 289static inline void dma_free_coherent(struct device *dev, size_t size,
293 void *vaddr, dma_addr_t bus) 290 void *vaddr, dma_addr_t bus)
294{ 291{
295 struct dma_mapping_ops *ops = get_dma_ops(dev); 292 struct dma_map_ops *ops = get_dma_ops(dev);
296 293
297 WARN_ON(irqs_disabled()); /* for portability */ 294 WARN_ON(irqs_disabled()); /* for portability */
298 295
299 if (dma_release_from_coherent(dev, get_order(size), vaddr)) 296 if (dma_release_from_coherent(dev, get_order(size), vaddr))
300 return; 297 return;
301 298
299 debug_dma_free_coherent(dev, size, vaddr, bus);
302 if (ops->free_coherent) 300 if (ops->free_coherent)
303 ops->free_coherent(dev, size, vaddr, bus); 301 ops->free_coherent(dev, size, vaddr, bus);
304} 302}
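
The hunks above convert this header from the private struct dma_mapping_ops to the generic struct dma_map_ops: dma_map_single() now splits its pointer into a page plus offset and dispatches through ops->map_page(), with a debug_dma_* hook after each operation. Below is a minimal userspace model of that dispatch; struct page, the 4 KiB mask and the nommu identity policy are toy stand-ins, and only the ops-table shape mirrors the patched header.

#include <stdint.h>
#include <stdio.h>

typedef uintptr_t dma_addr_t;
enum dma_data_direction { DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE };

struct device;
struct page { void *vaddr; };	/* toy stand-in for struct page */

struct dma_map_ops {
	dma_addr_t (*map_page)(struct device *dev, struct page *page,
			       unsigned long offset, size_t size,
			       enum dma_data_direction dir, void *attrs);
};

/* "nommu" policy: bus address == (pseudo-)physical address */
static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
				 unsigned long offset, size_t size,
				 enum dma_data_direction dir, void *attrs)
{
	(void)dev; (void)size; (void)dir; (void)attrs;
	return (dma_addr_t)page->vaddr + offset;
}

static struct dma_map_ops nommu_dma_ops = { .map_page = nommu_map_page };
static struct dma_map_ops *dma_ops = &nommu_dma_ops;

/* Mirrors the new dma_map_single(): page + offset, then ops->map_page() */
static dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
				 enum dma_data_direction dir)
{
	struct page pg = { (void *)((uintptr_t)ptr & ~0xfffUL) };
	unsigned long off = (uintptr_t)ptr & 0xfffUL;

	return dma_ops->map_page(dev, &pg, off, size, dir, NULL);
}

int main(void)
{
	char buf[64];

	printf("bus addr: %#lx\n",
	       (unsigned long)dma_map_single(NULL, buf, sizeof(buf),
					     DMA_TO_DEVICE));
	return 0;
}
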
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h
index bc68212c6bc0..fd8f9e2ca35f 100644
--- a/arch/x86/include/asm/dmi.h
+++ b/arch/x86/include/asm/dmi.h
@@ -1,22 +1,15 @@
1#ifndef _ASM_X86_DMI_H 1#ifndef _ASM_X86_DMI_H
2#define _ASM_X86_DMI_H 2#define _ASM_X86_DMI_H
3 3
4#include <asm/io.h> 4#include <linux/compiler.h>
5 5#include <linux/init.h>
6#define DMI_MAX_DATA 2048
7 6
8extern int dmi_alloc_index; 7#include <asm/io.h>
9extern char dmi_alloc_data[DMI_MAX_DATA]; 8#include <asm/setup.h>
10 9
11/* This is so early that there is no good way to allocate dynamic memory. 10static __always_inline __init void *dmi_alloc(unsigned len)
12 Allocate data in a BSS array.
13static inline void *dmi_alloc(unsigned len)
14{ 11{
15 int idx = dmi_alloc_index; 12 return extend_brk(len, sizeof(int));
16 if ((dmi_alloc_index + len) > DMI_MAX_DATA)
17 return NULL;
18 dmi_alloc_index += len;
19 return dmi_alloc_data + idx;
20} 13}
21 14
22/* Use early IO mappings for DMI because it's initialized early */ 15/* Use early IO mappings for DMI because it's initialized early */
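
dmi_alloc() now delegates to extend_brk() instead of carving from a fixed BSS array. A toy model of that brk-style allocation, assuming the usual bump-pointer semantics (power-of-two alignment, no free, and a fixed arena standing in for the reserved brk window):

#include <stddef.h>
#include <stdio.h>

static unsigned char brk_area[4096];	/* stand-in for the brk window */
static size_t brk_end;

static void *extend_brk(size_t size, size_t align)
{
	size_t p = (brk_end + align - 1) & ~(align - 1);

	if (p + size > sizeof(brk_area))
		return NULL;		/* the real kernel BUG()s instead */
	brk_end = p + size;
	return brk_area + p;
}

static void *dmi_alloc(unsigned len)
{
	return extend_brk(len, sizeof(int));
}

int main(void)
{
	void *a = dmi_alloc(10);
	void *b = dmi_alloc(10);

	printf("%p %p\n", a, b);	/* b is int-aligned, just past a */
	return 0;
}
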
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 00d41ce4c844..7ecba4d85089 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,7 +72,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type);
72extern void e820_add_region(u64 start, u64 size, int type); 72extern void e820_add_region(u64 start, u64 size, int type);
73extern void e820_print_map(char *who); 73extern void e820_print_map(char *who);
74extern int 74extern int
75sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); 75sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
76extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, 76extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
77 unsigned new_type); 77 unsigned new_type);
78extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, 78extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index db24c2278be0..bd2c6511c887 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,6 +28,13 @@
28 28
29#endif 29#endif
30 30
31/* FIXME: we don't want this to stay hardcoded */
32#ifdef CONFIG_X86_64
33# define FTRACE_SYSCALL_MAX 296
34#else
35# define FTRACE_SYSCALL_MAX 333
36#endif
37
31#ifdef CONFIG_FUNCTION_TRACER 38#ifdef CONFIG_FUNCTION_TRACER
32#define MCOUNT_ADDR ((long)(mcount)) 39#define MCOUNT_ADDR ((long)(mcount))
33#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ 40#define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index bf9276bea660..014c2b85ae45 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
63void *kmap_atomic(struct page *page, enum km_type type); 63void *kmap_atomic(struct page *page, enum km_type type);
64void kunmap_atomic(void *kvaddr, enum km_type type); 64void kunmap_atomic(void *kvaddr, enum km_type type);
65void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); 65void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
66void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
66struct page *kmap_atomic_to_page(void *ptr); 67struct page *kmap_atomic_to_page(void *ptr);
67 68
68#ifndef CONFIG_PARAVIRT 69#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 50ca486fd88c..1f7e62517284 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -129,13 +129,6 @@ typedef struct compat_siginfo {
129 } _sifields; 129 } _sifields;
130} compat_siginfo_t; 130} compat_siginfo_t;
131 131
132struct ustat32 {
133 __u32 f_tfree;
134 compat_ino_t f_tinode;
135 char f_fname[6];
136 char f_fpack[6];
137};
138
139#define IA32_STACK_TOP IA32_PAGE_OFFSET 132#define IA32_STACK_TOP IA32_PAGE_OFFSET
140 133
141#ifdef __KERNEL__ 134#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 59cb4a1317b7..9d826e436010 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -162,9 +162,13 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq);
162extern void ioapic_init_mappings(void); 162extern void ioapic_init_mappings(void);
163 163
164#ifdef CONFIG_X86_64 164#ifdef CONFIG_X86_64
165extern int save_mask_IO_APIC_setup(void); 165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
166extern void restore_IO_APIC_setup(void); 166extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
167extern void reinit_intr_remapped_IO_APIC(int); 167extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
171 struct IO_APIC_route_entry **ioapic_entries);
168#endif 172#endif
169 173
170extern void probe_nr_irqs_gsi(void); 174extern void probe_nr_irqs_gsi(void);
@@ -172,7 +176,7 @@ extern void probe_nr_irqs_gsi(void);
172extern int setup_ioapic_entry(int apic, int irq, 176extern int setup_ioapic_entry(int apic, int irq,
173 struct IO_APIC_route_entry *entry, 177 struct IO_APIC_route_entry *entry,
174 unsigned int destination, int trigger, 178 unsigned int destination, int trigger,
175 int polarity, int vector); 179 int polarity, int vector, int pin);
176extern void ioapic_write_entry(int apic, int pin, 180extern void ioapic_write_entry(int apic, int pin,
177 struct IO_APIC_route_entry e); 181 struct IO_APIC_route_entry e);
178#else /* !CONFIG_X86_IO_APIC */ 182#else /* !CONFIG_X86_IO_APIC */
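
The reworked IO-APIC save/restore API makes the caller own the entry buffer. A hedged sketch of the intended sequence, modelled on an interrupt-remapping enable path; enable_remapping() is an invented stand-in and error handling is trimmed.

#include <linux/init.h>
#include <linux/errno.h>
#include <asm/io_apic.h>

extern int enable_remapping(void);	/* hypothetical */

static int __init switch_mode_with_backup(void)
{
	struct IO_APIC_route_entry **entries;
	int ret;

	entries = alloc_ioapic_entries();
	if (!entries)
		return -ENOMEM;

	ret = save_IO_APIC_setup(entries);	/* snapshot every RTE */
	if (ret)
		goto out;
	mask_IO_APIC_setup(entries);		/* quiesce pins */

	ret = enable_remapping();
	if (ret)
		ret = restore_IO_APIC_setup(entries);	/* roll back */
out:
	free_ioapic_entries(entries);
	return ret;
}
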
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index a6ee9e6f530f..af326a2975b5 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -3,7 +3,7 @@
3 3
4extern void pci_iommu_shutdown(void); 4extern void pci_iommu_shutdown(void);
5extern void no_iommu_init(void); 5extern void no_iommu_init(void);
6extern struct dma_mapping_ops nommu_dma_ops; 6extern struct dma_map_ops nommu_dma_ops;
7extern int force_iommu, no_iommu; 7extern int force_iommu, no_iommu;
8extern int iommu_detected; 8extern int iommu_detected;
9 9
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 20e1fd588dbf..0396760fccb8 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -1,8 +1,6 @@
1#ifndef _ASM_X86_IRQ_REMAPPING_H 1#ifndef _ASM_X86_IRQ_REMAPPING_H
2#define _ASM_X86_IRQ_REMAPPING_H 2#define _ASM_X86_IRQ_REMAPPING_H
3 3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) 4#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7 5
8#endif /* _ASM_X86_IRQ_REMAPPING_H */ 6#endif /* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0ceb6d19ed30..317ff1703d0b 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
9# define PAGES_NR 4 9# define PAGES_NR 4
10#else 10#else
11# define PA_CONTROL_PAGE 0 11# define PA_CONTROL_PAGE 0
12# define PA_TABLE_PAGE 1 12# define VA_CONTROL_PAGE 1
13# define PAGES_NR 2 13# define PA_TABLE_PAGE 2
14# define PA_SWAP_PAGE 3
15# define PAGES_NR 4
14#endif 16#endif
15 17
16#ifdef CONFIG_X86_32
17# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 18# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
18#endif
19 19
20#ifndef __ASSEMBLY__ 20#ifndef __ASSEMBLY__
21 21
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
136 unsigned int has_pae, 136 unsigned int has_pae,
137 unsigned int preserve_context); 137 unsigned int preserve_context);
138#else 138#else
139NORET_TYPE void 139unsigned long
140relocate_kernel(unsigned long indirection_page, 140relocate_kernel(unsigned long indirection_page,
141 unsigned long page_list, 141 unsigned long page_list,
142 unsigned long start_address) ATTRIB_NORET; 142 unsigned long start_address,
143 unsigned int preserve_context);
143#endif 144#endif
144 145
145#define ARCH_HAS_KIMAGE_ARCH 146#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 886c9402ec45..dc3f6cf11704 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -15,6 +15,7 @@
15#define __KVM_HAVE_DEVICE_ASSIGNMENT 15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI 16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI 17#define __KVM_HAVE_USER_NMI
18#define __KVM_HAVE_GUEST_DEBUG
18 19
19/* Architectural interrupt line count. */ 20/* Architectural interrupt line count. */
20#define KVM_NR_INTERRUPTS 256 21#define KVM_NR_INTERRUPTS 256
@@ -212,7 +213,30 @@ struct kvm_pit_channel_state {
212 __s64 count_load_time; 213 __s64 count_load_time;
213}; 214};
214 215
216struct kvm_debug_exit_arch {
217 __u32 exception;
218 __u32 pad;
219 __u64 pc;
220 __u64 dr6;
221 __u64 dr7;
222};
223
224#define KVM_GUESTDBG_USE_SW_BP 0x00010000
225#define KVM_GUESTDBG_USE_HW_BP 0x00020000
226#define KVM_GUESTDBG_INJECT_DB 0x00040000
227#define KVM_GUESTDBG_INJECT_BP 0x00080000
228
229/* for KVM_SET_GUEST_DEBUG */
230struct kvm_guest_debug_arch {
231 __u64 debugreg[8];
232};
233
215struct kvm_pit_state { 234struct kvm_pit_state {
216 struct kvm_pit_channel_state channels[3]; 235 struct kvm_pit_channel_state channels[3];
217}; 236};
237
238struct kvm_reinject_control {
239 __u8 pit_reinject;
240 __u8 reserved[31];
241};
218#endif /* _ASM_X86_KVM_H */ 242#endif /* _ASM_X86_KVM_H */
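
The new guest-debug ABI replaces the old struct kvm_debug_guest: userspace passes control flags plus a full shadow of the debug registers. A hedged userspace sketch of arming one hardware breakpoint; vcpu_fd is an open vcpu descriptor, KVM_GUESTDBG_ENABLE comes from the generic <linux/kvm.h> side, and the DR7 value is the minimal local-enable encoding rather than what a production VMM would write.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

static int arm_hw_breakpoint(int vcpu_fd, unsigned long long gva)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
	dbg.arch.debugreg[0] = gva;	/* DR0: linear address to trap */
	dbg.arch.debugreg[7] = 0x1;	/* DR7: local-enable breakpoint 0 */

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
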
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fb..f0faf58044ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
22#include <asm/pvclock-abi.h> 22#include <asm/pvclock-abi.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/mtrr.h> 24#include <asm/mtrr.h>
25#include <asm/msr-index.h>
25 26
26#define KVM_MAX_VCPUS 16 27#define KVM_MAX_VCPUS 16
27#define KVM_MEMORY_SLOTS 32 28#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
134 135
135#define KVM_NR_MEM_OBJS 40 136#define KVM_NR_MEM_OBJS 40
136 137
137struct kvm_guest_debug { 138#define KVM_NR_DB_REGS 4
138 int enabled; 139
139 unsigned long bp[4]; 140#define DR6_BD (1 << 13)
140 int singlestep; 141#define DR6_BS (1 << 14)
141}; 142#define DR6_FIXED_1 0xffff0ff0
143#define DR6_VOLATILE 0x0000e00f
144
145#define DR7_BP_EN_MASK 0x000000ff
146#define DR7_GE (1 << 9)
147#define DR7_GD (1 << 13)
148#define DR7_FIXED_1 0x00000400
149#define DR7_VOLATILE 0xffff23ff
142 150
143/* 151/*
144 * We don't want allocation failures within the mmu code, so we preallocate 152 * We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
162 * bits 0:3 - total guest paging levels (2-4, or zero for real mode) 170 * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
163 * bits 4:7 - page table level for this shadow (1-4) 171 * bits 4:7 - page table level for this shadow (1-4)
164 * bits 8:9 - page table quadrant for 2-level guests 172 * bits 8:9 - page table quadrant for 2-level guests
165 * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode) 173 * bit 16 - direct mapping of virtual to physical at gfn,
174 * used for real mode and two-dimensional paging
166 * bits 17:19 - common access permissions for all ptes in this shadow page 175 * bits 17:19 - common access permissions for all ptes in this shadow page
167 */ 176 */
168union kvm_mmu_page_role { 177union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
172 unsigned level:4; 181 unsigned level:4;
173 unsigned quadrant:2; 182 unsigned quadrant:2;
174 unsigned pad_for_nice_hex_output:6; 183 unsigned pad_for_nice_hex_output:6;
175 unsigned metaphysical:1; 184 unsigned direct:1;
176 unsigned access:3; 185 unsigned access:3;
177 unsigned invalid:1; 186 unsigned invalid:1;
187 unsigned cr4_pge:1;
178 }; 188 };
179}; 189};
180 190
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
218 char buf[512] __aligned(sizeof(long)); 228 char buf[512] __aligned(sizeof(long));
219}; 229};
220 230
231struct kvm_pio_request {
232 unsigned long count;
233 int cur_count;
234 gva_t guest_gva;
235 int in;
236 int port;
237 int size;
238 int string;
239 int down;
240 int rep;
241};
242
221/* 243/*
222 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level 244 * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
223 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu 245 * 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
236 hpa_t root_hpa; 258 hpa_t root_hpa;
237 int root_level; 259 int root_level;
238 int shadow_root_level; 260 int shadow_root_level;
261 union kvm_mmu_page_role base_role;
239 262
240 u64 *pae_root; 263 u64 *pae_root;
241}; 264};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
258 unsigned long cr3; 281 unsigned long cr3;
259 unsigned long cr4; 282 unsigned long cr4;
260 unsigned long cr8; 283 unsigned long cr8;
284 u32 hflags;
261 u64 pdptrs[4]; /* pae */ 285 u64 pdptrs[4]; /* pae */
262 u64 shadow_efer; 286 u64 shadow_efer;
263 u64 apic_base; 287 u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
338 362
339 struct mtrr_state_type mtrr_state; 363 struct mtrr_state_type mtrr_state;
340 u32 pat; 364 u32 pat;
365
366 int switch_db_regs;
367 unsigned long host_db[KVM_NR_DB_REGS];
368 unsigned long host_dr6;
369 unsigned long host_dr7;
370 unsigned long db[KVM_NR_DB_REGS];
371 unsigned long dr6;
372 unsigned long dr7;
373 unsigned long eff_db[KVM_NR_DB_REGS];
341}; 374};
342 375
343struct kvm_mem_alias { 376struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
378 411
379 unsigned long irq_sources_bitmap; 412 unsigned long irq_sources_bitmap;
380 unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; 413 unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
414 u64 vm_init_tsc;
381}; 415};
382 416
383struct kvm_vm_stat { 417struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
446 void (*vcpu_put)(struct kvm_vcpu *vcpu); 480 void (*vcpu_put)(struct kvm_vcpu *vcpu);
447 481
448 int (*set_guest_debug)(struct kvm_vcpu *vcpu, 482 int (*set_guest_debug)(struct kvm_vcpu *vcpu,
449 struct kvm_debug_guest *dbg); 483 struct kvm_guest_debug *dbg);
450 void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
451 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); 484 int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
452 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); 485 int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
453 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); 486 u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -583,16 +616,12 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
583void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, 616void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
584 u32 error_code); 617 u32 error_code);
585 618
586void kvm_pic_set_irq(void *opaque, int irq, int level); 619int kvm_pic_set_irq(void *opaque, int irq, int level);
587 620
588void kvm_inject_nmi(struct kvm_vcpu *vcpu); 621void kvm_inject_nmi(struct kvm_vcpu *vcpu);
589 622
590void fx_init(struct kvm_vcpu *vcpu); 623void fx_init(struct kvm_vcpu *vcpu);
591 624
592int emulator_read_std(unsigned long addr,
593 void *val,
594 unsigned int bytes,
595 struct kvm_vcpu *vcpu);
596int emulator_write_emulated(unsigned long addr, 625int emulator_write_emulated(unsigned long addr,
597 const void *val, 626 const void *val,
598 unsigned int bytes, 627 unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
737 TASK_SWITCH_GATE = 3, 766 TASK_SWITCH_GATE = 3,
738}; 767};
739 768
769#define HF_GIF_MASK (1 << 0)
770#define HF_HIF_MASK (1 << 1)
771#define HF_VINTR_MASK (1 << 2)
772
740/* 773/*
741 * Hardware virtualization extension instructions may fault if a 774 * Hardware virtualization extension instructions may fault if a
742 * reboot turns off virtualization while processes are running. 775 * reboot turns off virtualization while processes are running.
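
The DR6_FIXED_1/DR6_VOLATILE split added above encodes which DR6 bits the hypervisor may import from hardware and which must always read as one. A self-contained illustration of the merge, using the mask values from the hunk:

#include <stdio.h>

#define DR6_FIXED_1	0xffff0ff0UL
#define DR6_VOLATILE	0x0000e00fUL

static unsigned long merge_dr6(unsigned long guest_dr6, unsigned long hw_dr6)
{
	return (guest_dr6 & ~DR6_VOLATILE)	/* keep guest's other state */
	     | (hw_dr6 & DR6_VOLATILE)		/* import B0-B3/BD/BS/BT */
	     | DR6_FIXED_1;			/* architecturally fixed 1s */
}

int main(void)
{
	/* breakpoint 0 hit (B0) merged into a clean guest DR6 */
	printf("%#lx\n", merge_dr6(DR6_FIXED_1, 0x1));
	return 0;
}
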
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 43894428c3c2..0f4ee7148afe 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -26,36 +26,20 @@
26 26
27#ifndef __ASSEMBLY__ 27#ifndef __ASSEMBLY__
28#include <asm/hw_irq.h> 28#include <asm/hw_irq.h>
29#include <asm/kvm_para.h>
29 30
30/*G:031 But first, how does our Guest contact the Host to ask for privileged 31/*G:031 But first, how does our Guest contact the Host to ask for privileged
31 * operations? There are two ways: the direct way is to make a "hypercall", 32 * operations? There are two ways: the direct way is to make a "hypercall",
32 * to make requests of the Host Itself. 33 * to make requests of the Host Itself.
33 * 34 *
34 * Our hypercall mechanism uses the highest unused trap code (traps 32 and 35 * We use the KVM hypercall mechanism. Eighteen hypercalls are
35 * above are used by real hardware interrupts). Fifteen hypercalls are
36 * available: the hypercall number is put in the %eax register, and the 36 * available: the hypercall number is put in the %eax register, and the
37 * arguments (when required) are placed in %edx, %ebx and %ecx. If a return 37 * arguments (when required) are placed in %ebx, %ecx and %edx. If a return
38 * value makes sense, it's returned in %eax. 38 * value makes sense, it's returned in %eax.
39 * 39 *
40 * Grossly invalid calls result in Sudden Death at the hands of the vengeful 40 * Grossly invalid calls result in Sudden Death at the hands of the vengeful
41 * Host, rather than returning failure. This reflects Winston Churchill's 41 * Host, rather than returning failure. This reflects Winston Churchill's
42 * definition of a gentleman: "someone who is only rude intentionally". */ 42 * definition of a gentleman: "someone who is only rude intentionally". */
43static inline unsigned long
44hcall(unsigned long call,
45 unsigned long arg1, unsigned long arg2, unsigned long arg3)
46{
47 /* "int" is the Intel instruction to trigger a trap. */
48 asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
49 /* The call in %eax (aka "a") might be overwritten */
50 : "=a"(call)
51 /* The arguments are in %eax, %edx, %ebx & %ecx */
52 : "a"(call), "d"(arg1), "b"(arg2), "c"(arg3)
53 /* "memory" means this might write somewhere in memory.
54 * This isn't true for all calls, but it's safe to tell
55 * gcc that it might happen so it doesn't get clever. */
56 : "memory");
57 return call;
58}
59/*:*/ 43/*:*/
60 44
61/* Can't use our min() macro here: needs to be a constant */ 45/* Can't use our min() macro here: needs to be a constant */
@@ -64,7 +48,7 @@ hcall(unsigned long call,
64#define LHCALL_RING_SIZE 64 48#define LHCALL_RING_SIZE 64
65struct hcall_args { 49struct hcall_args {
66 /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ 50 /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */
67 unsigned long arg0, arg2, arg3, arg1; 51 unsigned long arg0, arg1, arg2, arg3;
68}; 52};
69 53
70#endif /* !__ASSEMBLY__ */ 54#endif /* !__ASSEMBLY__ */
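
The rewritten comment above describes the KVM register convention that replaces lguest's private trap: call number in %eax, up to three arguments in %ebx, %ecx and %edx, result back in %eax. A sketch of a stub using that convention; this shows the register plumbing only, since executing VMCALL needs a cooperating hypervisor and the real guest goes through kvm_hypercall*() from <asm/kvm_para.h>.

static inline unsigned long
hcall(unsigned long call,
      unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
	/* number in %eax, args in %ebx/%ecx/%edx, result in %eax */
	asm volatile("vmcall"
		     : "=a"(call)
		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3)
		     : "memory");
	return call;
}
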
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index a0d70b46c27c..12d55e773eb6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_LINKAGE_H 1#ifndef _ASM_X86_LINKAGE_H
2#define _ASM_X86_LINKAGE_H 2#define _ASM_X86_LINKAGE_H
3 3
4#include <linux/stringify.h>
5
4#undef notrace 6#undef notrace
5#define notrace __attribute__((no_instrument_function)) 7#define notrace __attribute__((no_instrument_function))
6 8
@@ -53,14 +55,9 @@
53 .globl name; \ 55 .globl name; \
54 name: 56 name:
55 57
56#ifdef CONFIG_X86_64 58#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
57#define __ALIGN .p2align 4,,15 59#define __ALIGN .p2align 4, 0x90
58#define __ALIGN_STR ".p2align 4,,15" 60#define __ALIGN_STR __stringify(__ALIGN)
59#endif
60
61#ifdef CONFIG_X86_ALIGNMENT_16
62#define __ALIGN .align 16,0x90
63#define __ALIGN_STR ".align 16,0x90"
64#endif 61#endif
65 62
66#endif /* __ASSEMBLY__ */ 63#endif /* __ASSEMBLY__ */
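
The replacement folds the 64-bit and ALIGNMENT_16 cases into one definition: 16-byte alignment (.p2align 4) padded with 0x90, i.e. NOPs, with the string form derived from the asm form via __stringify so the two cannot drift apart. The trick, reproduced standalone:

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define __ALIGN		.p2align 4, 0x90
#define __ALIGN_STR	__stringify(__ALIGN)

/*
 * __ALIGN is usable directly in .S files; __ALIGN_STR expands to the
 * C string ".p2align 4, 0x90" for inline asm, e.g.:
 *
 *	asm(__ALIGN_STR "\n" "1:\tnop");
 */
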
diff --git a/arch/x86/include/asm/msidef.h b/arch/x86/include/asm/msidef.h
index 6706b3006f13..4cc48af23fef 100644
--- a/arch/x86/include/asm/msidef.h
+++ b/arch/x86/include/asm/msidef.h
@@ -47,6 +47,7 @@
47#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 47#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00)
50 51
51#define MSI_ADDR_IR_EXT_INT (1 << 4) 52#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3) 53#define MSI_ADDR_IR_SHV (1 << 3)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2dbd2314139e..ec41fc16c167 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,15 @@
18#define _EFER_LME 8 /* Long mode enable */ 18#define _EFER_LME 8 /* Long mode enable */
19#define _EFER_LMA 10 /* Long mode active (read-only) */ 19#define _EFER_LMA 10 /* Long mode active (read-only) */
20#define _EFER_NX 11 /* No execute enable */ 20#define _EFER_NX 11 /* No execute enable */
21#define _EFER_SVME 12 /* Enable virtualization */
22#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
21 23
22#define EFER_SCE (1<<_EFER_SCE) 24#define EFER_SCE (1<<_EFER_SCE)
23#define EFER_LME (1<<_EFER_LME) 25#define EFER_LME (1<<_EFER_LME)
24#define EFER_LMA (1<<_EFER_LMA) 26#define EFER_LMA (1<<_EFER_LMA)
25#define EFER_NX (1<<_EFER_NX) 27#define EFER_NX (1<<_EFER_NX)
28#define EFER_SVME (1<<_EFER_SVME)
29#define EFER_FFXSR (1<<_EFER_FFXSR)
26 30
27/* Intel MSRs. Some also available on other CPUs */ 31/* Intel MSRs. Some also available on other CPUs */
28#define MSR_IA32_PERFCTR0 0x000000c1 32#define MSR_IA32_PERFCTR0 0x000000c1
@@ -365,4 +369,9 @@
365#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b 369#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
366#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c 370#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
367 371
372/* AMD-V MSRs */
373
374#define MSR_VM_CR 0xc0010114
375#define MSR_VM_HSAVE_PA 0xc0010117
376
368#endif /* _ASM_X86_MSR_INDEX_H */ 377#endif /* _ASM_X86_MSR_INDEX_H */
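
EFER.SVME moves from a private SVM header (see the svm.h hunk further below) into the shared EFER bit definitions. A hedged sketch of how setup code might test it, assuming the standard rdmsrl() helper from <asm/msr.h>:

#include <linux/types.h>
#include <asm/msr.h>

static bool svm_already_enabled(void)
{
	u64 efer;

	rdmsrl(MSR_EFER, efer);
	return efer & EFER_SVME;	/* set while VMRUN et al. are legal */
}
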
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f1e4a79a6e41..0f915ae649a7 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,11 @@
39#define __VIRTUAL_MASK_SHIFT 32 39#define __VIRTUAL_MASK_SHIFT 32
40#endif /* CONFIG_X86_PAE */ 40#endif /* CONFIG_X86_PAE */
41 41
42/*
43 * Kernel image size is limited to 512 MB (see arch/x86/kernel/head_32.S)
44 */
45#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
46
42#ifndef __ASSEMBLY__ 47#ifndef __ASSEMBLY__
43 48
44/* 49/*
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 0617d5cc9712..7727aa8b7dda 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -317,8 +317,6 @@ struct pv_mmu_ops {
317#if PAGETABLE_LEVELS >= 3 317#if PAGETABLE_LEVELS >= 3
318#ifdef CONFIG_X86_PAE 318#ifdef CONFIG_X86_PAE
319 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 319 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
320 void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
321 pte_t *ptep, pte_t pte);
322 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, 320 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
323 pte_t *ptep); 321 pte_t *ptep);
324 void (*pmd_clear)(pmd_t *pmdp); 322 void (*pmd_clear)(pmd_t *pmdp);
@@ -389,7 +387,7 @@ extern struct pv_lock_ops pv_lock_ops;
389 387
390#define paravirt_type(op) \ 388#define paravirt_type(op) \
391 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ 389 [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
392 [paravirt_opptr] "m" (op) 390 [paravirt_opptr] "i" (&(op))
393#define paravirt_clobber(clobber) \ 391#define paravirt_clobber(clobber) \
394 [paravirt_clobber] "i" (clobber) 392 [paravirt_clobber] "i" (clobber)
395 393
@@ -443,7 +441,7 @@ int paravirt_disable_iospace(void);
443 * offset into the paravirt_patch_template structure, and can therefore be 441 * offset into the paravirt_patch_template structure, and can therefore be
444 * freely converted back into a structure offset. 442 * freely converted back into a structure offset.
445 */ 443 */
446#define PARAVIRT_CALL "call *%[paravirt_opptr];" 444#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
447 445
448/* 446/*
449 * These macros are intended to wrap calls through one of the paravirt 447 * These macros are intended to wrap calls through one of the paravirt
@@ -1365,13 +1363,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1365 pte.pte, pte.pte >> 32); 1363 pte.pte, pte.pte >> 32);
1366} 1364}
1367 1365
1368static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1369 pte_t *ptep, pte_t pte)
1370{
1371 /* 5 arg words */
1372 pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
1373}
1374
1375static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1366static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1376 pte_t *ptep) 1367 pte_t *ptep)
1377{ 1368{
@@ -1388,12 +1379,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
1388 set_pte(ptep, pte); 1379 set_pte(ptep, pte);
1389} 1380}
1390 1381
1391static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
1392 pte_t *ptep, pte_t pte)
1393{
1394 set_pte(ptep, pte);
1395}
1396
1397static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 1382static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
1398 pte_t *ptep) 1383 pte_t *ptep)
1399{ 1384{
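
The constraint change from "m" (op) to "i" (&(op)), printed with the %c operand modifier, makes the compiler emit the bare slot address, so every paravirt call site becomes call *pv_xxx_ops+OFF with a fixed, patchable operand. A standalone reproduction of just that operand trick; my_ops is invented, and real call sites also marshal arguments and declare the full clobber set.

struct ops { void (*do_thing)(void); };
extern struct ops my_ops;		/* invented example table */

static inline void call_do_thing(void)
{
	/*
	 * %c prints the "i" operand without the $ prefix, so this emits
	 *	call *my_ops
	 * an indirect call through a fixed slot that a runtime patcher
	 * can locate and rewrite.
	 */
	asm volatile("call *%c[opptr]"
		     : : [opptr] "i" (&my_ops.do_thing) : "memory");
}
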
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index a977de23cb4d..b51a1e8b0baf 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -86,12 +86,43 @@ static inline void early_quirks(void) { }
86 86
87extern void pci_iommu_alloc(void); 87extern void pci_iommu_alloc(void);
88 88
89#endif /* __KERNEL__ */ 89/* MSI arch hook */
90#define arch_setup_msi_irqs arch_setup_msi_irqs
91
92#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
93
94#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG)
95
96#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
97 dma_addr_t ADDR_NAME;
98#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
99 __u32 LEN_NAME;
100#define pci_unmap_addr(PTR, ADDR_NAME) \
101 ((PTR)->ADDR_NAME)
102#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
103 (((PTR)->ADDR_NAME) = (VAL))
104#define pci_unmap_len(PTR, LEN_NAME) \
105 ((PTR)->LEN_NAME)
106#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
107 (((PTR)->LEN_NAME) = (VAL))
90 108
91#ifdef CONFIG_X86_32
92# include "pci_32.h"
93#else 109#else
94# include "pci_64.h" 110
111#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
112#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
113#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
114#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
115 do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
116#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
117#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
118 do { break; } while (pci_unmap_len(PTR, LEN_NAME))
119
120#endif
121
122#endif /* __KERNEL__ */
123
124#ifdef CONFIG_X86_64
125#include "pci_64.h"
95#endif 126#endif
96 127
97/* implement the pci_ DMA API in terms of the generic device dma_ one */ 128/* implement the pci_ DMA API in terms of the generic device dma_ one */
@@ -109,11 +140,6 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
109 return sd->node; 140 return sd->node;
110} 141}
111 142
112static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
113{
114 return node_to_cpumask(__pcibus_to_node(bus));
115}
116
117static inline const struct cpumask * 143static inline const struct cpumask *
118cpumask_of_pcibus(const struct pci_bus *bus) 144cpumask_of_pcibus(const struct pci_bus *bus)
119{ 145{
diff --git a/arch/x86/include/asm/pci_32.h b/arch/x86/include/asm/pci_32.h
deleted file mode 100644
index 6f1213a6ef4f..000000000000
--- a/arch/x86/include/asm/pci_32.h
+++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef _ASM_X86_PCI_32_H
2#define _ASM_X86_PCI_32_H
3
4
5#ifdef __KERNEL__
6
7
8/* Dynamic DMA mapping stuff.
9 * i386 has everything mapped statically.
10 */
11
12struct pci_dev;
13
14/* The PCI address space does equal the physical memory
15 * address space. The networking and block device layers use
16 * this boolean for bounce buffer decisions.
17 */
18#define PCI_DMA_BUS_IS_PHYS (1)
19
20/* pci_unmap_{page,single} is a nop so... */
21#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
22#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
23#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
24#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
25 do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
26#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
27#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
28 do { break; } while (pci_unmap_len(PTR, LEN_NAME))
29
30
31#endif /* __KERNEL__ */
32
33
34#endif /* _ASM_X86_PCI_32_H */
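
For configurations where unmapping is a no-op (32-bit without DMA API debugging), the stub macros moved into pci.h use a zero-length array so the cookie takes no space in driver structs, and a dead do/while so the setters generate no code while still referencing the field. A hedged sketch of what a driver sees; my_rx_desc is invented.

#include <linux/pci.h>

struct my_rx_desc {
	void *buf;
	DECLARE_PCI_UNMAP_ADDR(mapping)	/* zero bytes in this config */
};

static inline void stash_mapping(struct my_rx_desc *d, dma_addr_t a)
{
	/*
	 * Expands to do { break; } while (sizeof(d->mapping)): no code is
	 * generated, but a misspelled field name still fails to compile.
	 */
	pci_unmap_addr_set(d, mapping, a);
}
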
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 4da207982777..ae5e40f67daf 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -24,28 +24,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
24 24
25extern void dma32_reserve_bootmem(void); 25extern void dma32_reserve_bootmem(void);
26 26
27/* The PCI address space does equal the physical memory
28 * address space. The networking and block device layers use
29 * this boolean for bounce buffer decisions
30 *
31 * On AMD64 it mostly equals, but we set it to zero if a hardware
32 * IOMMU (gart) or software IOMMU (swiotlb) is available.
33 */
34#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
35
36#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
37 dma_addr_t ADDR_NAME;
38#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
39 __u32 LEN_NAME;
40#define pci_unmap_addr(PTR, ADDR_NAME) \
41 ((PTR)->ADDR_NAME)
42#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
43 (((PTR)->ADDR_NAME) = (VAL))
44#define pci_unmap_len(PTR, LEN_NAME) \
45 ((PTR)->LEN_NAME)
46#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
47 (((PTR)->LEN_NAME) = (VAL))
48
49#endif /* __KERNEL__ */ 27#endif /* __KERNEL__ */
50 28
51#endif /* _ASM_X86_PCI_64_H */ 29#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 8f1d2fbec1d4..aee103b26d01 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -43,14 +43,6 @@
43#else /* ...!ASSEMBLY */ 43#else /* ...!ASSEMBLY */
44 44
45#include <linux/stringify.h> 45#include <linux/stringify.h>
46#include <asm/sections.h>
47
48#define __addr_to_pcpu_ptr(addr) \
49 (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
50 + (unsigned long)__per_cpu_start)
51#define __pcpu_ptr_to_addr(ptr) \
52 (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
53 - (unsigned long)__per_cpu_start)
54 46
55#ifdef CONFIG_SMP 47#ifdef CONFIG_SMP
56#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x 48#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index c1774ac9da7a..2334982b339e 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -26,13 +26,6 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
26 native_set_pte(ptep, pte); 26 native_set_pte(ptep, pte);
27} 27}
28 28
29static inline void native_set_pte_present(struct mm_struct *mm,
30 unsigned long addr,
31 pte_t *ptep, pte_t pte)
32{
33 native_set_pte(ptep, pte);
34}
35
36static inline void native_pmd_clear(pmd_t *pmdp) 29static inline void native_pmd_clear(pmd_t *pmdp)
37{ 30{
38 native_set_pmd(pmdp, __pmd(0)); 31 native_set_pmd(pmdp, __pmd(0));
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 3f13cdf61156..177b0165ea01 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -31,23 +31,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
31 ptep->pte_low = pte.pte_low; 31 ptep->pte_low = pte.pte_low;
32} 32}
33 33
34/*
35 * Since this is only called on user PTEs, and the page fault handler
36 * must handle the already racy situation of simultaneous page faults,
37 * we are justified in merely clearing the PTE present bit, followed
38 * by a set. The ordering here is important.
39 */
40static inline void native_set_pte_present(struct mm_struct *mm,
41 unsigned long addr,
42 pte_t *ptep, pte_t pte)
43{
44 ptep->pte_low = 0;
45 smp_wmb();
46 ptep->pte_high = pte.pte_high;
47 smp_wmb();
48 ptep->pte_low = pte.pte_low;
49}
50
51static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) 34static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
52{ 35{
53 set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); 36 set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d0812e155f1d..29d96d168bc0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -31,8 +31,6 @@ extern struct list_head pgd_list;
31#define set_pte(ptep, pte) native_set_pte(ptep, pte) 31#define set_pte(ptep, pte) native_set_pte(ptep, pte)
32#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) 32#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
33 33
34#define set_pte_present(mm, addr, ptep, pte) \
35 native_set_pte_present(mm, addr, ptep, pte)
36#define set_pte_atomic(ptep, pte) \ 34#define set_pte_atomic(ptep, pte) \
37 native_set_pte_atomic(ptep, pte) 35 native_set_pte_atomic(ptep, pte)
38 36
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 97612fc7632f..31bd120cf2a2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
42 */ 42 */
43#undef TEST_ACCESS_OK 43#undef TEST_ACCESS_OK
44 44
45/* The boot page tables (all created as a single array) */
46extern unsigned long pg0[];
47
48#ifdef CONFIG_X86_PAE 45#ifdef CONFIG_X86_PAE
49# include <asm/pgtable-3level.h> 46# include <asm/pgtable-3level.h>
50#else 47#else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 76139506c3e4..34c52370f2fe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -75,9 +75,9 @@ struct cpuinfo_x86 {
75#else 75#else
76 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ 76 /* Number of 4K pages in DTLB/ITLB combined(in pages): */
77 int x86_tlbsize; 77 int x86_tlbsize;
78#endif
78 __u8 x86_virt_bits; 79 __u8 x86_virt_bits;
79 __u8 x86_phys_bits; 80 __u8 x86_phys_bits;
80#endif
81 /* CPUID returned core id bits: */ 81 /* CPUID returned core id bits: */
82 __u8 x86_coreid_bits; 82 __u8 x86_coreid_bits;
83 /* Max extended CPUID function supported: */ 83 /* Max extended CPUID function supported: */
@@ -94,7 +94,7 @@ struct cpuinfo_x86 {
94 unsigned long loops_per_jiffy; 94 unsigned long loops_per_jiffy;
95#ifdef CONFIG_SMP 95#ifdef CONFIG_SMP
96 /* cpus sharing the last level cache: */ 96 /* cpus sharing the last level cache: */
97 cpumask_t llc_shared_map; 97 cpumask_var_t llc_shared_map;
98#endif 98#endif
99 /* cpuid returned max cores value: */ 99 /* cpuid returned max cores value: */
100 u16 x86_max_cores; 100 u16 x86_max_cores;
@@ -391,6 +391,9 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
391DECLARE_INIT_PER_CPU(irq_stack_union); 391DECLARE_INIT_PER_CPU(irq_stack_union);
392 392
393DECLARE_PER_CPU(char *, irq_stack_ptr); 393DECLARE_PER_CPU(char *, irq_stack_ptr);
394DECLARE_PER_CPU(unsigned int, irq_count);
395extern unsigned long kernel_eflags;
396extern asmlinkage void ignore_sysret(void);
394#else /* X86_64 */ 397#else /* X86_64 */
395#ifdef CONFIG_CC_STACKPROTECTOR 398#ifdef CONFIG_CC_STACKPROTECTOR
396DECLARE_PER_CPU(unsigned long, stack_canary); 399DECLARE_PER_CPU(unsigned long, stack_canary);
@@ -733,6 +736,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
733extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); 736extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
734 737
735extern void select_idle_routine(const struct cpuinfo_x86 *c); 738extern void select_idle_routine(const struct cpuinfo_x86 *c);
739extern void init_c1e_mask(void);
736 740
737extern unsigned long boot_option_idle_override; 741extern unsigned long boot_option_idle_override;
738extern unsigned long idle_halt; 742extern unsigned long idle_halt;
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 2b8c5160388f..1b7ee5d673c2 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -1 +1,8 @@
1#ifndef _ASM_X86_SECTIONS_H
2#define _ASM_X86_SECTIONS_H
3
1#include <asm-generic/sections.h> 4#include <asm-generic/sections.h>
5
6extern char __brk_base[], __brk_limit[];
7
8#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 05c6f6b11fd5..bdc2ada05ae0 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -64,7 +64,7 @@ extern void x86_quirk_time_init(void);
64#include <asm/bootparam.h> 64#include <asm/bootparam.h>
65 65
66/* Interrupt control for vSMPowered x86_64 systems */ 66/* Interrupt control for vSMPowered x86_64 systems */
67#ifdef CONFIG_X86_VSMP 67#ifdef CONFIG_X86_64
68void vsmp_init(void); 68void vsmp_init(void);
69#else 69#else
70static inline void vsmp_init(void) { } 70static inline void vsmp_init(void) { }
@@ -100,20 +100,51 @@ extern struct boot_params boot_params;
100 */ 100 */
101#define LOWMEMSIZE() (0x9f000) 101#define LOWMEMSIZE() (0x9f000)
102 102
103/* exceedingly early brk-like allocator */
104extern unsigned long _brk_end;
105void *extend_brk(size_t size, size_t align);
106
107/*
108 * Reserve space in the brk section. The name must be unique within
109 * the file, and somewhat descriptive. The size is in bytes. Must be
110 * used at file scope.
111 *
112 * (This uses a temp function to wrap the asm so we can pass it the
113 * size parameter; otherwise we wouldn't be able to. We can't use a
114 * "section" attribute on a normal variable because it always ends up
115 * being @progbits, which ends up allocating space in the vmlinux
116 * executable.)
117 */
118#define RESERVE_BRK(name,sz) \
119 static void __section(.discard) __used \
120 __brk_reservation_fn_##name##__(void) { \
121 asm volatile ( \
122 ".pushsection .brk_reservation,\"aw\",@nobits;" \
123 ".brk." #name ":" \
124 " 1:.skip %c0;" \
125 " .size .brk." #name ", . - 1b;" \
126 " .popsection" \
127 : : "i" (sz)); \
128 }
129
103#ifdef __i386__ 130#ifdef __i386__
104 131
105void __init i386_start_kernel(void); 132void __init i386_start_kernel(void);
106extern void probe_roms(void); 133extern void probe_roms(void);
107 134
108extern unsigned long init_pg_tables_start;
109extern unsigned long init_pg_tables_end;
110
111#else 135#else
112void __init x86_64_start_kernel(char *real_mode); 136void __init x86_64_start_kernel(char *real_mode);
113void __init x86_64_start_reservations(char *real_mode_data); 137void __init x86_64_start_reservations(char *real_mode_data);
114 138
115#endif /* __i386__ */ 139#endif /* __i386__ */
116#endif /* _SETUP */ 140#endif /* _SETUP */
141#else
142#define RESERVE_BRK(name,sz) \
143 .pushsection .brk_reservation,"aw",@nobits; \
144.brk.name: \
1451: .skip sz; \
146 .size .brk.name,.-1b; \
147 .popsection
117#endif /* __ASSEMBLY__ */ 148#endif /* __ASSEMBLY__ */
118#endif /* __KERNEL__ */ 149#endif /* __KERNEL__ */
119 150
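
extend_brk() and RESERVE_BRK() work as a pair: the macro sizes a @nobits .brk_reservation section at link time (so it costs nothing in the vmlinux file, as the comment above explains), and extend_brk() parcels that window out during early boot. A hedged usage sketch; early_table and the 16 KB size are invented.

#include <linux/init.h>
#include <asm/setup.h>
#include <asm/page.h>

RESERVE_BRK(early_table, 16 * 1024);	/* file scope, per the comment */

static void * __init early_table_alloc(void)
{
	/* memory comes from past _end, inside the reserved brk window */
	return extend_brk(16 * 1024, PAGE_SIZE);
}
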
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 47d0e21f2b9e..19e0d88b966d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -21,19 +21,19 @@
21extern int smp_num_siblings; 21extern int smp_num_siblings;
22extern unsigned int num_processors; 22extern unsigned int num_processors;
23 23
24DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 24DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
25DECLARE_PER_CPU(cpumask_t, cpu_core_map); 25DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
26DECLARE_PER_CPU(u16, cpu_llc_id); 26DECLARE_PER_CPU(u16, cpu_llc_id);
27DECLARE_PER_CPU(int, cpu_number); 27DECLARE_PER_CPU(int, cpu_number);
28 28
29static inline struct cpumask *cpu_sibling_mask(int cpu) 29static inline struct cpumask *cpu_sibling_mask(int cpu)
30{ 30{
31 return &per_cpu(cpu_sibling_map, cpu); 31 return per_cpu(cpu_sibling_map, cpu);
32} 32}
33 33
34static inline struct cpumask *cpu_core_mask(int cpu) 34static inline struct cpumask *cpu_core_mask(int cpu)
35{ 35{
36 return &per_cpu(cpu_core_map, cpu); 36 return per_cpu(cpu_core_map, cpu);
37} 37}
38 38
39DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); 39DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
@@ -121,9 +121,10 @@ static inline void arch_send_call_function_single_ipi(int cpu)
121 smp_ops.send_call_func_single_ipi(cpu); 121 smp_ops.send_call_func_single_ipi(cpu);
122} 122}
123 123
124static inline void arch_send_call_function_ipi(cpumask_t mask) 124#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
125static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
125{ 126{
126 smp_ops.send_call_func_ipi(&mask); 127 smp_ops.send_call_func_ipi(mask);
127} 128}
128 129
129void cpu_disable_common(void); 130void cpu_disable_common(void);
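
cpu_sibling_map and cpu_core_map switch from cpumask_t to cpumask_var_t, which becomes a pointer under CONFIG_CPUMASK_OFFSTACK; that is why the accessors above drop the & and return per_cpu(...) directly, and why such masks must be allocated before use. A hedged kernel-style sketch of the allocation pattern:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>

static int setup_one_mask(void)
{
	cpumask_var_t mask;	/* pointer-sized with CPUMASK_OFFSTACK */

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_set_cpu(0, mask);	/* usable like a struct cpumask * */
	free_cpumask_var(mask);
	return 0;
}
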
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index 8ab9cc8b2ecc..ca8bf2cd0ba9 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -54,4 +54,7 @@
54 54
55#define SO_MARK 36 55#define SO_MARK 36
56 56
57#define SO_TIMESTAMPING 37
58#define SCM_TIMESTAMPING SO_TIMESTAMPING
59
57#endif /* _ASM_X86_SOCKET_H */ 60#endif /* _ASM_X86_SOCKET_H */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 3a5696656680..e5e6caffec87 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -295,6 +295,9 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw)
295 : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); 295 : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
296} 296}
297 297
298#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
299#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
300
298#define _raw_spin_relax(lock) cpu_relax() 301#define _raw_spin_relax(lock) cpu_relax()
299#define _raw_read_relax(lock) cpu_relax() 302#define _raw_read_relax(lock) cpu_relax()
300#define _raw_write_relax(lock) cpu_relax() 303#define _raw_write_relax(lock) cpu_relax()
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index a5074bd0f8be..48dcfa62ea07 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -24,28 +24,4 @@ struct saved_context {
24 unsigned long return_address; 24 unsigned long return_address;
25} __attribute__((packed)); 25} __attribute__((packed));
26 26
27#ifdef CONFIG_ACPI
28extern unsigned long saved_eip;
29extern unsigned long saved_esp;
30extern unsigned long saved_ebp;
31extern unsigned long saved_ebx;
32extern unsigned long saved_esi;
33extern unsigned long saved_edi;
34
35static inline void acpi_save_register_state(unsigned long return_point)
36{
37 saved_eip = return_point;
38 asm volatile("movl %%esp,%0" : "=m" (saved_esp));
39 asm volatile("movl %%ebp,%0" : "=m" (saved_ebp));
40 asm volatile("movl %%ebx,%0" : "=m" (saved_ebx));
41 asm volatile("movl %%edi,%0" : "=m" (saved_edi));
42 asm volatile("movl %%esi,%0" : "=m" (saved_esi));
43}
44
45#define acpi_restore_register_state() do {} while (0)
46
47/* routines for saving/restoring kernel state */
48extern int acpi_save_state_mem(void);
49#endif
50
51#endif /* _ASM_X86_SUSPEND_32_H */ 27#endif /* _ASM_X86_SUSPEND_32_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..82ada75f3ebf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
174#define SVM_CPUID_FEATURE_SHIFT 2 174#define SVM_CPUID_FEATURE_SHIFT 2
175#define SVM_CPUID_FUNC 0x8000000a 175#define SVM_CPUID_FUNC 0x8000000a
176 176
177#define MSR_EFER_SVME_MASK (1ULL << 12)
178#define MSR_VM_CR 0xc0010114
179#define MSR_VM_HSAVE_PA 0xc0010117ULL
180
181#define SVM_VM_CR_SVM_DISABLE 4 177#define SVM_VM_CR_SVM_DISABLE 4
182 178
183#define SVM_SELECTOR_S_SHIFT 4 179#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index ffb08be2a530..72a6dcd1299b 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -70,8 +70,6 @@ struct old_utsname;
70asmlinkage long sys32_olduname(struct oldold_utsname __user *); 70asmlinkage long sys32_olduname(struct oldold_utsname __user *);
71long sys32_uname(struct old_utsname __user *); 71long sys32_uname(struct old_utsname __user *);
72 72
73long sys32_ustat(unsigned, struct ustat32 __user *);
74
75asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, 73asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
76 compat_uptr_t __user *, struct pt_regs *); 74 compat_uptr_t __user *, struct pt_regs *);
77asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); 75asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index a81195eaa2b3..bd37ed444a21 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,9 +12,9 @@ unsigned long native_calibrate_tsc(void);
12 12
13#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
14extern int timer_ack; 14extern int timer_ack;
15extern int recalibrate_cpu_khz(void);
16extern irqreturn_t timer_interrupt(int irq, void *dev_id); 15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
17#endif /* CONFIG_X86_32 */ 16#endif /* CONFIG_X86_32 */
17extern int recalibrate_cpu_khz(void);
18 18
19extern int no_timer_check; 19extern int no_timer_check;
20 20
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 77cfb2cfb386..892b119dba6f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -44,9 +44,6 @@
44 44
45#ifdef CONFIG_X86_32 45#ifdef CONFIG_X86_32
46 46
47/* Mappings between node number and cpus on that node. */
48extern cpumask_t node_to_cpumask_map[];
49
50/* Mappings between logical cpu number and node number */ 47/* Mappings between logical cpu number and node number */
51extern int cpu_to_node_map[]; 48extern int cpu_to_node_map[];
52 49
@@ -57,30 +54,8 @@ static inline int cpu_to_node(int cpu)
57} 54}
58#define early_cpu_to_node(cpu) cpu_to_node(cpu) 55#define early_cpu_to_node(cpu) cpu_to_node(cpu)
59 56
60/* Returns a bitmask of CPUs on Node 'node'.
61 *
62 * Side note: this function creates the returned cpumask on the stack
63 * so with a high NR_CPUS count, excessive stack space is used. The
64 * cpumask_of_node function should be used whenever possible.
65 */
66static inline cpumask_t node_to_cpumask(int node)
67{
68 return node_to_cpumask_map[node];
69}
70
71/* Returns a bitmask of CPUs on Node 'node'. */
72static inline const struct cpumask *cpumask_of_node(int node)
73{
74 return &node_to_cpumask_map[node];
75}
76
77static inline void setup_node_to_cpumask_map(void) { }
78
79#else /* CONFIG_X86_64 */ 57#else /* CONFIG_X86_64 */
80 58
81/* Mappings between node number and cpus on that node. */
82extern cpumask_t *node_to_cpumask_map;
83
84/* Mappings between logical cpu number and node number */ 59/* Mappings between logical cpu number and node number */
85DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 60DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
86 61
@@ -91,8 +66,6 @@ DECLARE_PER_CPU(int, node_number);
91#ifdef CONFIG_DEBUG_PER_CPU_MAPS 66#ifdef CONFIG_DEBUG_PER_CPU_MAPS
92extern int cpu_to_node(int cpu); 67extern int cpu_to_node(int cpu);
93extern int early_cpu_to_node(int cpu); 68extern int early_cpu_to_node(int cpu);
94extern const cpumask_t *cpumask_of_node(int node);
95extern cpumask_t node_to_cpumask(int node);
96 69
97#else /* !CONFIG_DEBUG_PER_CPU_MAPS */ 70#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
98 71
@@ -108,42 +81,32 @@ static inline int early_cpu_to_node(int cpu)
108 return early_per_cpu(x86_cpu_to_node_map, cpu); 81 return early_per_cpu(x86_cpu_to_node_map, cpu);
109} 82}
110 83
111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 84#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
112static inline const cpumask_t *cpumask_of_node(int node) 85
113{ 86#endif /* CONFIG_X86_64 */
114 return &node_to_cpumask_map[node];
115}
116 87
117/* Returns a bitmask of CPUs on Node 'node'. */ 88/* Mappings between node number and cpus on that node. */
118static inline cpumask_t node_to_cpumask(int node) 89extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
90
91#ifdef CONFIG_DEBUG_PER_CPU_MAPS
92extern const struct cpumask *cpumask_of_node(int node);
93#else
94/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
95static inline const struct cpumask *cpumask_of_node(int node)
119{ 96{
120 return node_to_cpumask_map[node]; 97 return node_to_cpumask_map[node];
121} 98}
122 99#endif
123#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
124 100
125extern void setup_node_to_cpumask_map(void); 101extern void setup_node_to_cpumask_map(void);
126 102
127/* 103/*
128 * Replace default node_to_cpumask_ptr with optimized version
129 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
130 */
131#define node_to_cpumask_ptr(v, node) \
132 const cpumask_t *v = cpumask_of_node(node)
133
134#define node_to_cpumask_ptr_next(v, node) \
135 v = cpumask_of_node(node)
136
137#endif /* CONFIG_X86_64 */
138
139/*
140 * Returns the number of the node containing Node 'node'. This 104 * Returns the number of the node containing Node 'node'. This
141 * architecture is flat, so it is a pretty simple function! 105 * architecture is flat, so it is a pretty simple function!
142 */ 106 */
143#define parent_node(node) (node) 107#define parent_node(node) (node)
144 108
145#define pcibus_to_node(bus) __pcibus_to_node(bus) 109#define pcibus_to_node(bus) __pcibus_to_node(bus)
146#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
147 110
148#ifdef CONFIG_X86_32 111#ifdef CONFIG_X86_32
149extern unsigned long node_start_pfn[]; 112extern unsigned long node_start_pfn[];
@@ -209,52 +172,24 @@ static inline int early_cpu_to_node(int cpu)
209 return 0; 172 return 0;
210} 173}
211 174
212static inline const cpumask_t *cpumask_of_node(int node) 175static inline const struct cpumask *cpumask_of_node(int node)
213{
214 return &cpu_online_map;
215}
216static inline cpumask_t node_to_cpumask(int node)
217{
218 return cpu_online_map;
219}
220static inline int node_to_first_cpu(int node)
221{ 176{
222 return first_cpu(cpu_online_map); 177 return cpu_online_mask;
223} 178}
224 179
225static inline void setup_node_to_cpumask_map(void) { } 180static inline void setup_node_to_cpumask_map(void) { }
226 181
227/*
228 * Replace default node_to_cpumask_ptr with optimized version
229 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
230 */
231#define node_to_cpumask_ptr(v, node) \
232 const cpumask_t *v = cpumask_of_node(node)
233
234#define node_to_cpumask_ptr_next(v, node) \
235 v = cpumask_of_node(node)
236#endif 182#endif
237 183
238#include <asm-generic/topology.h> 184#include <asm-generic/topology.h>
239 185
240#ifdef CONFIG_NUMA
241/* Returns the number of the first CPU on Node 'node'. */
242static inline int node_to_first_cpu(int node)
243{
244 return cpumask_first(cpumask_of_node(node));
245}
246#endif
247
248extern cpumask_t cpu_coregroup_map(int cpu);
249extern const struct cpumask *cpu_coregroup_mask(int cpu); 186extern const struct cpumask *cpu_coregroup_mask(int cpu);
250 187
251#ifdef ENABLE_TOPO_DEFINES 188#ifdef ENABLE_TOPO_DEFINES
252#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) 189#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
253#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) 190#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
254#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) 191#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
255#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) 192#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
256#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
257#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
258 193
259/* indicates that pointers to the topology cpumask_t maps are valid */ 194/* indicates that pointers to the topology cpumask_t maps are valid */
260#define arch_provides_topology_pointers yes 195#define arch_provides_topology_pointers yes
@@ -268,7 +203,7 @@ struct pci_bus;
268void set_pci_bus_resources_arch_default(struct pci_bus *b); 203void set_pci_bus_resources_arch_default(struct pci_bus *b);
269 204
270#ifdef CONFIG_SMP 205#ifdef CONFIG_SMP
271#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) 206#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
272#define smt_capable() (smp_num_siblings > 1) 207#define smt_capable() (smp_num_siblings > 1)
273#endif 208#endif
274 209
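(With node_to_cpumask() gone and cpumask_of_node() returning a const pointer on every configuration, callers no longer copy a cpumask_t onto the stack. A minimal usage sketch, assuming a NUMA-enabled kernel:)

    #include <linux/kernel.h>
    #include <linux/cpumask.h>
    #include <asm/topology.h>

    static void walk_node_cpus(int node)
    {
            const struct cpumask *mask = cpumask_of_node(node);
            int cpu;

            /* iterate without materializing a NR_CPUS-sized copy */
            for_each_cpu(cpu, mask)
                    pr_info("node %d: cpu %d\n", node, cpu);
    }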
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index f2bba78430a4..6e72d74cf8dc 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -338,6 +338,8 @@
338#define __NR_dup3 330 338#define __NR_dup3 330
339#define __NR_pipe2 331 339#define __NR_pipe2 331
340#define __NR_inotify_init1 332 340#define __NR_inotify_init1 332
341#define __NR_preadv 333
342#define __NR_pwritev 334
341 343
342#ifdef __KERNEL__ 344#ifdef __KERNEL__
343 345
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index d2e415e6666f..f81829462325 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -653,6 +653,10 @@ __SYSCALL(__NR_dup3, sys_dup3)
653__SYSCALL(__NR_pipe2, sys_pipe2) 653__SYSCALL(__NR_pipe2, sys_pipe2)
654#define __NR_inotify_init1 294 654#define __NR_inotify_init1 294
655__SYSCALL(__NR_inotify_init1, sys_inotify_init1) 655__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
656#define __NR_preadv 295
657__SYSCALL(__NR_preadv, sys_preadv)
658#define __NR_pwritev 296
659__SYSCALL(__NR_pwritev, sys_pwritev)
656 660
657 661
658#ifndef __NO_STUBS 662#ifndef __NO_STUBS
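(For reference, sys_preadv/sys_pwritev take five arguments — the file position is split into low/high words on both bitnesses. A hypothetical userspace probe using the raw syscall numbers from the hunks above, since glibc wrappers for these did not exist yet:)

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
            char a[4], b[4];
            struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
            int fd = open("/etc/hostname", O_RDONLY);
            long n;

            if (fd < 0)
                    return 1;
            /* 295 == __NR_preadv on x86_64 (333 on i386); args are
             * fd, iovec array, count, pos_low, pos_high */
            n = syscall(295, fd, iov, 2, 0L, 0L);
            printf("preadv returned %ld\n", n);
            close(fd);
            return 0;
    }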
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 9f4dfba33b28..d3a98ea1062e 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -11,11 +11,13 @@
11#ifndef _ASM_X86_UV_UV_HUB_H 11#ifndef _ASM_X86_UV_UV_HUB_H
12#define _ASM_X86_UV_UV_HUB_H 12#define _ASM_X86_UV_UV_HUB_H
13 13
14#ifdef CONFIG_X86_64
14#include <linux/numa.h> 15#include <linux/numa.h>
15#include <linux/percpu.h> 16#include <linux/percpu.h>
16#include <linux/timer.h> 17#include <linux/timer.h>
17#include <asm/types.h> 18#include <asm/types.h>
18#include <asm/percpu.h> 19#include <asm/percpu.h>
20#include <asm/uv/uv_mmrs.h>
19 21
20 22
21/* 23/*
@@ -397,6 +399,7 @@ static inline void uv_set_scir_bits(unsigned char value)
397 uv_write_local_mmr8(uv_hub_info->scir.offset, value); 399 uv_write_local_mmr8(uv_hub_info->scir.offset, value);
398 } 400 }
399} 401}
402
400static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) 403static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
401{ 404{
402 if (uv_cpu_hub_info(cpu)->scir.state != value) { 405 if (uv_cpu_hub_info(cpu)->scir.state != value) {
@@ -405,4 +408,15 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
405 } 408 }
406} 409}
407 410
411static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
412{
413 unsigned long val;
414
415 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
416 ((apicid & 0x3f) << UVH_IPI_INT_APIC_ID_SHFT) |
417 (vector << UVH_IPI_INT_VECTOR_SHFT);
418 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
419}
420
421#endif /* CONFIG_X86_64 */
408#endif /* _ASM_X86_UV_UV_HUB_H */ 422#endif /* _ASM_X86_UV_UV_HUB_H */
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index dd627793a234..db68ac8a5ac2 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -1,3 +1,4 @@
1
1/* 2/*
2 * This file is subject to the terms and conditions of the GNU General Public 3 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive 4 * License. See the file "COPYING" in the main directory of this archive
@@ -243,6 +244,158 @@ union uvh_event_occurred0_u {
243#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 244#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0
244 245
245/* ========================================================================= */ 246/* ========================================================================= */
247/* UVH_GR0_TLB_INT0_CONFIG */
248/* ========================================================================= */
249#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
250
251#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0
252#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
253#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8
254#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
255#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11
256#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
257#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12
258#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
259#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13
260#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
261#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15
262#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
263#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16
264#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
265#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32
266#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
267
268union uvh_gr0_tlb_int0_config_u {
269 unsigned long v;
270 struct uvh_gr0_tlb_int0_config_s {
271 unsigned long vector_ : 8; /* RW */
272 unsigned long dm : 3; /* RW */
273 unsigned long destmode : 1; /* RW */
274 unsigned long status : 1; /* RO */
275 unsigned long p : 1; /* RO */
276 unsigned long rsvd_14 : 1; /* */
277 unsigned long t : 1; /* RO */
278 unsigned long m : 1; /* RW */
279 unsigned long rsvd_17_31: 15; /* */
280 unsigned long apic_id : 32; /* RW */
281 } s;
282};
283
284/* ========================================================================= */
285/* UVH_GR0_TLB_INT1_CONFIG */
286/* ========================================================================= */
287#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
288
289#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0
290#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
291#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8
292#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
293#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11
294#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
295#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12
296#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
297#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13
298#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
299#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15
300#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
301#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16
302#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
303#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32
304#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
305
306union uvh_gr0_tlb_int1_config_u {
307 unsigned long v;
308 struct uvh_gr0_tlb_int1_config_s {
309 unsigned long vector_ : 8; /* RW */
310 unsigned long dm : 3; /* RW */
311 unsigned long destmode : 1; /* RW */
312 unsigned long status : 1; /* RO */
313 unsigned long p : 1; /* RO */
314 unsigned long rsvd_14 : 1; /* */
315 unsigned long t : 1; /* RO */
316 unsigned long m : 1; /* RW */
317 unsigned long rsvd_17_31: 15; /* */
318 unsigned long apic_id : 32; /* RW */
319 } s;
320};
321
322/* ========================================================================= */
323/* UVH_GR1_TLB_INT0_CONFIG */
324/* ========================================================================= */
325#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
326
327#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
328#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL
329#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
330#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL
331#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11
332#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL
333#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12
334#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL
335#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13
336#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL
337#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15
338#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL
339#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16
340#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
341#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32
342#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
343
344union uvh_gr1_tlb_int0_config_u {
345 unsigned long v;
346 struct uvh_gr1_tlb_int0_config_s {
347 unsigned long vector_ : 8; /* RW */
348 unsigned long dm : 3; /* RW */
349 unsigned long destmode : 1; /* RW */
350 unsigned long status : 1; /* RO */
351 unsigned long p : 1; /* RO */
352 unsigned long rsvd_14 : 1; /* */
353 unsigned long t : 1; /* RO */
354 unsigned long m : 1; /* RW */
355 unsigned long rsvd_17_31: 15; /* */
356 unsigned long apic_id : 32; /* RW */
357 } s;
358};
359
360/* ========================================================================= */
361/* UVH_GR1_TLB_INT1_CONFIG */
362/* ========================================================================= */
363#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
364
365#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
366#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
367#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
368#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
369#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
370#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
371#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
372#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
373#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13
374#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
375#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15
376#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
377#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16
378#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
379#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
380#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
381
382union uvh_gr1_tlb_int1_config_u {
383 unsigned long v;
384 struct uvh_gr1_tlb_int1_config_s {
385 unsigned long vector_ : 8; /* RW */
386 unsigned long dm : 3; /* RW */
387 unsigned long destmode : 1; /* RW */
388 unsigned long status : 1; /* RO */
389 unsigned long p : 1; /* RO */
390 unsigned long rsvd_14 : 1; /* */
391 unsigned long t : 1; /* RO */
392 unsigned long m : 1; /* RW */
393 unsigned long rsvd_17_31: 15; /* */
394 unsigned long apic_id : 32; /* RW */
395 } s;
396};
397
398/* ========================================================================= */
246/* UVH_INT_CMPB */ 399/* UVH_INT_CMPB */
247/* ========================================================================= */ 400/* ========================================================================= */
248#define UVH_INT_CMPB 0x22080UL 401#define UVH_INT_CMPB 0x22080UL
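(Each generated MMR follows the same shape: an absolute address, per-field shift/mask pairs, and a union overlaying named bit-fields on the raw 64-bit value. A sketch of field-wise programming — the caller is hypothetical; uv_write_global_mmr64() is the accessor seen in the uv_hub.h hunk above:)

    static void gr0_tlb_int0_setup(int pnode, int apicid, int vector)
    {
            union uvh_gr0_tlb_int0_config_u cfg;

            cfg.v = 0;
            cfg.s.vector_ = vector;         /* note the trailing underscore */
            cfg.s.apic_id = apicid;
            uv_write_global_mmr64(pnode, UVH_GR0_TLB_INT0_CONFIG, cfg.v);
    }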
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 593636275238..e0f9aa16358b 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
118 118
119 wrmsrl(MSR_VM_HSAVE_PA, 0); 119 wrmsrl(MSR_VM_HSAVE_PA, 0);
120 rdmsrl(MSR_EFER, efer); 120 rdmsrl(MSR_EFER, efer);
121 wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); 121 wrmsrl(MSR_EFER, efer & ~EFER_SVME);
122} 122}
123 123
124/** Makes sure SVM is disabled, if it is supported on the CPU 124/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d8..498f944010b9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
270 270
271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ 271#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ 272#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
273#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ 273#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ 274#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
275#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
275 276
276/* GUEST_INTERRUPTIBILITY_INFO flags. */ 277/* GUEST_INTERRUPTIBILITY_INFO flags. */
277#define GUEST_INTR_STATE_STI 0x00000001 278#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
311#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */ 312#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
312#define TYPE_MOV_TO_DR (0 << 4) 313#define TYPE_MOV_TO_DR (0 << 4)
313#define TYPE_MOV_FROM_DR (1 << 4) 314#define TYPE_MOV_FROM_DR (1 << 4)
314#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */ 315#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
315 316
316 317
317/* segment AR */ 318/* segment AR */
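(DEBUG_REG_ACCESS_REG changes from a bare bitmask into an extractor macro, so an exit handler can pull the GPR number straight out of the exit qualification. A decoding sketch using only the macros shown above:)

    static void decode_dr_access(unsigned long qual)
    {
            int gpr = DEBUG_REG_ACCESS_REG(qual);   /* bits 11:8 */

            if ((qual & DEBUG_REG_ACCESS_TYPE) == TYPE_MOV_TO_DR)
                    pr_info("mov GPR%d -> debug register\n", gpr);
            else
                    pr_info("mov debug register -> GPR%d\n", gpr);
    }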
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 5e79ca694326..9c371e4a9fa6 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -296,6 +296,8 @@ HYPERVISOR_get_debugreg(int reg)
296static inline int 296static inline int
297HYPERVISOR_update_descriptor(u64 ma, u64 desc) 297HYPERVISOR_update_descriptor(u64 ma, u64 desc)
298{ 298{
299 if (sizeof(u64) == sizeof(long))
300 return _hypercall2(int, update_descriptor, ma, desc);
299 return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); 301 return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
300} 302}
301 303
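(The added branch costs nothing at run time: sizeof(u64) == sizeof(long) is a compile-time constant, so each build keeps exactly one hypercall form. Effectively:)

    /* 64-bit: sizeof(u64) == sizeof(long), so the branch folds to
     *         return _hypercall2(int, update_descriptor, ma, desc);
     * 32-bit: the branch is dead code, leaving the split form
     *         return _hypercall4(int, update_descriptor,
     *                            ma, ma >> 32, desc, desc >> 32);
     */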
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a0c9e138b008..77df4d654ff9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,11 +67,11 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
67obj-y += apic/ 67obj-y += apic/
68obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 68obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
69obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 69obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
70obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 70obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
71obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
71obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 72obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
72obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 73obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
73obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 74obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
74obj-$(CONFIG_X86_VSMP) += vsmp_64.o
75obj-$(CONFIG_KPROBES) += kprobes.o 75obj-$(CONFIG_KPROBES) += kprobes.o
76obj-$(CONFIG_MODULES) += module_$(BITS).o 76obj-$(CONFIG_MODULES) += module_$(BITS).o
77obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o 77obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@@ -107,7 +107,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
107 107
108obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o 108obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
109 109
110obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 110obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
111 111
112### 112###
113# 64 bit specific files 113# 64 bit specific files
@@ -121,4 +121,5 @@ ifeq ($(CONFIG_X86_64),y)
121 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o 121 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
122 122
123 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o 123 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
124 obj-y += vsmp_64.o
124endif 125endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a18eb7ce2236..723989d7f802 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -230,6 +230,35 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
230} 230}
231 231
232static int __init 232static int __init
233acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
234{
235 struct acpi_madt_local_x2apic *processor = NULL;
236
237 processor = (struct acpi_madt_local_x2apic *)header;
238
239 if (BAD_MADT_ENTRY(processor, end))
240 return -EINVAL;
241
242 acpi_table_print_madt_entry(header);
243
244#ifdef CONFIG_X86_X2APIC
245 /*
246 * We need to register disabled CPUs as well, to permit
247 * counting them. This allows us to size
248 * cpus_possible_map more accurately, so that we need not
249 * preallocate memory for all NR_CPUS
250 * when we use CPU hotplug.
251 */
252 acpi_register_lapic(processor->local_apic_id, /* APIC ID */
253 processor->lapic_flags & ACPI_MADT_ENABLED);
254#else
255 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
256#endif
257
258 return 0;
259}
260
261static int __init
233acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) 262acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
234{ 263{
235 struct acpi_madt_local_apic *processor = NULL; 264 struct acpi_madt_local_apic *processor = NULL;
@@ -289,6 +318,25 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
289} 318}
290 319
291static int __init 320static int __init
321acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
322 const unsigned long end)
323{
324 struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL;
325
326 x2apic_nmi = (struct acpi_madt_local_x2apic_nmi *)header;
327
328 if (BAD_MADT_ENTRY(x2apic_nmi, end))
329 return -EINVAL;
330
331 acpi_table_print_madt_entry(header);
332
333 if (x2apic_nmi->lint != 1)
334 printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
335
336 return 0;
337}
338
339static int __init
292acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) 340acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
293{ 341{
294 struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; 342 struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
@@ -793,6 +841,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
793static int __init acpi_parse_madt_lapic_entries(void) 841static int __init acpi_parse_madt_lapic_entries(void)
794{ 842{
795 int count; 843 int count;
844 int x2count = 0;
796 845
797 if (!cpu_has_apic) 846 if (!cpu_has_apic)
798 return -ENODEV; 847 return -ENODEV;
@@ -816,22 +865,28 @@ static int __init acpi_parse_madt_lapic_entries(void)
816 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, 865 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
817 acpi_parse_sapic, MAX_APICS); 866 acpi_parse_sapic, MAX_APICS);
818 867
819 if (!count) 868 if (!count) {
869 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
870 acpi_parse_x2apic, MAX_APICS);
820 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, 871 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
821 acpi_parse_lapic, MAX_APICS); 872 acpi_parse_lapic, MAX_APICS);
822 if (!count) { 873 }
874 if (!count && !x2count) {
823 printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 875 printk(KERN_ERR PREFIX "No LAPIC entries present\n");
824 /* TBD: Cleanup to allow fallback to MPS */ 876 /* TBD: Cleanup to allow fallback to MPS */
825 return -ENODEV; 877 return -ENODEV;
826 } else if (count < 0) { 878 } else if (count < 0 || x2count < 0) {
827 printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); 879 printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
828 /* TBD: Cleanup to allow fallback to MPS */ 880 /* TBD: Cleanup to allow fallback to MPS */
829 return count; 881 return count;
830 } 882 }
831 883
884 x2count =
885 acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
886 acpi_parse_x2apic_nmi, 0);
832 count = 887 count =
833 acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); 888 acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
834 if (count < 0) { 889 if (count < 0 || x2count < 0) {
835 printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); 890 printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
836 /* TBD: Cleanup to allow fallback to MPS */ 891 /* TBD: Cleanup to allow fallback to MPS */
837 return count; 892 return count;
@@ -1470,7 +1525,7 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
1470 1525
1471/* 1526/*
1472 * If your system is blacklisted here, but you find that acpi=force 1527 * If your system is blacklisted here, but you find that acpi=force
1473 * works for you, please contact acpi-devel@sourceforge.net 1528 * works for you, please contact linux-acpi@vger.kernel.org
1474 */ 1529 */
1475static struct dmi_system_id __initdata acpi_dmi_table[] = { 1530static struct dmi_system_id __initdata acpi_dmi_table[] = {
1476 /* 1531 /*
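(A separate x2apic parser is needed because the MADT entry formats differ: the legacy local-APIC entry carries an 8-bit APIC id, while the x2apic entry carries a 32-bit one. Roughly — field layout per ACPICA's table definitions, quoted from memory rather than from this patch:)

    struct acpi_madt_local_apic {           /* MADT type 0 */
            struct acpi_subtable_header header;
            u8  processor_id;
            u8  id;                         /* 8-bit APIC id */
            u32 lapic_flags;
    };

    struct acpi_madt_local_x2apic {         /* MADT type 9 */
            struct acpi_subtable_header header;
            u16 reserved;
            u32 local_apic_id;              /* 32-bit APIC id */
            u32 lapic_flags;
            u32 uid;
    };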
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5113c080f0c4..a97db99dad52 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -22,10 +22,9 @@
22#include <linux/bitops.h> 22#include <linux/bitops.h>
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <linux/dma-mapping.h>
25#include <linux/iommu-helper.h> 26#include <linux/iommu-helper.h>
26#ifdef CONFIG_IOMMU_API
27#include <linux/iommu.h> 27#include <linux/iommu.h>
28#endif
29#include <asm/proto.h> 28#include <asm/proto.h>
30#include <asm/iommu.h> 29#include <asm/iommu.h>
31#include <asm/gart.h> 30#include <asm/gart.h>
@@ -1297,8 +1296,10 @@ static void __unmap_single(struct amd_iommu *iommu,
1297/* 1296/*
1298 * The exported map_single function for dma_ops. 1297 * The exported map_single function for dma_ops.
1299 */ 1298 */
1300static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, 1299static dma_addr_t map_page(struct device *dev, struct page *page,
1301 size_t size, int dir) 1300 unsigned long offset, size_t size,
1301 enum dma_data_direction dir,
1302 struct dma_attrs *attrs)
1302{ 1303{
1303 unsigned long flags; 1304 unsigned long flags;
1304 struct amd_iommu *iommu; 1305 struct amd_iommu *iommu;
@@ -1306,6 +1307,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
1306 u16 devid; 1307 u16 devid;
1307 dma_addr_t addr; 1308 dma_addr_t addr;
1308 u64 dma_mask; 1309 u64 dma_mask;
1310 phys_addr_t paddr = page_to_phys(page) + offset;
1309 1311
1310 INC_STATS_COUNTER(cnt_map_single); 1312 INC_STATS_COUNTER(cnt_map_single);
1311 1313
@@ -1340,8 +1342,8 @@ out:
1340/* 1342/*
1341 * The exported unmap_single function for dma_ops. 1343 * The exported unmap_single function for dma_ops.
1342 */ 1344 */
1343static void unmap_single(struct device *dev, dma_addr_t dma_addr, 1345static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1344 size_t size, int dir) 1346 enum dma_data_direction dir, struct dma_attrs *attrs)
1345{ 1347{
1346 unsigned long flags; 1348 unsigned long flags;
1347 struct amd_iommu *iommu; 1349 struct amd_iommu *iommu;
@@ -1390,7 +1392,8 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
1390 * lists). 1392 * lists).
1391 */ 1393 */
1392static int map_sg(struct device *dev, struct scatterlist *sglist, 1394static int map_sg(struct device *dev, struct scatterlist *sglist,
1393 int nelems, int dir) 1395 int nelems, enum dma_data_direction dir,
1396 struct dma_attrs *attrs)
1394{ 1397{
1395 unsigned long flags; 1398 unsigned long flags;
1396 struct amd_iommu *iommu; 1399 struct amd_iommu *iommu;
@@ -1457,7 +1460,8 @@ unmap:
1457 * lists). 1460 * lists).
1458 */ 1461 */
1459static void unmap_sg(struct device *dev, struct scatterlist *sglist, 1462static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1460 int nelems, int dir) 1463 int nelems, enum dma_data_direction dir,
1464 struct dma_attrs *attrs)
1461{ 1465{
1462 unsigned long flags; 1466 unsigned long flags;
1463 struct amd_iommu *iommu; 1467 struct amd_iommu *iommu;
@@ -1644,11 +1648,11 @@ static void prealloc_protection_domains(void)
1644 } 1648 }
1645} 1649}
1646 1650
1647static struct dma_mapping_ops amd_iommu_dma_ops = { 1651static struct dma_map_ops amd_iommu_dma_ops = {
1648 .alloc_coherent = alloc_coherent, 1652 .alloc_coherent = alloc_coherent,
1649 .free_coherent = free_coherent, 1653 .free_coherent = free_coherent,
1650 .map_single = map_single, 1654 .map_page = map_page,
1651 .unmap_single = unmap_single, 1655 .unmap_page = unmap_page,
1652 .map_sg = map_sg, 1656 .map_sg = map_sg,
1653 .unmap_sg = unmap_sg, 1657 .unmap_sg = unmap_sg,
1654 .dma_supported = amd_iommu_dma_supported, 1658 .dma_supported = amd_iommu_dma_supported,
@@ -1924,6 +1928,12 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
1924 return paddr; 1928 return paddr;
1925} 1929}
1926 1930
1931static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
1932 unsigned long cap)
1933{
1934 return 0;
1935}
1936
1927static struct iommu_ops amd_iommu_ops = { 1937static struct iommu_ops amd_iommu_ops = {
1928 .domain_init = amd_iommu_domain_init, 1938 .domain_init = amd_iommu_domain_init,
1929 .domain_destroy = amd_iommu_domain_destroy, 1939 .domain_destroy = amd_iommu_domain_destroy,
@@ -1932,5 +1942,6 @@ static struct iommu_ops amd_iommu_ops = {
1932 .map = amd_iommu_map_range, 1942 .map = amd_iommu_map_range,
1933 .unmap = amd_iommu_unmap_range, 1943 .unmap = amd_iommu_unmap_range,
1934 .iova_to_phys = amd_iommu_iova_to_phys, 1944 .iova_to_phys = amd_iommu_iova_to_phys,
1945 .domain_has_cap = amd_iommu_domain_has_cap,
1935}; 1946};
1936 1947
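(With the switch from the x86-private dma_mapping_ops to the generic struct dma_map_ops, a driver's dma_map_single() ends up in the IOMMU's .map_page hook. A simplified sketch of the dispatch — not the literal dma-mapping code:)

    #include <linux/dma-mapping.h>
    #include <linux/mm.h>

    static inline dma_addr_t dma_map_single_sketch(struct device *dev,
                                                   void *ptr, size_t size,
                                                   enum dma_data_direction dir)
    {
            struct dma_map_ops *ops = get_dma_ops(dev); /* amd_iommu_dma_ops here */

            return ops->map_page(dev, virt_to_page(ptr),
                                 offset_in_page(ptr), size, dir, NULL);
    }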
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 30909a258d0f..098ec84b8c00 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -809,7 +809,7 @@ void clear_local_APIC(void)
809 u32 v; 809 u32 v;
810 810
811 /* APIC hasn't been mapped yet */ 811 /* APIC hasn't been mapped yet */
812 if (!apic_phys) 812 if (!x2apic && !apic_phys)
813 return; 813 return;
814 814
815 maxlvt = lapic_get_maxlvt(); 815 maxlvt = lapic_get_maxlvt();
@@ -1304,6 +1304,7 @@ void __init enable_IR_x2apic(void)
1304#ifdef CONFIG_INTR_REMAP 1304#ifdef CONFIG_INTR_REMAP
1305 int ret; 1305 int ret;
1306 unsigned long flags; 1306 unsigned long flags;
1307 struct IO_APIC_route_entry **ioapic_entries = NULL;
1307 1308
1308 if (!cpu_has_x2apic) 1309 if (!cpu_has_x2apic)
1309 return; 1310 return;
@@ -1334,16 +1335,23 @@ void __init enable_IR_x2apic(void)
1334 return; 1335 return;
1335 } 1336 }
1336 1337
1337 local_irq_save(flags); 1338 ioapic_entries = alloc_ioapic_entries();
1338 mask_8259A(); 1339 if (!ioapic_entries) {
1340 pr_info("Allocate ioapic_entries failed: %d\n", ret);
1341 goto end;
1342 }
1339 1343
1340 ret = save_mask_IO_APIC_setup(); 1344 ret = save_IO_APIC_setup(ioapic_entries);
1341 if (ret) { 1345 if (ret) {
1342 pr_info("Saving IO-APIC state failed: %d\n", ret); 1346 pr_info("Saving IO-APIC state failed: %d\n", ret);
1343 goto end; 1347 goto end;
1344 } 1348 }
1345 1349
1346 ret = enable_intr_remapping(1); 1350 local_irq_save(flags);
1351 mask_IO_APIC_setup(ioapic_entries);
1352 mask_8259A();
1353
1354 ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
1347 1355
1348 if (ret && x2apic_preenabled) { 1356 if (ret && x2apic_preenabled) {
1349 local_irq_restore(flags); 1357 local_irq_restore(flags);
@@ -1363,14 +1371,14 @@ end_restore:
1363 /* 1371 /*
1364 * IR enabling failed 1372 * IR enabling failed
1365 */ 1373 */
1366 restore_IO_APIC_setup(); 1374 restore_IO_APIC_setup(ioapic_entries);
1367 else 1375 else
1368 reinit_intr_remapped_IO_APIC(x2apic_preenabled); 1376 reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
1369 1377
1370end:
1371 unmask_8259A(); 1378 unmask_8259A();
1372 local_irq_restore(flags); 1379 local_irq_restore(flags);
1373 1380
1381end:
1374 if (!ret) { 1382 if (!ret) {
1375 if (!x2apic_preenabled) 1383 if (!x2apic_preenabled)
1376 pr_info("Enabled x2apic and interrupt-remapping\n"); 1384 pr_info("Enabled x2apic and interrupt-remapping\n");
@@ -1378,6 +1386,8 @@ end:
1378 pr_info("Enabled Interrupt-remapping\n"); 1386 pr_info("Enabled Interrupt-remapping\n");
1379 } else 1387 } else
1380 pr_err("Failed to enable Interrupt-remapping and x2apic\n"); 1388 pr_err("Failed to enable Interrupt-remapping and x2apic\n");
1389 if (ioapic_entries)
1390 free_ioapic_entries(ioapic_entries);
1381#else 1391#else
1382 if (!cpu_has_x2apic) 1392 if (!cpu_has_x2apic)
1383 return; 1393 return;
@@ -1523,12 +1533,10 @@ void __init early_init_lapic_mapping(void)
1523 */ 1533 */
1524void __init init_apic_mappings(void) 1534void __init init_apic_mappings(void)
1525{ 1535{
1526#ifdef CONFIG_X86_X2APIC
1527 if (x2apic) { 1536 if (x2apic) {
1528 boot_cpu_physical_apicid = read_apic_id(); 1537 boot_cpu_physical_apicid = read_apic_id();
1529 return; 1538 return;
1530 } 1539 }
1531#endif
1532 1540
1533 /* 1541 /*
1534 * If no local APIC can be found then set up a fake all 1542 * If no local APIC can be found then set up a fake all
@@ -1955,6 +1963,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1955 1963
1956 local_irq_save(flags); 1964 local_irq_save(flags);
1957 disable_local_APIC(); 1965 disable_local_APIC();
1966#ifdef CONFIG_INTR_REMAP
1967 if (intr_remapping_enabled)
1968 disable_intr_remapping();
1969#endif
1958 local_irq_restore(flags); 1970 local_irq_restore(flags);
1959 return 0; 1971 return 0;
1960} 1972}
@@ -1965,19 +1977,42 @@ static int lapic_resume(struct sys_device *dev)
1965 unsigned long flags; 1977 unsigned long flags;
1966 int maxlvt; 1978 int maxlvt;
1967 1979
1980#ifdef CONFIG_INTR_REMAP
1981 int ret;
1982 struct IO_APIC_route_entry **ioapic_entries = NULL;
1983
1968 if (!apic_pm_state.active) 1984 if (!apic_pm_state.active)
1969 return 0; 1985 return 0;
1970 1986
1971 maxlvt = lapic_get_maxlvt();
1972
1973 local_irq_save(flags); 1987 local_irq_save(flags);
1988 if (x2apic) {
1989 ioapic_entries = alloc_ioapic_entries();
1990 if (!ioapic_entries) {
1991 WARN(1, "Alloc ioapic_entries in lapic resume failed.");
1992 return -ENOMEM;
1993 }
1974 1994
1975#ifdef CONFIG_X86_X2APIC 1995 ret = save_IO_APIC_setup(ioapic_entries);
1996 if (ret) {
1997 WARN(1, "Saving IO-APIC state failed: %d\n", ret);
1998 free_ioapic_entries(ioapic_entries);
1999 return ret;
2000 }
2001
2002 mask_IO_APIC_setup(ioapic_entries);
2003 mask_8259A();
2004 enable_x2apic();
2005 }
2006#else
2007 if (!apic_pm_state.active)
2008 return 0;
2009
2010 local_irq_save(flags);
1976 if (x2apic) 2011 if (x2apic)
1977 enable_x2apic(); 2012 enable_x2apic();
1978 else
1979#endif 2013#endif
1980 { 2014
2015 else {
1981 /* 2016 /*
1982 * Make sure the APICBASE points to the right address 2017 * Make sure the APICBASE points to the right address
1983 * 2018 *
@@ -1990,6 +2025,7 @@ static int lapic_resume(struct sys_device *dev)
1990 wrmsr(MSR_IA32_APICBASE, l, h); 2025 wrmsr(MSR_IA32_APICBASE, l, h);
1991 } 2026 }
1992 2027
2028 maxlvt = lapic_get_maxlvt();
1993 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 2029 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1994 apic_write(APIC_ID, apic_pm_state.apic_id); 2030 apic_write(APIC_ID, apic_pm_state.apic_id);
1995 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 2031 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -2013,8 +2049,20 @@ static int lapic_resume(struct sys_device *dev)
2013 apic_write(APIC_ESR, 0); 2049 apic_write(APIC_ESR, 0);
2014 apic_read(APIC_ESR); 2050 apic_read(APIC_ESR);
2015 2051
2052#ifdef CONFIG_INTR_REMAP
2053 if (intr_remapping_enabled)
2054 reenable_intr_remapping(EIM_32BIT_APIC_ID);
2055
2056 if (x2apic) {
2057 unmask_8259A();
2058 restore_IO_APIC_setup(ioapic_entries);
2059 free_ioapic_entries(ioapic_entries);
2060 }
2061#endif
2062
2016 local_irq_restore(flags); 2063 local_irq_restore(flags);
2017 2064
2065
2018 return 0; 2066 return 0;
2019} 2067}
2020 2068
@@ -2052,7 +2100,9 @@ static int __init init_lapic_sysfs(void)
2052 error = sysdev_register(&device_lapic); 2100 error = sysdev_register(&device_lapic);
2053 return error; 2101 return error;
2054} 2102}
2055device_initcall(init_lapic_sysfs); 2103
2104/* local apic needs to resume before other devices access its registers. */
2105core_initcall(init_lapic_sysfs);
2056 2106
2057#else /* CONFIG_PM */ 2107#else /* CONFIG_PM */
2058 2108
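(The device_initcall -> core_initcall move works because initcalls register in level order and sysdev resume runs in registration order (suspend in reverse), so the lapic sysdev comes back before ordinary device-level users touch APIC registers. From init.h of this era, roughly:)

    /* earlier level => earlier registration => earlier resume */
    #define core_initcall(fn)       __define_initcall("1", fn, 1)
    #define device_initcall(fn)     __define_initcall("6", fn, 6)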
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f933822dba18..0014714ea97b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -159,20 +159,6 @@ static int flat_apic_id_registered(void)
159 return physid_isset(read_xapic_id(), phys_cpu_present_map); 159 return physid_isset(read_xapic_id(), phys_cpu_present_map);
160} 160}
161 161
162static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
163{
164 return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
165}
166
167static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
168 const struct cpumask *andmask)
169{
170 unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
171 unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
172
173 return mask1 & mask2;
174}
175
176static int flat_phys_pkg_id(int initial_apic_id, int index_msb) 162static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
177{ 163{
178 return hard_smp_processor_id() >> index_msb; 164 return hard_smp_processor_id() >> index_msb;
@@ -213,8 +199,8 @@ struct apic apic_flat = {
213 .set_apic_id = set_apic_id, 199 .set_apic_id = set_apic_id,
214 .apic_id_mask = 0xFFu << 24, 200 .apic_id_mask = 0xFFu << 24,
215 201
216 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 202 .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
217 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, 203 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
218 204
219 .send_IPI_mask = flat_send_IPI_mask, 205 .send_IPI_mask = flat_send_IPI_mask,
220 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, 206 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index d806ecaa948f..676cdac385c0 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,12 +26,12 @@ static int bigsmp_apic_id_registered(void)
26 return 1; 26 return 1;
27} 27}
28 28
29static const cpumask_t *bigsmp_target_cpus(void) 29static const struct cpumask *bigsmp_target_cpus(void)
30{ 30{
31#ifdef CONFIG_SMP 31#ifdef CONFIG_SMP
32 return &cpu_online_map; 32 return cpu_online_mask;
33#else 33#else
34 return &cpumask_of_cpu(0); 34 return cpumask_of(0);
35#endif 35#endif
36} 36}
37 37
@@ -118,9 +118,9 @@ static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
118} 118}
119 119
120/* As we are using single CPU as destination, pick only one CPU here */ 120/* As we are using single CPU as destination, pick only one CPU here */
121static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) 121static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
122{ 122{
123 return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); 123 return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
124} 124}
125 125
126static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 126static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -188,10 +188,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
188 { } /* NULL entry stops DMI scanning */ 188 { } /* NULL entry stops DMI scanning */
189}; 189};
190 190
191static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) 191static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
192{ 192{
193 cpus_clear(*retmask); 193 cpumask_clear(retmask);
194 cpu_set(cpu, *retmask); 194 cpumask_set_cpu(cpu, retmask);
195} 195}
196 196
197static int probe_bigsmp(void) 197static int probe_bigsmp(void)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 19588f2770ee..1c11b819f245 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -410,7 +410,7 @@ static void es7000_enable_apic_mode(void)
410 WARN(1, "Command failed, status = %x\n", mip_status); 410 WARN(1, "Command failed, status = %x\n", mip_status);
411} 411}
412 412
413static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) 413static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
414{ 414{
415 /* Careful. Some cpus do not strictly honor the set of cpus 415 /* Careful. Some cpus do not strictly honor the set of cpus
416 * specified in the interrupt destination when using lowest 416 * specified in the interrupt destination when using lowest
@@ -420,7 +420,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
420 * deliver interrupts to the wrong hyperthread when only one 420 * deliver interrupts to the wrong hyperthread when only one
421 * hyperthread was specified in the interrupt destination. 421 * hyperthread was specified in the interrupt destination.
422 */ 422 */
423 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; 423 cpumask_clear(retmask);
424 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
424} 425}
425 426
426 427
@@ -455,14 +456,14 @@ static int es7000_apic_id_registered(void)
455 return 1; 456 return 1;
456} 457}
457 458
458static const cpumask_t *target_cpus_cluster(void) 459static const struct cpumask *target_cpus_cluster(void)
459{ 460{
460 return &CPU_MASK_ALL; 461 return cpu_all_mask;
461} 462}
462 463
463static const cpumask_t *es7000_target_cpus(void) 464static const struct cpumask *es7000_target_cpus(void)
464{ 465{
465 return &cpumask_of_cpu(smp_processor_id()); 466 return cpumask_of(smp_processor_id());
466} 467}
467 468
468static unsigned long 469static unsigned long
@@ -517,7 +518,7 @@ static void es7000_setup_apic_routing(void)
517 "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 518 "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
518 (apic_version[apic] == 0x14) ? 519 (apic_version[apic] == 0x14) ?
519 "Physical Cluster" : "Logical Cluster", 520 "Physical Cluster" : "Logical Cluster",
520 nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); 521 nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
521} 522}
522 523
523static int es7000_apicid_to_node(int logical_apicid) 524static int es7000_apicid_to_node(int logical_apicid)
@@ -572,7 +573,7 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
572 return 1; 573 return 1;
573} 574}
574 575
575static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) 576static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
576{ 577{
577 unsigned int round = 0; 578 unsigned int round = 0;
578 int cpu, uninitialized_var(apicid); 579 int cpu, uninitialized_var(apicid);
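(These subarch conversions all apply the same fixed-size-cpumask to struct-cpumask substitutions, visible side by side in the hunks above:)

    /* old cpumask_t idiom      ->  struct cpumask replacement
     * cpus_clear(*mask)        ->  cpumask_clear(mask)
     * cpu_set(cpu, *mask)      ->  cpumask_set_cpu(cpu, mask)
     * first_cpu(*mask)         ->  cpumask_first(mask)
     * cpus_addr(*mask)[0]      ->  cpumask_bits(mask)[0]
     * &cpu_online_map          ->  cpu_online_mask
     * &cpumask_of_cpu(cpu)     ->  cpumask_of(cpu)
     * &CPU_MASK_ALL            ->  cpu_all_mask
     */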
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 00e6071cefc4..767fe7e46d68 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -389,6 +389,8 @@ struct io_apic {
389 unsigned int index; 389 unsigned int index;
390 unsigned int unused[3]; 390 unsigned int unused[3];
391 unsigned int data; 391 unsigned int data;
392 unsigned int unused2[11];
393 unsigned int eoi;
392}; 394};
393 395
394static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 396static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
@@ -397,6 +399,12 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
397 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); 399 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
398} 400}
399 401
402static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
403{
404 struct io_apic __iomem *io_apic = io_apic_base(apic);
405 writel(vector, &io_apic->eoi);
406}
407
400static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 408static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
401{ 409{
402 struct io_apic __iomem *io_apic = io_apic_base(apic); 410 struct io_apic __iomem *io_apic = io_apic_base(apic);
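(The new padding makes the struct line up with the IO-APIC memory map of 4-byte registers: index at 0x00, data at 0x10, and the EOI register — present on newer, version >= 0x20 IO-APICs — at 0x40, which io_apic_eoi() targets by writing the vector to retire. As a quick check:)

    /* offsetof(struct io_apic, index) == 0x00
     * offsetof(struct io_apic, data)  == 0x10  (index + unused[3])
     * offsetof(struct io_apic, eoi)   == 0x40  (data + unused2[11])
     */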
@@ -546,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
546 554
547 apic = entry->apic; 555 apic = entry->apic;
548 pin = entry->pin; 556 pin = entry->pin;
549#ifdef CONFIG_INTR_REMAP
550 /* 557 /*
551 * With interrupt-remapping, destination information comes 558 * With interrupt-remapping, destination information comes
552 * from interrupt-remapping table entry. 559 * from interrupt-remapping table entry.
553 */ 560 */
554 if (!irq_remapped(irq)) 561 if (!irq_remapped(irq))
555 io_apic_write(apic, 0x11 + pin*2, dest); 562 io_apic_write(apic, 0x11 + pin*2, dest);
556#else
557 io_apic_write(apic, 0x11 + pin*2, dest);
558#endif
559 reg = io_apic_read(apic, 0x10 + pin*2); 563 reg = io_apic_read(apic, 0x10 + pin*2);
560 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 564 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
561 reg |= vector; 565 reg |= vector;
@@ -588,10 +592,12 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
588 if (assign_irq_vector(irq, cfg, mask)) 592 if (assign_irq_vector(irq, cfg, mask))
589 return BAD_APICID; 593 return BAD_APICID;
590 594
591 cpumask_and(desc->affinity, cfg->domain, mask); 595 /* check this before desc->affinity gets updated */
592 set_extra_move_desc(desc, mask); 596 set_extra_move_desc(desc, mask);
593 597
594 return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); 598 cpumask_copy(desc->affinity, mask);
599
600 return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
595} 601}
596 602
597static void 603static void
@@ -845,75 +851,106 @@ __setup("pirq=", ioapic_pirq_setup);
845#endif /* CONFIG_X86_32 */ 851#endif /* CONFIG_X86_32 */
846 852
847#ifdef CONFIG_INTR_REMAP 853#ifdef CONFIG_INTR_REMAP
848/* I/O APIC RTE contents at the OS boot up */ 854struct IO_APIC_route_entry **alloc_ioapic_entries(void)
849static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; 855{
856 int apic;
857 struct IO_APIC_route_entry **ioapic_entries;
858
859 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
860 GFP_ATOMIC);
861 if (!ioapic_entries)
862 return 0;
863
864 for (apic = 0; apic < nr_ioapics; apic++) {
865 ioapic_entries[apic] =
866 kzalloc(sizeof(struct IO_APIC_route_entry) *
867 nr_ioapic_registers[apic], GFP_ATOMIC);
868 if (!ioapic_entries[apic])
869 goto nomem;
870 }
871
872 return ioapic_entries;
873
874nomem:
875 while (--apic >= 0)
876 kfree(ioapic_entries[apic]);
877 kfree(ioapic_entries);
878
879 return 0;
880}
850 881
851/* 882/*
852 * Saves and masks all the unmasked IO-APIC RTE's 883 * Saves all the IO-APIC RTEs
853 */ 884 */
854int save_mask_IO_APIC_setup(void) 885int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
855{ 886{
856 union IO_APIC_reg_01 reg_01;
857 unsigned long flags;
858 int apic, pin; 887 int apic, pin;
859 888
860 /* 889 if (!ioapic_entries)
861 * The number of IO-APIC IRQ registers (== #pins): 890 return -ENOMEM;
862 */ 891
863 for (apic = 0; apic < nr_ioapics; apic++) { 892 for (apic = 0; apic < nr_ioapics; apic++) {
864 spin_lock_irqsave(&ioapic_lock, flags); 893 if (!ioapic_entries[apic])
865 reg_01.raw = io_apic_read(apic, 1); 894 return -ENOMEM;
866 spin_unlock_irqrestore(&ioapic_lock, flags); 895
867 nr_ioapic_registers[apic] = reg_01.bits.entries+1; 896 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
897 ioapic_entries[apic][pin] =
898 ioapic_read_entry(apic, pin);
868 } 899 }
869 900
901 return 0;
902}
903
904/*
905 * Mask all IO APIC entries.
906 */
907void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
908{
909 int apic, pin;
910
911 if (!ioapic_entries)
912 return;
913
870 for (apic = 0; apic < nr_ioapics; apic++) { 914 for (apic = 0; apic < nr_ioapics; apic++) {
871 early_ioapic_entries[apic] = 915 if (!ioapic_entries[apic])
872 kzalloc(sizeof(struct IO_APIC_route_entry) * 916 break;
873 nr_ioapic_registers[apic], GFP_KERNEL);
874 if (!early_ioapic_entries[apic])
875 goto nomem;
876 }
877 917
878 for (apic = 0; apic < nr_ioapics; apic++)
879 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 918 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
880 struct IO_APIC_route_entry entry; 919 struct IO_APIC_route_entry entry;
881 920
882 entry = early_ioapic_entries[apic][pin] = 921 entry = ioapic_entries[apic][pin];
883 ioapic_read_entry(apic, pin);
884 if (!entry.mask) { 922 if (!entry.mask) {
885 entry.mask = 1; 923 entry.mask = 1;
886 ioapic_write_entry(apic, pin, entry); 924 ioapic_write_entry(apic, pin, entry);
887 } 925 }
888 } 926 }
889 927 }
890 return 0;
891
892nomem:
893 while (apic >= 0)
894 kfree(early_ioapic_entries[apic--]);
895 memset(early_ioapic_entries, 0,
896 ARRAY_SIZE(early_ioapic_entries));
897
898 return -ENOMEM;
899} 928}
900 929
901void restore_IO_APIC_setup(void) 930/*
931 * Restore IO APIC entries which were saved in ioapic_entries.
932 */
933int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
902{ 934{
903 int apic, pin; 935 int apic, pin;
904 936
937 if (!ioapic_entries)
938 return -ENOMEM;
939
905 for (apic = 0; apic < nr_ioapics; apic++) { 940 for (apic = 0; apic < nr_ioapics; apic++) {
906 if (!early_ioapic_entries[apic]) 941 if (!ioapic_entries[apic])
907 break; 942 return -ENOMEM;
943
908 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 944 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
909 ioapic_write_entry(apic, pin, 945 ioapic_write_entry(apic, pin,
910 early_ioapic_entries[apic][pin]); 946 ioapic_entries[apic][pin]);
911 kfree(early_ioapic_entries[apic]);
912 early_ioapic_entries[apic] = NULL;
913 } 947 }
948 return 0;
914} 949}
915 950
916void reinit_intr_remapped_IO_APIC(int intr_remapping) 951void reinit_intr_remapped_IO_APIC(int intr_remapping,
952 struct IO_APIC_route_entry **ioapic_entries)
953
917{ 954{
918 /* 955 /*
919 * for now plain restore of previous settings. 956 * for now plain restore of previous settings.
@@ -922,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
922 * table entries. for now, do a plain restore, and wait for 959 * table entries. for now, do a plain restore, and wait for
923 * the setup_IO_APIC_irqs() to do proper initialization. 960 * the setup_IO_APIC_irqs() to do proper initialization.
924 */ 961 */
925 restore_IO_APIC_setup(); 962 restore_IO_APIC_setup(ioapic_entries);
963}
964
965void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
966{
967 int apic;
968
969 for (apic = 0; apic < nr_ioapics; apic++)
970 kfree(ioapic_entries[apic]);
971
972 kfree(ioapic_entries);
926} 973}
927#endif 974#endif
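(Taken together, the new helpers form an alloc/save/mask ... restore/free bracket around mode switches, exactly as enable_IR_x2apic() uses them in the apic.c hunk above. In outline:)

    struct IO_APIC_route_entry **entries = alloc_ioapic_entries();

    if (entries && !save_IO_APIC_setup(entries)) {
            mask_IO_APIC_setup(entries);    /* quiesce RTEs during the switch */
            /* ... enable interrupt remapping / x2apic ... */
            restore_IO_APIC_setup(entries); /* or reinit_intr_remapped_IO_APIC() */
    }
    if (entries)
            free_ioapic_entries(entries);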
928 975
@@ -1411,9 +1458,7 @@ void __setup_vector_irq(int cpu)
1411} 1458}
1412 1459
1413static struct irq_chip ioapic_chip; 1460static struct irq_chip ioapic_chip;
1414#ifdef CONFIG_INTR_REMAP
1415static struct irq_chip ir_ioapic_chip; 1461static struct irq_chip ir_ioapic_chip;
1416#endif
1417 1462
1418#define IOAPIC_AUTO -1 1463#define IOAPIC_AUTO -1
1419#define IOAPIC_EDGE 0 1464#define IOAPIC_EDGE 0
@@ -1452,7 +1497,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1452 else 1497 else
1453 desc->status &= ~IRQ_LEVEL; 1498 desc->status &= ~IRQ_LEVEL;
1454 1499
1455#ifdef CONFIG_INTR_REMAP
1456 if (irq_remapped(irq)) { 1500 if (irq_remapped(irq)) {
1457 desc->status |= IRQ_MOVE_PCNTXT; 1501 desc->status |= IRQ_MOVE_PCNTXT;
1458 if (trigger) 1502 if (trigger)
@@ -1464,7 +1508,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1464 handle_edge_irq, "edge"); 1508 handle_edge_irq, "edge");
1465 return; 1509 return;
1466 } 1510 }
1467#endif 1511
1468 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1512 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1469 trigger == IOAPIC_LEVEL) 1513 trigger == IOAPIC_LEVEL)
1470 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1514 set_irq_chip_and_handler_name(irq, &ioapic_chip,
@@ -1478,14 +1522,13 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1478int setup_ioapic_entry(int apic_id, int irq, 1522int setup_ioapic_entry(int apic_id, int irq,
1479 struct IO_APIC_route_entry *entry, 1523 struct IO_APIC_route_entry *entry,
1480 unsigned int destination, int trigger, 1524 unsigned int destination, int trigger,
1481 int polarity, int vector) 1525 int polarity, int vector, int pin)
1482{ 1526{
1483 /* 1527 /*
1484 * add it to the IO-APIC irq-routing table: 1528 * add it to the IO-APIC irq-routing table:
1485 */ 1529 */
1486 memset(entry,0,sizeof(*entry)); 1530 memset(entry,0,sizeof(*entry));
1487 1531
1488#ifdef CONFIG_INTR_REMAP
1489 if (intr_remapping_enabled) { 1532 if (intr_remapping_enabled) {
1490 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); 1533 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1491 struct irte irte; 1534 struct irte irte;
@@ -1504,7 +1547,14 @@ int setup_ioapic_entry(int apic_id, int irq,
1504 1547
1505 irte.present = 1; 1548 irte.present = 1;
1506 irte.dst_mode = apic->irq_dest_mode; 1549 irte.dst_mode = apic->irq_dest_mode;
1507 irte.trigger_mode = trigger; 1550 /*
1551 * Trigger mode in the IRTE will always be edge, and the
1552 * actual level or edge trigger will be set up in the IO-APIC
1553 * RTE. This will help simplify level triggered irq migration.
1554 * For more details, see the comments above explaining IO-APIC
1555 * irq migration in the presence of interrupt-remapping.
1556 */
1557 irte.trigger_mode = 0;
1508 irte.dlvry_mode = apic->irq_delivery_mode; 1558 irte.dlvry_mode = apic->irq_delivery_mode;
1509 irte.vector = vector; 1559 irte.vector = vector;
1510 irte.dest_id = IRTE_DEST(destination); 1560 irte.dest_id = IRTE_DEST(destination);
@@ -1515,18 +1565,21 @@ int setup_ioapic_entry(int apic_id, int irq,
1515 ir_entry->zero = 0; 1565 ir_entry->zero = 0;
1516 ir_entry->format = 1; 1566 ir_entry->format = 1;
1517 ir_entry->index = (index & 0x7fff); 1567 ir_entry->index = (index & 0x7fff);
1518 } else 1568 /*
1519#endif 1569 * IO-APIC RTE will be configured with virtual vector.
1520 { 1570 * irq handler will do the explicit EOI to the io-apic.
1571 */
1572 ir_entry->vector = pin;
1573 } else {
1521 entry->delivery_mode = apic->irq_delivery_mode; 1574 entry->delivery_mode = apic->irq_delivery_mode;
1522 entry->dest_mode = apic->irq_dest_mode; 1575 entry->dest_mode = apic->irq_dest_mode;
1523 entry->dest = destination; 1576 entry->dest = destination;
1577 entry->vector = vector;
1524 } 1578 }
1525 1579
1526 entry->mask = 0; /* enable IRQ */ 1580 entry->mask = 0; /* enable IRQ */
1527 entry->trigger = trigger; 1581 entry->trigger = trigger;
1528 entry->polarity = polarity; 1582 entry->polarity = polarity;
1529 entry->vector = vector;
1530 1583
1531 /* Mask level triggered irqs. 1584 /* Mask level triggered irqs.
1532 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1585 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
@@ -1561,7 +1614,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1561 1614
1562 1615
1563 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, 1616 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1564 dest, trigger, polarity, cfg->vector)) { 1617 dest, trigger, polarity, cfg->vector, pin)) {
1565 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1618 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1566 mp_ioapics[apic_id].apicid, pin); 1619 mp_ioapics[apic_id].apicid, pin);
1567 __clear_irq_vector(irq, cfg); 1620 __clear_irq_vector(irq, cfg);
@@ -1642,10 +1695,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1642{ 1695{
1643 struct IO_APIC_route_entry entry; 1696 struct IO_APIC_route_entry entry;
1644 1697
1645#ifdef CONFIG_INTR_REMAP
1646 if (intr_remapping_enabled) 1698 if (intr_remapping_enabled)
1647 return; 1699 return;
1648#endif
1649 1700
1650 memset(&entry, 0, sizeof(entry)); 1701 memset(&entry, 0, sizeof(entry));
1651 1702
@@ -2040,8 +2091,13 @@ void disable_IO_APIC(void)
2040 * If the i8259 is routed through an IOAPIC 2091 * If the i8259 is routed through an IOAPIC
2041 * Put that IOAPIC in virtual wire mode 2092 * Put that IOAPIC in virtual wire mode
2042 * so legacy interrupts can be delivered. 2093 * so legacy interrupts can be delivered.
2094 *
2095 * With interrupt-remapping, for now we will use virtual wire A mode,
2096 * as virtual wire B is a little more complex (we need to configure
2097 * both the IOAPIC RTE as well as the interrupt-remapping table entry).
2098 * As this gets called during crash dump, keep this simple for now.
2043 */ 2099 */
2044 if (ioapic_i8259.pin != -1) { 2100 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
2045 struct IO_APIC_route_entry entry; 2101 struct IO_APIC_route_entry entry;
2046 2102
2047 memset(&entry, 0, sizeof(entry)); 2103 memset(&entry, 0, sizeof(entry));
@@ -2061,7 +2117,10 @@ void disable_IO_APIC(void)
2061 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); 2117 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
2062 } 2118 }
2063 2119
2064 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 2120 /*
2121 * Use virtual wire A mode when interrupt remapping is enabled.
2122 */
2123 disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
2065} 2124}
2066 2125
2067#ifdef CONFIG_X86_32 2126#ifdef CONFIG_X86_32
@@ -2303,37 +2362,24 @@ static int ioapic_retrigger_irq(unsigned int irq)
2303#ifdef CONFIG_SMP 2362#ifdef CONFIG_SMP
2304 2363
2305#ifdef CONFIG_INTR_REMAP 2364#ifdef CONFIG_INTR_REMAP
2306static void ir_irq_migration(struct work_struct *work);
2307
2308static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
2309 2365
2310/* 2366/*
2311 * Migrate the IO-APIC irq in the presence of intr-remapping. 2367 * Migrate the IO-APIC irq in the presence of intr-remapping.
2312 * 2368 *
2313 * For edge triggered, irq migration is a simple atomic update (of vector 2369 * For both level and edge triggered, irq migration is a simple atomic
2314 * and cpu destination) of IRTE and flush the hardware cache. 2370 * update (of vector and cpu destination) of the IRTE and a flush of the hardware cache.
2315 *
2316 * For level triggered, we need to modify the io-apic RTE as well with the
2317 * updated vector information, along with modifying the IRTE with vector and destination.
2318 * So irq migration for level triggered is a little more complex compared to
2319 * edge triggered migration. But the good news is, we use the same algorithm
2320 * for level triggered migration as we have today, only difference being,
2321 * we now initiate the irq migration from process context instead of the
2322 * interrupt context.
2323 * 2371 *
2324 * In the future, when we do a directed EOI (combined with cpu EOI broadcast 2372 * For level triggered, we eliminate the io-apic RTE modification (with the
2325 * suppression) to the IO-APIC, level triggered irq migration will also be 2373 * updated vector information) by using a virtual vector (the io-apic pin number).
2326 * as simple as edge triggered migration and we can do the irq migration 2374 * The real vector used to interrupt the cpu comes from
2327 * with a simple atomic update to IO-APIC RTE. 2375 * the interrupt-remapping table entry.
2328 */ 2376 */
2329static void 2377static void
2330migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2378migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2331{ 2379{
2332 struct irq_cfg *cfg; 2380 struct irq_cfg *cfg;
2333 struct irte irte; 2381 struct irte irte;
2334 int modify_ioapic_rte;
2335 unsigned int dest; 2382 unsigned int dest;
2336 unsigned long flags;
2337 unsigned int irq; 2383 unsigned int irq;
2338 2384
2339 if (!cpumask_intersects(mask, cpu_online_mask)) 2385 if (!cpumask_intersects(mask, cpu_online_mask))
@@ -2351,13 +2397,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2351 2397
2352 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2398 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2353 2399
2354 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2355 if (modify_ioapic_rte) {
2356 spin_lock_irqsave(&ioapic_lock, flags);
2357 __target_IO_APIC_irq(irq, dest, cfg);
2358 spin_unlock_irqrestore(&ioapic_lock, flags);
2359 }
2360
2361 irte.vector = cfg->vector; 2400 irte.vector = cfg->vector;
2362 irte.dest_id = IRTE_DEST(dest); 2401 irte.dest_id = IRTE_DEST(dest);
2363 2402
@@ -2372,73 +2411,12 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2372 cpumask_copy(desc->affinity, mask); 2411 cpumask_copy(desc->affinity, mask);
2373} 2412}
2374 2413
2375static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2376{
2377 int ret = -1;
2378 struct irq_cfg *cfg = desc->chip_data;
2379
2380 mask_IO_APIC_irq_desc(desc);
2381
2382 if (io_apic_level_ack_pending(cfg)) {
2383 /*
2384 * Interrupt in progress. Migrating irq now will change the
2385 * vector information in the IO-APIC RTE and that will confuse
2386 * the EOI broadcast performed by cpu.
2387 * So, delay the irq migration to the next instance.
2388 */
2389 schedule_delayed_work(&ir_migration_work, 1);
2390 goto unmask;
2391 }
2392
2393 /* everything is clear. we have right of way */
2394 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2395
2396 ret = 0;
2397 desc->status &= ~IRQ_MOVE_PENDING;
2398 cpumask_clear(desc->pending_mask);
2399
2400unmask:
2401 unmask_IO_APIC_irq_desc(desc);
2402
2403 return ret;
2404}
2405
2406static void ir_irq_migration(struct work_struct *work)
2407{
2408 unsigned int irq;
2409 struct irq_desc *desc;
2410
2411 for_each_irq_desc(irq, desc) {
2412 if (desc->status & IRQ_MOVE_PENDING) {
2413 unsigned long flags;
2414
2415 spin_lock_irqsave(&desc->lock, flags);
2416 if (!desc->chip->set_affinity ||
2417 !(desc->status & IRQ_MOVE_PENDING)) {
2418 desc->status &= ~IRQ_MOVE_PENDING;
2419 spin_unlock_irqrestore(&desc->lock, flags);
2420 continue;
2421 }
2422
2423 desc->chip->set_affinity(irq, desc->pending_mask);
2424 spin_unlock_irqrestore(&desc->lock, flags);
2425 }
2426 }
2427}
2428
2429/* 2414/*
2430 * Migrates the IRQ destination in the process context. 2415 * Migrates the IRQ destination in the process context.
2431 */ 2416 */
2432static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2417static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2433 const struct cpumask *mask) 2418 const struct cpumask *mask)
2434{ 2419{
2435 if (desc->status & IRQ_LEVEL) {
2436 desc->status |= IRQ_MOVE_PENDING;
2437 cpumask_copy(desc->pending_mask, mask);
2438 migrate_irq_remapped_level_desc(desc);
2439 return;
2440 }
2441
2442 migrate_ioapic_irq_desc(desc, mask); 2420 migrate_ioapic_irq_desc(desc, mask);
2443} 2421}
2444static void set_ir_ioapic_affinity_irq(unsigned int irq, 2422static void set_ir_ioapic_affinity_irq(unsigned int irq,
@@ -2448,6 +2426,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq,
2448 2426
2449 set_ir_ioapic_affinity_irq_desc(desc, mask); 2427 set_ir_ioapic_affinity_irq_desc(desc, mask);
2450} 2428}
2429#else
2430static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2431 const struct cpumask *mask)
2432{
2433}
2451#endif 2434#endif
2452 2435
2453asmlinkage void smp_irq_move_cleanup_interrupt(void) 2436asmlinkage void smp_irq_move_cleanup_interrupt(void)
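
With the level-triggered special case removed, the remapped migration path reduces to the atomic IRTE update shown in the hunks above; a minimal sketch of the surviving hot path, assuming modify_irte() performs the hardware cache flush the comment refers to:

	cfg  = desc->chip_data;
	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);

	irte.vector  = cfg->vector;
	irte.dest_id = IRTE_DEST(dest);
	modify_irte(irq, &irte);	/* atomic vector/destination update */

	cpumask_copy(desc->affinity, mask);
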
@@ -2461,6 +2444,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2461 me = smp_processor_id(); 2444 me = smp_processor_id();
2462 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2445 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
2463 unsigned int irq; 2446 unsigned int irq;
2447 unsigned int irr;
2464 struct irq_desc *desc; 2448 struct irq_desc *desc;
2465 struct irq_cfg *cfg; 2449 struct irq_cfg *cfg;
2466 irq = __get_cpu_var(vector_irq)[vector]; 2450 irq = __get_cpu_var(vector_irq)[vector];
@@ -2480,6 +2464,18 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2480 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) 2464 if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2481 goto unlock; 2465 goto unlock;
2482 2466
2467 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
2468 /*
2469 * Check if the vector that needs to be cleaned up is
2470 * registered at the cpu's IRR. If so, then this is not
2471 * the best time to clean it up. Let's clean it up in the
2472 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
2473 * to myself.
2474 */
2475 if (irr & (1 << (vector % 32))) {
2476 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2477 goto unlock;
2478 }
2483 __get_cpu_var(vector_irq)[vector] = -1; 2479 __get_cpu_var(vector_irq)[vector] = -1;
2484 cfg->move_cleanup_count--; 2480 cfg->move_cleanup_count--;
2485unlock: 2481unlock:
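
The new IRR test relies on the local APIC register layout: the 256 request bits are spread over eight 32-bit registers spaced 0x10 apart, so vector V maps to dword V/32, bit V%32. A worked sketch of the lookup:

	unsigned int reg = APIC_IRR + (vector / 32) * 0x10;	/* which dword */
	unsigned int bit = 1U << (vector % 32);			/* which bit */

	if (apic_read(reg) & bit) {
		/* vector still pending on this cpu: defer the cleanup
		 * by re-sending the cleanup IPI to ourselves */
		apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
		goto unlock;
	}
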
@@ -2528,17 +2524,51 @@ static void irq_complete_move(struct irq_desc **descp)
2528static inline void irq_complete_move(struct irq_desc **descp) {} 2524static inline void irq_complete_move(struct irq_desc **descp) {}
2529#endif 2525#endif
2530 2526
2531#ifdef CONFIG_INTR_REMAP 2527#ifdef CONFIG_X86_X2APIC
2528static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2529{
2530 int apic, pin;
2531 struct irq_pin_list *entry;
2532
2533 entry = cfg->irq_2_pin;
2534 for (;;) {
2535
2536 if (!entry)
2537 break;
2538
2539 apic = entry->apic;
2540 pin = entry->pin;
2541 io_apic_eoi(apic, pin);
2542 entry = entry->next;
2543 }
2544}
2545
2546static void
2547eoi_ioapic_irq(struct irq_desc *desc)
2548{
2549 struct irq_cfg *cfg;
2550 unsigned long flags;
2551 unsigned int irq;
2552
2553 irq = desc->irq;
2554 cfg = desc->chip_data;
2555
2556 spin_lock_irqsave(&ioapic_lock, flags);
2557 __eoi_ioapic_irq(irq, cfg);
2558 spin_unlock_irqrestore(&ioapic_lock, flags);
2559}
2560
2532static void ack_x2apic_level(unsigned int irq) 2561static void ack_x2apic_level(unsigned int irq)
2533{ 2562{
2563 struct irq_desc *desc = irq_to_desc(irq);
2534 ack_x2APIC_irq(); 2564 ack_x2APIC_irq();
2565 eoi_ioapic_irq(desc);
2535} 2566}
2536 2567
2537static void ack_x2apic_edge(unsigned int irq) 2568static void ack_x2apic_edge(unsigned int irq)
2538{ 2569{
2539 ack_x2APIC_irq(); 2570 ack_x2APIC_irq();
2540} 2571}
2541
2542#endif 2572#endif
2543 2573
2544static void ack_apic_edge(unsigned int irq) 2574static void ack_apic_edge(unsigned int irq)
@@ -2649,6 +2679,26 @@ static void ack_apic_level(unsigned int irq)
2649#endif 2679#endif
2650} 2680}
2651 2681
2682#ifdef CONFIG_INTR_REMAP
2683static void ir_ack_apic_edge(unsigned int irq)
2684{
2685#ifdef CONFIG_X86_X2APIC
2686 if (x2apic_enabled())
2687 return ack_x2apic_edge(irq);
2688#endif
2689 return ack_apic_edge(irq);
2690}
2691
2692static void ir_ack_apic_level(unsigned int irq)
2693{
2694#ifdef CONFIG_X86_X2APIC
2695 if (x2apic_enabled())
2696 return ack_x2apic_level(irq);
2697#endif
2698 return ack_apic_level(irq);
2699}
2700#endif /* CONFIG_INTR_REMAP */
2701
2652static struct irq_chip ioapic_chip __read_mostly = { 2702static struct irq_chip ioapic_chip __read_mostly = {
2653 .name = "IO-APIC", 2703 .name = "IO-APIC",
2654 .startup = startup_ioapic_irq, 2704 .startup = startup_ioapic_irq,
@@ -2662,20 +2712,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
2662 .retrigger = ioapic_retrigger_irq, 2712 .retrigger = ioapic_retrigger_irq,
2663}; 2713};
2664 2714
2665#ifdef CONFIG_INTR_REMAP
2666static struct irq_chip ir_ioapic_chip __read_mostly = { 2715static struct irq_chip ir_ioapic_chip __read_mostly = {
2667 .name = "IR-IO-APIC", 2716 .name = "IR-IO-APIC",
2668 .startup = startup_ioapic_irq, 2717 .startup = startup_ioapic_irq,
2669 .mask = mask_IO_APIC_irq, 2718 .mask = mask_IO_APIC_irq,
2670 .unmask = unmask_IO_APIC_irq, 2719 .unmask = unmask_IO_APIC_irq,
2671 .ack = ack_x2apic_edge, 2720#ifdef CONFIG_INTR_REMAP
2672 .eoi = ack_x2apic_level, 2721 .ack = ir_ack_apic_edge,
2722 .eoi = ir_ack_apic_level,
2673#ifdef CONFIG_SMP 2723#ifdef CONFIG_SMP
2674 .set_affinity = set_ir_ioapic_affinity_irq, 2724 .set_affinity = set_ir_ioapic_affinity_irq,
2675#endif 2725#endif
2726#endif
2676 .retrigger = ioapic_retrigger_irq, 2727 .retrigger = ioapic_retrigger_irq,
2677}; 2728};
2678#endif
2679 2729
2680static inline void init_IO_APIC_traps(void) 2730static inline void init_IO_APIC_traps(void)
2681{ 2731{
@@ -2901,10 +2951,8 @@ static inline void __init check_timer(void)
2901 * 8259A. 2951 * 8259A.
2902 */ 2952 */
2903 if (pin1 == -1) { 2953 if (pin1 == -1) {
2904#ifdef CONFIG_INTR_REMAP
2905 if (intr_remapping_enabled) 2954 if (intr_remapping_enabled)
2906 panic("BIOS bug: timer not connected to IO-APIC"); 2955 panic("BIOS bug: timer not connected to IO-APIC");
2907#endif
2908 pin1 = pin2; 2956 pin1 = pin2;
2909 apic1 = apic2; 2957 apic1 = apic2;
2910 no_pin1 = 1; 2958 no_pin1 = 1;
@@ -2940,10 +2988,8 @@ static inline void __init check_timer(void)
2940 clear_IO_APIC_pin(0, pin1); 2988 clear_IO_APIC_pin(0, pin1);
2941 goto out; 2989 goto out;
2942 } 2990 }
2943#ifdef CONFIG_INTR_REMAP
2944 if (intr_remapping_enabled) 2991 if (intr_remapping_enabled)
2945 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2992 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2946#endif
2947 local_irq_disable(); 2993 local_irq_disable();
2948 clear_IO_APIC_pin(apic1, pin1); 2994 clear_IO_APIC_pin(apic1, pin1);
2949 if (!no_pin1) 2995 if (!no_pin1)
@@ -3237,9 +3283,7 @@ void destroy_irq(unsigned int irq)
3237 if (desc) 3283 if (desc)
3238 desc->chip_data = cfg; 3284 desc->chip_data = cfg;
3239 3285
3240#ifdef CONFIG_INTR_REMAP
3241 free_irte(irq); 3286 free_irte(irq);
3242#endif
3243 spin_lock_irqsave(&vector_lock, flags); 3287 spin_lock_irqsave(&vector_lock, flags);
3244 __clear_irq_vector(irq, cfg); 3288 __clear_irq_vector(irq, cfg);
3245 spin_unlock_irqrestore(&vector_lock, flags); 3289 spin_unlock_irqrestore(&vector_lock, flags);
@@ -3265,7 +3309,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3265 3309
3266 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3310 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3267 3311
3268#ifdef CONFIG_INTR_REMAP
3269 if (irq_remapped(irq)) { 3312 if (irq_remapped(irq)) {
3270 struct irte irte; 3313 struct irte irte;
3271 int ir_index; 3314 int ir_index;
@@ -3291,10 +3334,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3291 MSI_ADDR_IR_SHV | 3334 MSI_ADDR_IR_SHV |
3292 MSI_ADDR_IR_INDEX1(ir_index) | 3335 MSI_ADDR_IR_INDEX1(ir_index) |
3293 MSI_ADDR_IR_INDEX2(ir_index); 3336 MSI_ADDR_IR_INDEX2(ir_index);
3294 } else 3337 } else {
3295#endif 3338 if (x2apic_enabled())
3296 { 3339 msg->address_hi = MSI_ADDR_BASE_HI |
3297 msg->address_hi = MSI_ADDR_BASE_HI; 3340 MSI_ADDR_EXT_DEST_ID(dest);
3341 else
3342 msg->address_hi = MSI_ADDR_BASE_HI;
3343
3298 msg->address_lo = 3344 msg->address_lo =
3299 MSI_ADDR_BASE_LO | 3345 MSI_ADDR_BASE_LO |
3300 ((apic->irq_dest_mode == 0) ? 3346 ((apic->irq_dest_mode == 0) ?
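
A hedged reading of the new non-remapped branch: with x2APIC the destination APIC ID can exceed the 8-bit field in the low MSI address dword, and MSI_ADDR_EXT_DEST_ID() (from asm/msidef.h) is assumed to carry the remaining ID bits in address_hi:

	if (x2apic_enabled())
		msg->address_hi = MSI_ADDR_BASE_HI |
				  MSI_ADDR_EXT_DEST_ID(dest);	/* ID bits 8 and up */
	else
		msg->address_hi = MSI_ADDR_BASE_HI;	/* 8-bit ID fits in address_lo */
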
@@ -3394,15 +3440,16 @@ static struct irq_chip msi_chip = {
3394 .retrigger = ioapic_retrigger_irq, 3440 .retrigger = ioapic_retrigger_irq,
3395}; 3441};
3396 3442
3397#ifdef CONFIG_INTR_REMAP
3398static struct irq_chip msi_ir_chip = { 3443static struct irq_chip msi_ir_chip = {
3399 .name = "IR-PCI-MSI", 3444 .name = "IR-PCI-MSI",
3400 .unmask = unmask_msi_irq, 3445 .unmask = unmask_msi_irq,
3401 .mask = mask_msi_irq, 3446 .mask = mask_msi_irq,
3402 .ack = ack_x2apic_edge, 3447#ifdef CONFIG_INTR_REMAP
3448 .ack = ir_ack_apic_edge,
3403#ifdef CONFIG_SMP 3449#ifdef CONFIG_SMP
3404 .set_affinity = ir_set_msi_irq_affinity, 3450 .set_affinity = ir_set_msi_irq_affinity,
3405#endif 3451#endif
3452#endif
3406 .retrigger = ioapic_retrigger_irq, 3453 .retrigger = ioapic_retrigger_irq,
3407}; 3454};
3408 3455
@@ -3432,7 +3479,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3432 } 3479 }
3433 return index; 3480 return index;
3434} 3481}
3435#endif
3436 3482
3437static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3483static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3438{ 3484{
@@ -3446,7 +3492,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3446 set_irq_msi(irq, msidesc); 3492 set_irq_msi(irq, msidesc);
3447 write_msi_msg(irq, &msg); 3493 write_msi_msg(irq, &msg);
3448 3494
3449#ifdef CONFIG_INTR_REMAP
3450 if (irq_remapped(irq)) { 3495 if (irq_remapped(irq)) {
3451 struct irq_desc *desc = irq_to_desc(irq); 3496 struct irq_desc *desc = irq_to_desc(irq);
3452 /* 3497 /*
@@ -3455,7 +3500,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3455 desc->status |= IRQ_MOVE_PCNTXT; 3500 desc->status |= IRQ_MOVE_PCNTXT;
3456 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); 3501 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3457 } else 3502 } else
3458#endif
3459 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3503 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3460 3504
3461 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3505 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3469,11 +3513,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3469 int ret, sub_handle; 3513 int ret, sub_handle;
3470 struct msi_desc *msidesc; 3514 struct msi_desc *msidesc;
3471 unsigned int irq_want; 3515 unsigned int irq_want;
3472 3516 struct intel_iommu *iommu = NULL;
3473#ifdef CONFIG_INTR_REMAP
3474 struct intel_iommu *iommu = 0;
3475 int index = 0; 3517 int index = 0;
3476#endif 3518
3519 /* x86 doesn't support multiple MSI yet */
3520 if (type == PCI_CAP_ID_MSI && nvec > 1)
3521 return 1;
3477 3522
3478 irq_want = nr_irqs_gsi; 3523 irq_want = nr_irqs_gsi;
3479 sub_handle = 0; 3524 sub_handle = 0;
@@ -3482,7 +3527,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3482 if (irq == 0) 3527 if (irq == 0)
3483 return -1; 3528 return -1;
3484 irq_want = irq + 1; 3529 irq_want = irq + 1;
3485#ifdef CONFIG_INTR_REMAP
3486 if (!intr_remapping_enabled) 3530 if (!intr_remapping_enabled)
3487 goto no_ir; 3531 goto no_ir;
3488 3532
@@ -3510,7 +3554,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3510 set_irte_irq(irq, iommu, index, sub_handle); 3554 set_irte_irq(irq, iommu, index, sub_handle);
3511 } 3555 }
3512no_ir: 3556no_ir:
3513#endif
3514 ret = setup_msi_irq(dev, msidesc, irq); 3557 ret = setup_msi_irq(dev, msidesc, irq);
3515 if (ret < 0) 3558 if (ret < 0)
3516 goto error; 3559 goto error;
@@ -3528,7 +3571,7 @@ void arch_teardown_msi_irq(unsigned int irq)
3528 destroy_irq(irq); 3571 destroy_irq(irq);
3529} 3572}
3530 3573
3531#ifdef CONFIG_DMAR 3574#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3532#ifdef CONFIG_SMP 3575#ifdef CONFIG_SMP
3533static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3576static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3534{ 3577{
@@ -3609,7 +3652,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3609 3652
3610#endif /* CONFIG_SMP */ 3653#endif /* CONFIG_SMP */
3611 3654
3612struct irq_chip hpet_msi_type = { 3655static struct irq_chip hpet_msi_type = {
3613 .name = "HPET_MSI", 3656 .name = "HPET_MSI",
3614 .unmask = hpet_msi_unmask, 3657 .unmask = hpet_msi_unmask,
3615 .mask = hpet_msi_mask, 3658 .mask = hpet_msi_mask,
@@ -4045,11 +4088,9 @@ void __init setup_ioapic_dest(void)
4045 else 4088 else
4046 mask = apic->target_cpus(); 4089 mask = apic->target_cpus();
4047 4090
4048#ifdef CONFIG_INTR_REMAP
4049 if (intr_remapping_enabled) 4091 if (intr_remapping_enabled)
4050 set_ir_ioapic_affinity_irq_desc(desc, mask); 4092 set_ir_ioapic_affinity_irq_desc(desc, mask);
4051 else 4093 else
4052#endif
4053 set_ioapic_affinity_irq_desc(desc, mask); 4094 set_ioapic_affinity_irq_desc(desc, mask);
4054 } 4095 }
4055 4096
@@ -4142,9 +4183,12 @@ static int __init ioapic_insert_resources(void)
4142 struct resource *r = ioapic_resources; 4183 struct resource *r = ioapic_resources;
4143 4184
4144 if (!r) { 4185 if (!r) {
4145 printk(KERN_ERR 4186 if (nr_ioapics > 0) {
4146 "IO APIC resources could be not be allocated.\n"); 4187 printk(KERN_ERR
4147 return -1; 4188 "IO APIC resources couldn't be allocated.\n");
4189 return -1;
4190 }
4191 return 0;
4148 } 4192 }
4149 4193
4150 for (i = 0; i < nr_ioapics; i++) { 4194 for (i = 0; i < nr_ioapics; i++) {
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index bdfad80c3cf1..d6bd62407152 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -39,7 +39,7 @@
39int unknown_nmi_panic; 39int unknown_nmi_panic;
40int nmi_watchdog_enabled; 40int nmi_watchdog_enabled;
41 41
42static cpumask_t backtrace_mask = CPU_MASK_NONE; 42static cpumask_var_t backtrace_mask;
43 43
44/* nmi_active: 44/* nmi_active:
45 * >0: the lapic NMI watchdog is active, but can be disabled 45 * >0: the lapic NMI watchdog is active, but can be disabled
@@ -138,6 +138,7 @@ int __init check_nmi_watchdog(void)
138 if (!prev_nmi_count) 138 if (!prev_nmi_count)
139 goto error; 139 goto error;
140 140
141 alloc_cpumask_var(&backtrace_mask, GFP_KERNEL);
141 printk(KERN_INFO "Testing NMI watchdog ... "); 142 printk(KERN_INFO "Testing NMI watchdog ... ");
142 143
143#ifdef CONFIG_SMP 144#ifdef CONFIG_SMP
@@ -413,14 +414,14 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
413 touched = 1; 414 touched = 1;
414 } 415 }
415 416
416 if (cpu_isset(cpu, backtrace_mask)) { 417 if (cpumask_test_cpu(cpu, backtrace_mask)) {
417 static DEFINE_SPINLOCK(lock); /* Serialise the printks */ 418 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
418 419
419 spin_lock(&lock); 420 spin_lock(&lock);
420 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 421 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
421 dump_stack(); 422 dump_stack();
422 spin_unlock(&lock); 423 spin_unlock(&lock);
423 cpu_clear(cpu, backtrace_mask); 424 cpumask_clear_cpu(cpu, backtrace_mask);
424 } 425 }
425 426
426 /* Could check oops_in_progress here too, but it's safer not to */ 427 /* Could check oops_in_progress here too, but it's safer not to */
@@ -554,10 +555,10 @@ void __trigger_all_cpu_backtrace(void)
554{ 555{
555 int i; 556 int i;
556 557
557 backtrace_mask = cpu_online_map; 558 cpumask_copy(backtrace_mask, cpu_online_mask);
558 /* Wait for up to 10 seconds for all CPUs to do the backtrace */ 559 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
559 for (i = 0; i < 10 * 1000; i++) { 560 for (i = 0; i < 10 * 1000; i++) {
560 if (cpus_empty(backtrace_mask)) 561 if (cpumask_empty(backtrace_mask))
561 break; 562 break;
562 mdelay(1); 563 mdelay(1);
563 } 564 }
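
The nmi.c hunks are a standard cpumask_var_t conversion; a self-contained sketch of that API (example_init() is a hypothetical caller, not part of the patch):

#include <linux/cpumask.h>
#include <linux/slab.h>

static cpumask_var_t mask;

static int __init example_init(void)
{
	/* Off-stack bitmap with CONFIG_CPUMASK_OFFSTACK=y, an embedded
	 * array otherwise; allocation can only fail in the former case. */
	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(mask, cpu_online_mask);
	if (cpumask_test_cpu(0, mask))
		cpumask_clear_cpu(0, mask);

	free_cpumask_var(mask);
	return 0;
}
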
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index ba2fc6465534..533e59c6fc82 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -334,9 +334,9 @@ static inline void numaq_smp_callin_clear_local_apic(void)
334 clear_local_APIC(); 334 clear_local_APIC();
335} 335}
336 336
337static inline const cpumask_t *numaq_target_cpus(void) 337static inline const struct cpumask *numaq_target_cpus(void)
338{ 338{
339 return &CPU_MASK_ALL; 339 return cpu_all_mask;
340} 340}
341 341
342static inline unsigned long 342static inline unsigned long
@@ -427,7 +427,7 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
427 * We use physical apicids here, not logical, so just return the default 427 * We use physical apicids here, not logical, so just return the default
428 * physical broadcast to stop people from breaking us 428 * physical broadcast to stop people from breaking us
429 */ 429 */
430static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) 430static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
431{ 431{
432 return 0x0F; 432 return 0x0F;
433} 433}
@@ -462,7 +462,7 @@ static int probe_numaq(void)
462 return found_numaq; 462 return found_numaq;
463} 463}
464 464
465static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) 465static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
466{ 466{
467 /* Careful. Some cpus do not strictly honor the set of cpus 467 /* Careful. Some cpus do not strictly honor the set of cpus
468 * specified in the interrupt destination when using lowest 468 * specified in the interrupt destination when using lowest
@@ -472,7 +472,8 @@ static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
472 * deliver interrupts to the wrong hyperthread when only one 472 * deliver interrupts to the wrong hyperthread when only one
473 * hyperthread was specified in the interrupt destination. 473 * hyperthread was specified in the interrupt destination.
474 */ 474 */
475 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; 475 cpumask_clear(retmask);
476 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
476} 477}
477 478
478static void numaq_setup_portio_remap(void) 479static void numaq_setup_portio_remap(void)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 141c99a1c264..01eda2ac65e4 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -83,7 +83,8 @@ static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
83 * deliver interrupts to the wrong hyperthread when only one 83 * deliver interrupts to the wrong hyperthread when only one
84 * hyperthread was specified in the interrupt destination. 84 * hyperthread was specified in the interrupt destination.
85 */ 85 */
86 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; 86 cpumask_clear(retmask);
87 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
87} 88}
88 89
89/* should be called last. */ 90/* should be called last. */
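
All three vector_allocation_domain conversions in this series trade the same idiom; the motivation is that the old form copies an entire cpumask_t by value, which does not scale once masks can live off-stack:

	/* old: whole-struct assignment of a possibly huge cpumask_t */
	*retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };

	/* new: operate on the struct cpumask in place */
	cpumask_clear(retmask);
	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
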
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 8d7748efe6a8..1783652bb0e5 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -68,6 +68,13 @@ void __init default_setup_apic_routing(void)
68 apic = &apic_physflat; 68 apic = &apic_physflat;
69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
70 } 70 }
71
72 /*
73 * Now that apic routing model is selected, configure the
74 * fault handling for intr remapping.
75 */
76 if (intr_remapping_enabled)
77 enable_drhd_fault_handling();
71} 78}
72 79
73/* Same for both flat and physical. */ 80/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index aac52fa873ff..9cfe1f415d81 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -53,23 +53,19 @@ static unsigned summit_get_apic_id(unsigned long x)
53 return (x >> 24) & 0xFF; 53 return (x >> 24) & 0xFF;
54} 54}
55 55
56static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) 56static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
57{ 57{
58 default_send_IPI_mask_sequence_logical(mask, vector); 58 default_send_IPI_mask_sequence_logical(mask, vector);
59} 59}
60 60
61static void summit_send_IPI_allbutself(int vector) 61static void summit_send_IPI_allbutself(int vector)
62{ 62{
63 cpumask_t mask = cpu_online_map; 63 default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
64 cpu_clear(smp_processor_id(), mask);
65
66 if (!cpus_empty(mask))
67 summit_send_IPI_mask(&mask, vector);
68} 64}
69 65
70static void summit_send_IPI_all(int vector) 66static void summit_send_IPI_all(int vector)
71{ 67{
72 summit_send_IPI_mask(&cpu_online_map, vector); 68 summit_send_IPI_mask(cpu_online_mask, vector);
73} 69}
74 70
75#include <asm/tsc.h> 71#include <asm/tsc.h>
@@ -186,13 +182,13 @@ static inline int is_WPEG(struct rio_detail *rio){
186 182
187#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 183#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
188 184
189static const cpumask_t *summit_target_cpus(void) 185static const struct cpumask *summit_target_cpus(void)
190{ 186{
191 /* CPU_MASK_ALL (0xff) has undefined behaviour with 187 /* CPU_MASK_ALL (0xff) has undefined behaviour with
192 * dest_LowestPrio mode logical clustered apic interrupt routing 188 * dest_LowestPrio mode logical clustered apic interrupt routing
193 * Just start on cpu 0. IRQ balancing will spread load 189 * Just start on cpu 0. IRQ balancing will spread load
194 */ 190 */
195 return &cpumask_of_cpu(0); 191 return cpumask_of(0);
196} 192}
197 193
198static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) 194static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -289,7 +285,7 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
289 return 1; 285 return 1;
290} 286}
291 287
292static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) 288static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
293{ 289{
294 unsigned int round = 0; 290 unsigned int round = 0;
295 int cpu, apicid = 0; 291 int cpu, apicid = 0;
@@ -346,7 +342,7 @@ static int probe_summit(void)
346 return 0; 342 return 0;
347} 343}
348 344
349static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) 345static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
350{ 346{
351 /* Careful. Some cpus do not strictly honor the set of cpus 347 /* Careful. Some cpus do not strictly honor the set of cpus
352 * specified in the interrupt destination when using lowest 348 * specified in the interrupt destination when using lowest
@@ -356,7 +352,8 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
356 * deliver interrupts to the wrong hyperthread when only one 352 * deliver interrupts to the wrong hyperthread when only one
357 * hyperthread was specified in the interrupt destination. 353 * hyperthread was specified in the interrupt destination.
358 */ 354 */
359 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; 355 cpumask_clear(retmask);
356 cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
360} 357}
361 358
362#ifdef CONFIG_X86_SUMMIT_NUMA 359#ifdef CONFIG_X86_SUMMIT_NUMA
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8fb87b6dd633..4a903e2f0d17 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
57 unsigned long query_cpu; 57 unsigned long query_cpu;
58 unsigned long flags; 58 unsigned long flags;
59 59
60 x2apic_wrmsr_fence();
61
60 local_irq_save(flags); 62 local_irq_save(flags);
61 for_each_cpu(query_cpu, mask) { 63 for_each_cpu(query_cpu, mask) {
62 __x2apic_send_IPI_dest( 64 __x2apic_send_IPI_dest(
@@ -73,6 +75,8 @@ static void
73 unsigned long query_cpu; 75 unsigned long query_cpu;
74 unsigned long flags; 76 unsigned long flags;
75 77
78 x2apic_wrmsr_fence();
79
76 local_irq_save(flags); 80 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 81 for_each_cpu(query_cpu, mask) {
78 if (query_cpu == this_cpu) 82 if (query_cpu == this_cpu)
@@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector)
90 unsigned long query_cpu; 94 unsigned long query_cpu;
91 unsigned long flags; 95 unsigned long flags;
92 96
97 x2apic_wrmsr_fence();
98
93 local_irq_save(flags); 99 local_irq_save(flags);
94 for_each_online_cpu(query_cpu) { 100 for_each_online_cpu(query_cpu) {
95 if (query_cpu == this_cpu) 101 if (query_cpu == this_cpu)
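
Both x2apic drivers gain the same barrier before the ICR write; a sketch of the helper these hunks assume, on the premise that a WRMSR to the x2APIC ICR is not a serializing store and earlier memory updates must be globally visible before the IPI goes out:

static inline void x2apic_wrmsr_fence(void)
{
	/* order prior stores ahead of the non-serializing ICR MSR write */
	asm volatile("mfence" : : : "memory");
}
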
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 23625b9f98b2..a284359627e7 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58 unsigned long query_cpu; 58 unsigned long query_cpu;
59 unsigned long flags; 59 unsigned long flags;
60 60
61 x2apic_wrmsr_fence();
62
61 local_irq_save(flags); 63 local_irq_save(flags);
62 for_each_cpu(query_cpu, mask) { 64 for_each_cpu(query_cpu, mask) {
63 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 65 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
@@ -73,6 +75,8 @@ static void
73 unsigned long query_cpu; 75 unsigned long query_cpu;
74 unsigned long flags; 76 unsigned long flags;
75 77
78 x2apic_wrmsr_fence();
79
76 local_irq_save(flags); 80 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 81 for_each_cpu(query_cpu, mask) {
78 if (query_cpu != this_cpu) 82 if (query_cpu != this_cpu)
@@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector)
89 unsigned long query_cpu; 93 unsigned long query_cpu;
90 unsigned long flags; 94 unsigned long flags;
91 95
96 x2apic_wrmsr_fence();
97
92 local_irq_save(flags); 98 local_irq_save(flags);
93 for_each_online_cpu(query_cpu) { 99 for_each_online_cpu(query_cpu) {
94 if (query_cpu == this_cpu) 100 if (query_cpu == this_cpu)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1bd6da1f8fad..1248318436e8 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -118,17 +118,12 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
118 118
119static void uv_send_IPI_one(int cpu, int vector) 119static void uv_send_IPI_one(int cpu, int vector)
120{ 120{
121 unsigned long val, apicid; 121 unsigned long apicid;
122 int pnode; 122 int pnode;
123 123
124 apicid = per_cpu(x86_cpu_to_apicid, cpu); 124 apicid = per_cpu(x86_cpu_to_apicid, cpu);
125 pnode = uv_apicid_to_pnode(apicid); 125 pnode = uv_apicid_to_pnode(apicid);
126 126 uv_hub_send_ipi(pnode, apicid, vector);
127 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
128 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
129 (vector << UVH_IPI_INT_VECTOR_SHFT);
130
131 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
132} 127}
133 128
134static void uv_send_IPI_mask(const struct cpumask *mask, int vector) 129static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
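
The IPI composition moved into a hub helper; reconstructed from the deleted lines, uv_hub_send_ipi() presumably looks like this:

static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
{
	unsigned long val;

	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
	      ((unsigned long)apicid << UVH_IPI_INT_APIC_ID_SHFT) |
	      ((unsigned long)vector << UVH_IPI_INT_VECTOR_SHFT);

	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
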
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 10033fe718e0..49e0939bac42 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -466,7 +466,7 @@ static const lookup_t error_table[] = {
466 * @err: APM BIOS return code 466 * @err: APM BIOS return code
467 * 467 *
468 * Write a meaningful log entry to the kernel log in the event of 468 * Write a meaningful log entry to the kernel log in the event of
469 * an APM error. 469 * an APM error. Note that this also handles (negative) kernel errors.
470 */ 470 */
471 471
472static void apm_error(char *str, int err) 472static void apm_error(char *str, int err)
@@ -478,43 +478,14 @@ static void apm_error(char *str, int err)
478 break; 478 break;
479 if (i < ERROR_COUNT) 479 if (i < ERROR_COUNT)
480 printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); 480 printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
481 else if (err < 0)
482 printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err);
481 else 483 else
482 printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", 484 printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
483 str, err); 485 str, err);
484} 486}
485 487
486/* 488/*
487 * Lock APM functionality to physical CPU 0
488 */
489
490#ifdef CONFIG_SMP
491
492static cpumask_t apm_save_cpus(void)
493{
494 cpumask_t x = current->cpus_allowed;
495 /* Some bioses don't like being called from CPU != 0 */
496 set_cpus_allowed(current, cpumask_of_cpu(0));
497 BUG_ON(smp_processor_id() != 0);
498 return x;
499}
500
501static inline void apm_restore_cpus(cpumask_t mask)
502{
503 set_cpus_allowed(current, mask);
504}
505
506#else
507
508/*
509 * No CPU lockdown needed on a uniprocessor
510 */
511
512#define apm_save_cpus() (current->cpus_allowed)
513#define apm_restore_cpus(x) (void)(x)
514
515#endif
516
517/*
518 * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and 489 * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
519 * apm_info.allow_ints, we are being really paranoid here! Not only 490 * apm_info.allow_ints, we are being really paranoid here! Not only
520 * are interrupts disabled, but all the segment registers (except SS) 491 * are interrupts disabled, but all the segment registers (except SS)
@@ -568,16 +539,23 @@ static inline void apm_irq_restore(unsigned long flags)
568# define APM_DO_RESTORE_SEGS 539# define APM_DO_RESTORE_SEGS
569#endif 540#endif
570 541
542struct apm_bios_call {
543 u32 func;
544 /* In and out */
545 u32 ebx;
546 u32 ecx;
547 /* Out only */
548 u32 eax;
549 u32 edx;
550 u32 esi;
551
552 /* Error: -ENOMEM, or bits 8-15 of eax */
553 int err;
554};
555
571/** 556/**
572 * apm_bios_call - Make an APM BIOS 32bit call 557 * __apm_bios_call - Make an APM BIOS 32bit call
573 * @func: APM function to execute 558 * @_call: pointer to struct apm_bios_call.
574 * @ebx_in: EBX register for call entry
575 * @ecx_in: ECX register for call entry
576 * @eax: EAX register return
577 * @ebx: EBX register return
578 * @ecx: ECX register return
579 * @edx: EDX register return
580 * @esi: ESI register return
581 * 559 *
582 * Make an APM call using the 32bit protected mode interface. The 560 * Make an APM call using the 32bit protected mode interface. The
583 * caller is responsible for knowing if APM BIOS is configured and 561 * caller is responsible for knowing if APM BIOS is configured and
@@ -586,80 +564,142 @@ static inline void apm_irq_restore(unsigned long flags)
586 * flag is loaded into AL. If there is an error, then the error 564 * flag is loaded into AL. If there is an error, then the error
587 * code is returned in AH (bits 8-15 of eax) and this function 565 * code is returned in AH (bits 8-15 of eax) and this function
588 * returns non-zero. 566 * returns non-zero.
567 *
568 * Note: this makes the call on the current CPU.
589 */ 569 */
590 570static long __apm_bios_call(void *_call)
591static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
592 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
593{ 571{
594 APM_DECL_SEGS 572 APM_DECL_SEGS
595 unsigned long flags; 573 unsigned long flags;
596 cpumask_t cpus;
597 int cpu; 574 int cpu;
598 struct desc_struct save_desc_40; 575 struct desc_struct save_desc_40;
599 struct desc_struct *gdt; 576 struct desc_struct *gdt;
600 577 struct apm_bios_call *call = _call;
601 cpus = apm_save_cpus();
602 578
603 cpu = get_cpu(); 579 cpu = get_cpu();
580 BUG_ON(cpu != 0);
604 gdt = get_cpu_gdt_table(cpu); 581 gdt = get_cpu_gdt_table(cpu);
605 save_desc_40 = gdt[0x40 / 8]; 582 save_desc_40 = gdt[0x40 / 8];
606 gdt[0x40 / 8] = bad_bios_desc; 583 gdt[0x40 / 8] = bad_bios_desc;
607 584
608 apm_irq_save(flags); 585 apm_irq_save(flags);
609 APM_DO_SAVE_SEGS; 586 APM_DO_SAVE_SEGS;
610 apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); 587 apm_bios_call_asm(call->func, call->ebx, call->ecx,
588 &call->eax, &call->ebx, &call->ecx, &call->edx,
589 &call->esi);
611 APM_DO_RESTORE_SEGS; 590 APM_DO_RESTORE_SEGS;
612 apm_irq_restore(flags); 591 apm_irq_restore(flags);
613 gdt[0x40 / 8] = save_desc_40; 592 gdt[0x40 / 8] = save_desc_40;
614 put_cpu(); 593 put_cpu();
615 apm_restore_cpus(cpus);
616 594
617 return *eax & 0xff; 595 return call->eax & 0xff;
596}
597
598/* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */
599static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call)
600{
601 int ret;
602
603 /* Don't bother with work_on_cpu in the common case, so we don't
604 * have to worry about OOM or overhead. */
605 if (get_cpu() == 0) {
606 ret = fn(call);
607 put_cpu();
608 } else {
609 put_cpu();
610 ret = work_on_cpu(0, fn, call);
611 }
612
613 /* work_on_cpu can fail with -ENOMEM */
614 if (ret < 0)
615 call->err = ret;
616 else
617 call->err = (call->eax >> 8) & 0xff;
618
619 return ret;
618} 620}
619 621
620/** 622/**
621 * apm_bios_call_simple - make a simple APM BIOS 32bit call 623 * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0)
622 * @func: APM function to invoke 624 * @call: the apm_bios_call registers.
623 * @ebx_in: EBX register value for BIOS call 625 *
624 * @ecx_in: ECX register value for BIOS call 626 * If there is an error, it is returned in @call.err.
625 * @eax: EAX register on return from the BIOS call 627 */
628static int apm_bios_call(struct apm_bios_call *call)
629{
630 return on_cpu0(__apm_bios_call, call);
631}
632
633/**
634 * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0)
635 * @_call: pointer to struct apm_bios_call.
626 * 636 *
627 * Make a BIOS call that returns one value only, or just status. 637 * Make a BIOS call that returns one value only, or just status.
628 * If there is an error, then the error code is returned in AH 638 * If there is an error, then the error code is returned in AH
629 * (bits 8-15 of eax) and this function returns non-zero. This is 639 * (bits 8-15 of eax) and this function returns non-zero (it can
630 * used for simpler BIOS operations. This call may hold interrupts 640 * also return -ENOMEM). This is used for simpler BIOS operations.
631 * off for a long time on some laptops. 641 * This call may hold interrupts off for a long time on some laptops.
642 *
643 * Note: this makes the call on the current CPU.
632 */ 644 */
633 645static long __apm_bios_call_simple(void *_call)
634static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
635{ 646{
636 u8 error; 647 u8 error;
637 APM_DECL_SEGS 648 APM_DECL_SEGS
638 unsigned long flags; 649 unsigned long flags;
639 cpumask_t cpus;
640 int cpu; 650 int cpu;
641 struct desc_struct save_desc_40; 651 struct desc_struct save_desc_40;
642 struct desc_struct *gdt; 652 struct desc_struct *gdt;
643 653 struct apm_bios_call *call = _call;
644 cpus = apm_save_cpus();
645 654
646 cpu = get_cpu(); 655 cpu = get_cpu();
656 BUG_ON(cpu != 0);
647 gdt = get_cpu_gdt_table(cpu); 657 gdt = get_cpu_gdt_table(cpu);
648 save_desc_40 = gdt[0x40 / 8]; 658 save_desc_40 = gdt[0x40 / 8];
649 gdt[0x40 / 8] = bad_bios_desc; 659 gdt[0x40 / 8] = bad_bios_desc;
650 660
651 apm_irq_save(flags); 661 apm_irq_save(flags);
652 APM_DO_SAVE_SEGS; 662 APM_DO_SAVE_SEGS;
653 error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); 663 error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
664 &call->eax);
654 APM_DO_RESTORE_SEGS; 665 APM_DO_RESTORE_SEGS;
655 apm_irq_restore(flags); 666 apm_irq_restore(flags);
656 gdt[0x40 / 8] = save_desc_40; 667 gdt[0x40 / 8] = save_desc_40;
657 put_cpu(); 668 put_cpu();
658 apm_restore_cpus(cpus);
659 return error; 669 return error;
660} 670}
661 671
662/** 672/**
673 * apm_bios_call_simple - make a simple APM BIOS 32bit call
674 * @func: APM function to invoke
675 * @ebx_in: EBX register value for BIOS call
676 * @ecx_in: ECX register value for BIOS call
677 * @eax: EAX register on return from the BIOS call
678 * @err: -ENOMEM, or the BIOS error code (bits 8-15 of eax) on return
679 *
680 * Make a BIOS call that returns one value only, or just status.
681 * If there is an error, then the error code is returned in @err
682 * and this function returns non-zero. This is used for simpler
683 * BIOS operations. This call may hold interrupts off for a long
684 * time on some laptops.
685 */
686static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax,
687 int *err)
688{
689 struct apm_bios_call call;
690 int ret;
691
692 call.func = func;
693 call.ebx = ebx_in;
694 call.ecx = ecx_in;
695
696 ret = on_cpu0(__apm_bios_call_simple, &call);
697 *eax = call.eax;
698 *err = call.err;
699 return ret;
700}
701
702/**
663 * apm_driver_version - APM driver version 703 * apm_driver_version - APM driver version
664 * @val: loaded with the APM version on return 704 * @val: loaded with the APM version on return
665 * 705 *
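
A usage sketch of the reworked interface, mirroring how apm_get_event() is converted further down: the caller fills the register block, apm_bios_call() routes it to CPU 0 via on_cpu0(), and the outputs plus call.err are read back afterwards:

	apm_event_t event;
	struct apm_bios_call call;

	call.func = APM_FUNC_GET_EVENT;
	call.ebx  = call.ecx = 0;

	if (apm_bios_call(&call))	/* executed on CPU 0 */
		return call.err;	/* -ENOMEM, or AH error bits */

	event = call.ebx;		/* register outputs read back */
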
@@ -678,9 +718,10 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
678static int apm_driver_version(u_short *val) 718static int apm_driver_version(u_short *val)
679{ 719{
680 u32 eax; 720 u32 eax;
721 int err;
681 722
682 if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) 723 if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err))
683 return (eax >> 8) & 0xff; 724 return err;
684 *val = eax; 725 *val = eax;
685 return APM_SUCCESS; 726 return APM_SUCCESS;
686} 727}
@@ -701,22 +742,21 @@ static int apm_driver_version(u_short *val)
701 * that APM 1.2 is in use. If no messages are pending the value 0x80 742 * that APM 1.2 is in use. If no messages are pending the value 0x80
702 * is returned (No power management events pending). 743 * is returned (No power management events pending).
703 */ 744 */
704
705static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) 745static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
706{ 746{
707 u32 eax; 747 struct apm_bios_call call;
708 u32 ebx;
709 u32 ecx;
710 u32 dummy;
711 748
712 if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, 749 call.func = APM_FUNC_GET_EVENT;
713 &dummy, &dummy)) 750 call.ebx = call.ecx = 0;
714 return (eax >> 8) & 0xff; 751
715 *event = ebx; 752 if (apm_bios_call(&call))
753 return call.err;
754
755 *event = call.ebx;
716 if (apm_info.connection_version < 0x0102) 756 if (apm_info.connection_version < 0x0102)
717 *info = ~0; /* indicate info not valid */ 757 *info = ~0; /* indicate info not valid */
718 else 758 else
719 *info = ecx; 759 *info = call.ecx;
720 return APM_SUCCESS; 760 return APM_SUCCESS;
721} 761}
722 762
@@ -737,9 +777,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
737static int set_power_state(u_short what, u_short state) 777static int set_power_state(u_short what, u_short state)
738{ 778{
739 u32 eax; 779 u32 eax;
780 int err;
740 781
741 if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) 782 if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err))
742 return (eax >> 8) & 0xff; 783 return err;
743 return APM_SUCCESS; 784 return APM_SUCCESS;
744} 785}
745 786
@@ -770,6 +811,7 @@ static int apm_do_idle(void)
770 u8 ret = 0; 811 u8 ret = 0;
771 int idled = 0; 812 int idled = 0;
772 int polling; 813 int polling;
814 int err;
773 815
774 polling = !!(current_thread_info()->status & TS_POLLING); 816 polling = !!(current_thread_info()->status & TS_POLLING);
775 if (polling) { 817 if (polling) {
@@ -782,7 +824,7 @@ static int apm_do_idle(void)
782 } 824 }
783 if (!need_resched()) { 825 if (!need_resched()) {
784 idled = 1; 826 idled = 1;
785 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); 827 ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err);
786 } 828 }
787 if (polling) 829 if (polling)
788 current_thread_info()->status |= TS_POLLING; 830 current_thread_info()->status |= TS_POLLING;
@@ -797,8 +839,7 @@ static int apm_do_idle(void)
797 * Only report the failure the first 5 times. 839 * Only report the failure the first 5 times.
798 */ 840 */
799 if (++t < 5) { 841 if (++t < 5) {
800 printk(KERN_DEBUG "apm_do_idle failed (%d)\n", 842 printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err);
801 (eax >> 8) & 0xff);
802 t = jiffies; 843 t = jiffies;
803 } 844 }
804 return -1; 845 return -1;
@@ -816,9 +857,10 @@ static int apm_do_idle(void)
816static void apm_do_busy(void) 857static void apm_do_busy(void)
817{ 858{
818 u32 dummy; 859 u32 dummy;
860 int err;
819 861
820 if (clock_slowed || ALWAYS_CALL_BUSY) { 862 if (clock_slowed || ALWAYS_CALL_BUSY) {
821 (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); 863 (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err);
822 clock_slowed = 0; 864 clock_slowed = 0;
823 } 865 }
824} 866}
@@ -937,7 +979,7 @@ static void apm_power_off(void)
937 979
938 /* Some bioses don't like being called from CPU != 0 */ 980 /* Some bioses don't like being called from CPU != 0 */
939 if (apm_info.realmode_power_off) { 981 if (apm_info.realmode_power_off) {
940 (void)apm_save_cpus(); 982 set_cpus_allowed_ptr(current, cpumask_of(0));
941 machine_real_restart(po_bios_call, sizeof(po_bios_call)); 983 machine_real_restart(po_bios_call, sizeof(po_bios_call));
942 } else { 984 } else {
943 (void)set_system_power_state(APM_STATE_OFF); 985 (void)set_system_power_state(APM_STATE_OFF);
@@ -956,12 +998,13 @@ static void apm_power_off(void)
956static int apm_enable_power_management(int enable) 998static int apm_enable_power_management(int enable)
957{ 999{
958 u32 eax; 1000 u32 eax;
1001 int err;
959 1002
960 if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) 1003 if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
961 return APM_NOT_ENGAGED; 1004 return APM_NOT_ENGAGED;
962 if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, 1005 if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
963 enable, &eax)) 1006 enable, &eax, &err))
964 return (eax >> 8) & 0xff; 1007 return err;
965 if (enable) 1008 if (enable)
966 apm_info.bios.flags &= ~APM_BIOS_DISABLED; 1009 apm_info.bios.flags &= ~APM_BIOS_DISABLED;
967 else 1010 else
@@ -986,24 +1029,23 @@ static int apm_enable_power_management(int enable)
986 1029
987static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) 1030static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
988{ 1031{
989 u32 eax; 1032 struct apm_bios_call call;
990 u32 ebx; 1033
991 u32 ecx; 1034 call.func = APM_FUNC_GET_STATUS;
992 u32 edx; 1035 call.ebx = APM_DEVICE_ALL;
993 u32 dummy; 1036 call.ecx = 0;
994 1037
995 if (apm_info.get_power_status_broken) 1038 if (apm_info.get_power_status_broken)
996 return APM_32_UNSUPPORTED; 1039 return APM_32_UNSUPPORTED;
997 if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, 1040 if (apm_bios_call(&call))
998 &eax, &ebx, &ecx, &edx, &dummy)) 1041 return call.err;
999 return (eax >> 8) & 0xff; 1042 *status = call.ebx;
1000 *status = ebx; 1043 *bat = call.ecx;
1001 *bat = ecx;
1002 if (apm_info.get_power_status_swabinminutes) { 1044 if (apm_info.get_power_status_swabinminutes) {
1003 *life = swab16((u16)edx); 1045 *life = swab16((u16)call.edx);
1004 *life |= 0x8000; 1046 *life |= 0x8000;
1005 } else 1047 } else
1006 *life = edx; 1048 *life = call.edx;
1007 return APM_SUCCESS; 1049 return APM_SUCCESS;
1008} 1050}
1009 1051
@@ -1048,12 +1090,14 @@ static int apm_get_battery_status(u_short which, u_short *status,
1048static int apm_engage_power_management(u_short device, int enable) 1090static int apm_engage_power_management(u_short device, int enable)
1049{ 1091{
1050 u32 eax; 1092 u32 eax;
1093 int err;
1051 1094
1052 if ((enable == 0) && (device == APM_DEVICE_ALL) 1095 if ((enable == 0) && (device == APM_DEVICE_ALL)
1053 && (apm_info.bios.flags & APM_BIOS_DISABLED)) 1096 && (apm_info.bios.flags & APM_BIOS_DISABLED))
1054 return APM_DISABLED; 1097 return APM_DISABLED;
1055 if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) 1098 if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable,
1056 return (eax >> 8) & 0xff; 1099 &eax, &err))
1100 return err;
1057 if (device == APM_DEVICE_ALL) { 1101 if (device == APM_DEVICE_ALL) {
1058 if (enable) 1102 if (enable)
1059 apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; 1103 apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
@@ -1190,8 +1234,10 @@ static int suspend(int vetoable)
1190 struct apm_user *as; 1234 struct apm_user *as;
1191 1235
1192 device_suspend(PMSG_SUSPEND); 1236 device_suspend(PMSG_SUSPEND);
1193 local_irq_disable(); 1237
1194 device_power_down(PMSG_SUSPEND); 1238 device_power_down(PMSG_SUSPEND);
1239
1240 local_irq_disable();
1195 sysdev_suspend(PMSG_SUSPEND); 1241 sysdev_suspend(PMSG_SUSPEND);
1196 1242
1197 local_irq_enable(); 1243 local_irq_enable();
@@ -1209,9 +1255,12 @@ static int suspend(int vetoable)
1209 if (err != APM_SUCCESS) 1255 if (err != APM_SUCCESS)
1210 apm_error("suspend", err); 1256 apm_error("suspend", err);
1211 err = (err == APM_SUCCESS) ? 0 : -EIO; 1257 err = (err == APM_SUCCESS) ? 0 : -EIO;
1258
1212 sysdev_resume(); 1259 sysdev_resume();
1213 device_power_up(PMSG_RESUME);
1214 local_irq_enable(); 1260 local_irq_enable();
1261
1262 device_power_up(PMSG_RESUME);
1263
1215 device_resume(PMSG_RESUME); 1264 device_resume(PMSG_RESUME);
1216 queue_event(APM_NORMAL_RESUME, NULL); 1265 queue_event(APM_NORMAL_RESUME, NULL);
1217 spin_lock(&user_list_lock); 1266 spin_lock(&user_list_lock);
@@ -1228,8 +1277,9 @@ static void standby(void)
1228{ 1277{
1229 int err; 1278 int err;
1230 1279
1231 local_irq_disable();
1232 device_power_down(PMSG_SUSPEND); 1280 device_power_down(PMSG_SUSPEND);
1281
1282 local_irq_disable();
1233 sysdev_suspend(PMSG_SUSPEND); 1283 sysdev_suspend(PMSG_SUSPEND);
1234 local_irq_enable(); 1284 local_irq_enable();
1235 1285
@@ -1239,8 +1289,9 @@ static void standby(void)
1239 1289
1240 local_irq_disable(); 1290 local_irq_disable();
1241 sysdev_resume(); 1291 sysdev_resume();
1242 device_power_up(PMSG_RESUME);
1243 local_irq_enable(); 1292 local_irq_enable();
1293
1294 device_power_up(PMSG_RESUME);
1244} 1295}
1245 1296
1246static apm_event_t get_event(void) 1297static apm_event_t get_event(void)
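
Taken together, the suspend() and standby() hunks impose one ordering: the device_power_down()/device_power_up() callbacks, which may sleep, now run outside the irqs-off window, leaving only the sysdev hooks with interrupts disabled:

	device_power_down(PMSG_SUSPEND);	/* may sleep: irqs still on */

	local_irq_disable();
	sysdev_suspend(PMSG_SUSPEND);
	/* ... enter and leave the low-power state ... */
	sysdev_resume();
	local_irq_enable();

	device_power_up(PMSG_RESUME);		/* may sleep: irqs on again */
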
@@ -1682,16 +1733,14 @@ static int apm(void *unused)
1682 char *power_stat; 1733 char *power_stat;
1683 char *bat_stat; 1734 char *bat_stat;
1684 1735
1685#ifdef CONFIG_SMP
1686 /* 2002/08/01 - WT 1736 /* 2002/08/01 - WT
1687 * This is to avoid random crashes at boot time during initialization 1737 * This is to avoid random crashes at boot time during initialization
1688 * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. 1738 * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
1689 * Some bioses don't like being called from CPU != 0. 1739 * Some bioses don't like being called from CPU != 0.
1690 * Method suggested by Ingo Molnar. 1740 * Method suggested by Ingo Molnar.
1691 */ 1741 */
1692 set_cpus_allowed(current, cpumask_of_cpu(0)); 1742 set_cpus_allowed_ptr(current, cpumask_of(0));
1693 BUG_ON(smp_processor_id() != 0); 1743 BUG_ON(smp_processor_id() != 0);
1694#endif
1695 1744
1696 if (apm_info.connection_version == 0) { 1745 if (apm_info.connection_version == 0) {
1697 apm_info.connection_version = apm_info.bios.version; 1746 apm_info.connection_version = apm_info.bios.version;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index fbf2f33e3080..5a6aa1c1162f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -18,6 +18,7 @@
18#include <asm/thread_info.h> 18#include <asm/thread_info.h>
19#include <asm/bootparam.h> 19#include <asm/bootparam.h>
20#include <asm/elf.h> 20#include <asm/elf.h>
21#include <asm/suspend.h>
21 22
22#include <xen/interface/xen.h> 23#include <xen/interface/xen.h>
23 24
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 8793ab33e2c1..e72f062fb4b5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -16,6 +16,7 @@
16#include <asm/thread_info.h> 16#include <asm/thread_info.h>
17#include <asm/ia32.h> 17#include <asm/ia32.h>
18#include <asm/bootparam.h> 18#include <asm/bootparam.h>
19#include <asm/suspend.h>
19 20
20#include <xen/interface/xen.h> 21#include <xen/interface/xen.h>
21 22
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 2ac0ab71412a..fc999e6fc46a 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -83,15 +83,15 @@ void __init setup_bios_corruption_check(void)
83 u64 size; 83 u64 size;
84 addr = find_e820_area_size(addr, &size, PAGE_SIZE); 84 addr = find_e820_area_size(addr, &size, PAGE_SIZE);
85 85
86 if (addr == 0) 86 if (!(addr + 1))
87 break;
88
89 if (addr >= corruption_check_size)
87 break; 90 break;
88 91
89 if ((addr + size) > corruption_check_size) 92 if ((addr + size) > corruption_check_size)
90 size = corruption_check_size - addr; 93 size = corruption_check_size - addr;
91 94
92 if (size == 0)
93 break;
94
95 e820_update_range(addr, size, E820_RAM, E820_RESERVED); 95 e820_update_range(addr, size, E820_RAM, E820_RESERVED);
96 scan_areas[num_scan_areas].addr = addr; 96 scan_areas[num_scan_areas].addr = addr;
97 scan_areas[num_scan_areas].size = size; 97 scan_areas[num_scan_areas].size = size;
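
Aside: the hunk above changes the failure test from addr == 0 to !(addr + 1), i.e. it now treats all-ones (-1) as the "no area found" sentinel and drops the separate size == 0 bail-out. A small compilable model of the sentinel check, with find_area() as a hypothetical stand-in for find_e820_area_size():

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical stand-in for find_e820_area_size(), which (per the new
     * check) signals "nothing found" with all-ones rather than 0. */
    static uint64_t find_area(int fail)
    {
            return fail ? (uint64_t)-1 : 0x9f000;
    }

    int main(void)
    {
            uint64_t addr = find_area(1);

            /* !(addr + 1) is true exactly when addr == (uint64_t)-1: */
            if (!(addr + 1))
                    printf("no area found (sentinel -1)\n");

            addr = find_area(0);
            if (addr + 1)
                    printf("found area at 0x%llx\n", (unsigned long long)addr);
            return 0;
    }
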
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2de..4e242f9a06e4 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,11 +14,12 @@ obj-y += vmware.o hypervisor.o
14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
15obj-$(CONFIG_X86_64) += bugs_64.o 15obj-$(CONFIG_X86_64) += bugs_64.o
16 16
17obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
18
17obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 19obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
18obj-$(CONFIG_CPU_SUP_AMD) += amd.o 20obj-$(CONFIG_CPU_SUP_AMD) += amd.o
19obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o 21obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
20obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o 22obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
21obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
22obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o 23obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
23obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o 24obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
24 25
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 6882a735d9c0..8220ae69849d 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
29 u32 regs[4]; 29 u32 regs[4];
30 const struct cpuid_bit *cb; 30 const struct cpuid_bit *cb;
31 31
32 static const struct cpuid_bit cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { 0, 0, 0, 0 } 34 { 0, 0, 0, 0 }
35 }; 35 };
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f47df59016c5..7e4a459daa64 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -502,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
502} 502}
503#endif 503#endif
504 504
505static struct cpu_dev amd_cpu_dev __cpuinitdata = { 505static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
506 .c_vendor = "AMD", 506 .c_vendor = "AMD",
507 .c_ident = { "AuthenticAMD" }, 507 .c_ident = { "AuthenticAMD" },
508#ifdef CONFIG_X86_32 508#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 89bfdd9cacc6..c95e831bb095 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,11 +1,11 @@
1#include <linux/bitops.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/init.h> 3#include <linux/init.h>
3#include <linux/bitops.h>
4 4
5#include <asm/processor.h> 5#include <asm/processor.h>
6#include <asm/msr.h>
7#include <asm/e820.h> 6#include <asm/e820.h>
8#include <asm/mtrr.h> 7#include <asm/mtrr.h>
8#include <asm/msr.h>
9 9
10#include "cpu.h" 10#include "cpu.h"
11 11
@@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
276 */ 276 */
277 c->x86_capability[5] = cpuid_edx(0xC0000001); 277 c->x86_capability[5] = cpuid_edx(0xC0000001);
278 } 278 }
279 279#ifdef CONFIG_X86_32
280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */
281 if (c->x86_model >= 6 && c->x86_model <= 9) { 281 if (c->x86_model >= 6 && c->x86_model <= 9) {
282 rdmsr(MSR_VIA_FCR, lo, hi); 282 rdmsr(MSR_VIA_FCR, lo, hi);
@@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
288 /* Before Nehemiah, the C3's had 3dNOW! */ 288 /* Before Nehemiah, the C3's had 3dNOW! */
289 if (c->x86_model >= 6 && c->x86_model < 9) 289 if (c->x86_model >= 6 && c->x86_model < 9)
290 set_cpu_cap(c, X86_FEATURE_3DNOW); 290 set_cpu_cap(c, X86_FEATURE_3DNOW);
291#endif
292 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
293 c->x86_cache_alignment = c->x86_clflush_size * 2;
294 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
295 }
291 296
292 display_cacheinfo(c); 297 display_cacheinfo(c);
293} 298}
@@ -316,16 +321,25 @@ enum {
316static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) 321static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
317{ 322{
318 switch (c->x86) { 323 switch (c->x86) {
324#ifdef CONFIG_X86_32
319 case 5: 325 case 5:
320 /* Emulate MTRRs using Centaur's MCR. */ 326 /* Emulate MTRRs using Centaur's MCR. */
321 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); 327 set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
322 break; 328 break;
329#endif
330 case 6:
331 if (c->x86_model >= 0xf)
332 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
333 break;
323 } 334 }
335#ifdef CONFIG_X86_64
336 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
337#endif
324} 338}
325 339
326static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 340static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
327{ 341{
328 342#ifdef CONFIG_X86_32
329 char *name; 343 char *name;
330 u32 fcr_set = 0; 344 u32 fcr_set = 0;
331 u32 fcr_clr = 0; 345 u32 fcr_clr = 0;
@@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
337 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 351 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
338 */ 352 */
339 clear_cpu_cap(c, 0*32+31); 353 clear_cpu_cap(c, 0*32+31);
340 354#endif
355 early_init_centaur(c);
341 switch (c->x86) { 356 switch (c->x86) {
357#ifdef CONFIG_X86_32
342 case 5: 358 case 5:
343 switch (c->x86_model) { 359 switch (c->x86_model) {
344 case 4: 360 case 4:
@@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
442 } 458 }
443 sprintf(c->x86_model_id, "WinChip %s", name); 459 sprintf(c->x86_model_id, "WinChip %s", name);
444 break; 460 break;
445 461#endif
446 case 6: 462 case 6:
447 init_c3(c); 463 init_c3(c);
448 break; 464 break;
449 } 465 }
466#ifdef CONFIG_X86_64
467 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
468#endif
450} 469}
451 470
452static unsigned int __cpuinit 471static unsigned int __cpuinit
453centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) 472centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
454{ 473{
474#ifdef CONFIG_X86_32
455 /* VIA C3 CPUs (670-68F) need further shifting. */ 475 /* VIA C3 CPUs (670-68F) need further shifting. */
456 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 476 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
457 size >>= 8; 477 size >>= 8;
@@ -464,11 +484,11 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
464 if ((c->x86 == 6) && (c->x86_model == 9) && 484 if ((c->x86 == 6) && (c->x86_model == 9) &&
465 (c->x86_mask == 1) && (size == 65)) 485 (c->x86_mask == 1) && (size == 65))
466 size -= 1; 486 size -= 1;
467 487#endif
468 return size; 488 return size;
469} 489}
470 490
471static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 491static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
472 .c_vendor = "Centaur", 492 .c_vendor = "Centaur",
473 .c_ident = { "CentaurHauls" }, 493 .c_ident = { "CentaurHauls" },
474 .c_early_init = early_init_centaur, 494 .c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
deleted file mode 100644
index a1625f5a1e78..000000000000
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ /dev/null
@@ -1,37 +0,0 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3
4#include <asm/cpufeature.h>
5#include <asm/processor.h>
6
7#include "cpu.h"
8
9static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13
14 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
15}
16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{
19 early_init_centaur(c);
20
21 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
22 c->x86_cache_alignment = c->x86_clflush_size * 2;
23 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
24 }
25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
26}
27
28static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
29 .c_vendor = "Centaur",
30 .c_ident = { "CentaurHauls" },
31 .c_early_init = early_init_centaur,
32 .c_init = init_centaur,
33 .c_x86_vendor = X86_VENDOR_CENTAUR,
34};
35
36cpu_dev_register(centaur_cpu_dev);
37
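
Aside: the two diffs above fold the former centaur_64.c into centaur.c: the WinChip/MCR paths are fenced with CONFIG_X86_32, the SYSENTER32 and LFENCE_RDTSC bits with CONFIG_X86_64, and the family-6 CONSTANT_TSC/REP_GOOD logic becomes common. A condensed kernel-style sketch of the unified early-init flow (not standalone code):

    static void early_init_centaur_sketch(struct cpuinfo_x86 *c)
    {
    #ifdef CONFIG_X86_32
            if (c->x86 == 5)        /* WinChip: emulate MTRRs via Centaur MCRs */
                    set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
    #endif
            if (c->x86 == 6 && c->x86_model >= 0xf)
                    set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
    #ifdef CONFIG_X86_64
            set_cpu_cap(c, X86_FEATURE_SYSENTER32);
    #endif
    }
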
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 826d5c876278..c4f667896c28 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,52 +1,50 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h> 1#include <linux/bootmem.h>
2#include <linux/linkage.h>
6#include <linux/bitops.h> 3#include <linux/bitops.h>
4#include <linux/kernel.h>
7#include <linux/module.h> 5#include <linux/module.h>
8#include <linux/kgdb.h> 6#include <linux/percpu.h>
9#include <linux/topology.h> 7#include <linux/string.h>
10#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/sched.h>
10#include <linux/init.h>
11#include <linux/kgdb.h>
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/io.h>
13#include <asm/i387.h> 14
14#include <asm/msr.h> 15#include <asm/stackprotector.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h> 16#include <asm/mmu_context.h>
17#include <asm/hypervisor.h>
18#include <asm/processor.h>
19#include <asm/sections.h>
20#include <asm/topology.h>
21#include <asm/cpumask.h>
22#include <asm/pgtable.h>
23#include <asm/atomic.h>
24#include <asm/proto.h>
25#include <asm/setup.h>
26#include <asm/apic.h>
27#include <asm/desc.h>
28#include <asm/i387.h>
18#include <asm/mtrr.h> 29#include <asm/mtrr.h>
30#include <asm/numa.h>
31#include <asm/asm.h>
32#include <asm/cpu.h>
19#include <asm/mce.h> 33#include <asm/mce.h>
34#include <asm/msr.h>
20#include <asm/pat.h> 35#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#include <asm/smp.h> 36#include <asm/smp.h>
24#include <asm/cpu.h>
25#include <asm/cpumask.h>
26#include <asm/apic.h>
27 37
28#ifdef CONFIG_X86_LOCAL_APIC 38#ifdef CONFIG_X86_LOCAL_APIC
29#include <asm/uv/uv.h> 39#include <asm/uv/uv.h>
30#endif 40#endif
31 41
32#include <asm/pgtable.h>
33#include <asm/processor.h>
34#include <asm/desc.h>
35#include <asm/atomic.h>
36#include <asm/proto.h>
37#include <asm/sections.h>
38#include <asm/setup.h>
39#include <asm/hypervisor.h>
40#include <asm/stackprotector.h>
41
42#include "cpu.h" 42#include "cpu.h"
43 43
44#ifdef CONFIG_X86_64
45
46/* all of these masks are initialized in setup_cpu_local_masks() */ 44/* all of these masks are initialized in setup_cpu_local_masks() */
47cpumask_var_t cpu_callin_mask;
48cpumask_var_t cpu_callout_mask;
49cpumask_var_t cpu_initialized_mask; 45cpumask_var_t cpu_initialized_mask;
46cpumask_var_t cpu_callout_mask;
47cpumask_var_t cpu_callin_mask;
50 48
51/* representing cpus for which sibling maps can be computed */ 49/* representing cpus for which sibling maps can be computed */
52cpumask_var_t cpu_sibling_setup_mask; 50cpumask_var_t cpu_sibling_setup_mask;
@@ -60,17 +58,7 @@ void __init setup_cpu_local_masks(void)
60 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 58 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
61} 59}
62 60
63#else /* CONFIG_X86_32 */ 61static const struct cpu_dev *this_cpu __cpuinitdata;
64
65cpumask_t cpu_callin_map;
66cpumask_t cpu_callout_map;
67cpumask_t cpu_initialized;
68cpumask_t cpu_sibling_setup_map;
69
70#endif /* CONFIG_X86_32 */
71
72
73static struct cpu_dev *this_cpu __cpuinitdata;
74 62
75DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 63DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
76#ifdef CONFIG_X86_64 64#ifdef CONFIG_X86_64
@@ -79,48 +67,48 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
79 * IRET will check the segment types kkeil 2000/10/28 67 * IRET will check the segment types kkeil 2000/10/28
80 * Also sysret mandates a special GDT layout 68 * Also sysret mandates a special GDT layout
81 * 69 *
82 * The TLS descriptors are currently at a different place compared to i386. 70 * TLS descriptors are currently at a different place compared to i386.
83 * Hopefully nobody expects them at a fixed place (Wine?) 71 * Hopefully nobody expects them at a fixed place (Wine?)
84 */ 72 */
85 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
86 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
87 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
88 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
89 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
90 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
91#else 79#else
92 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 80 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
93 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 81 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
94 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 82 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
95 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, 83 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
96 /* 84 /*
97 * Segments used for calling PnP BIOS have byte granularity. 85 * Segments used for calling PnP BIOS have byte granularity.
 98 * The code segments and data segments have fixed 64k limits, 86 * The code segments and data segments have fixed 64k limits,
99 * the transfer segment sizes are set at run time. 87 * the transfer segment sizes are set at run time.
100 */ 88 */
101 /* 32-bit code */ 89 /* 32-bit code */
102 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, 90 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
103 /* 16-bit code */ 91 /* 16-bit code */
104 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, 92 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
105 /* 16-bit data */ 93 /* 16-bit data */
106 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, 94 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
107 /* 16-bit data */ 95 /* 16-bit data */
108 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, 96 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
109 /* 16-bit data */ 97 /* 16-bit data */
110 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, 98 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
111 /* 99 /*
112 * The APM segments have byte granularity and their bases 100 * The APM segments have byte granularity and their bases
113 * are set at run time. All have 64k limits. 101 * are set at run time. All have 64k limits.
114 */ 102 */
115 /* 32-bit code */ 103 /* 32-bit code */
116 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, 104 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
117 /* 16-bit code */ 105 /* 16-bit code */
118 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, 106 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
119 /* data */ 107 /* data */
120 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 108 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
121 109
122 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 110 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
123 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 111 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
124 GDT_STACK_CANARY_INIT 112 GDT_STACK_CANARY_INIT
125#endif 113#endif
126} }; 114} };
@@ -164,16 +152,17 @@ static inline int flag_is_changeable_p(u32 flag)
164 * the CPUID. Add "volatile" to not allow gcc to 152 * the CPUID. Add "volatile" to not allow gcc to
165 * optimize the subsequent calls to this function. 153 * optimize the subsequent calls to this function.
166 */ 154 */
167 asm volatile ("pushfl\n\t" 155 asm volatile ("pushfl \n\t"
168 "pushfl\n\t" 156 "pushfl \n\t"
169 "popl %0\n\t" 157 "popl %0 \n\t"
170 "movl %0,%1\n\t" 158 "movl %0, %1 \n\t"
171 "xorl %2,%0\n\t" 159 "xorl %2, %0 \n\t"
172 "pushl %0\n\t" 160 "pushl %0 \n\t"
173 "popfl\n\t" 161 "popfl \n\t"
174 "pushfl\n\t" 162 "pushfl \n\t"
175 "popl %0\n\t" 163 "popl %0 \n\t"
176 "popfl\n\t" 164 "popfl \n\t"
165
177 : "=&r" (f1), "=&r" (f2) 166 : "=&r" (f1), "=&r" (f2)
178 : "ir" (flag)); 167 : "ir" (flag));
179 168
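
Aside: the whitespace-only hunk above reformats the EFLAGS-toggle probe: the flag bit is flipped, pushed back through popfl, and read again; the bit is changeable iff the two reads differ in it. A compilable model of the final comparison (the real probe needs the pushfl/popfl asm shown above):

    #include <stdio.h>

    /* f2 is the original EFLAGS, f1 the value read back after pushing the
     * toggled copy through popfl; the flag is changeable iff they differ
     * in that bit. */
    static int flag_is_changeable(unsigned int f1, unsigned int f2,
                                  unsigned int flag)
    {
            return ((f1 ^ f2) & flag) != 0;
    }

    int main(void)
    {
            /* ID bit (bit 21) stuck after the toggle -> CPUID is available: */
            printf("%d\n", flag_is_changeable(0x200000, 0x000000, 0x200000));
            return 0;
    }
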
@@ -188,18 +177,22 @@ static int __cpuinit have_cpuid_p(void)
188 177
189static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 178static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
190{ 179{
191 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { 180 unsigned long lo, hi;
192 /* Disable processor serial number */ 181
193 unsigned long lo, hi; 182 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
194 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 183 return;
195 lo |= 0x200000; 184
196 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 185 /* Disable processor serial number: */
197 printk(KERN_NOTICE "CPU serial number disabled.\n"); 186
198 clear_cpu_cap(c, X86_FEATURE_PN); 187 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
199 188 lo |= 0x200000;
200 /* Disabling the serial number may affect the cpuid level */ 189 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
201 c->cpuid_level = cpuid_eax(0); 190
202 } 191 printk(KERN_NOTICE "CPU serial number disabled.\n");
192 clear_cpu_cap(c, X86_FEATURE_PN);
193
194 /* Disabling the serial number may affect the cpuid level */
195 c->cpuid_level = cpuid_eax(0);
203} 196}
204 197
205static int __init x86_serial_nr_setup(char *s) 198static int __init x86_serial_nr_setup(char *s)
@@ -232,6 +225,7 @@ struct cpuid_dependent_feature {
232 u32 feature; 225 u32 feature;
233 u32 level; 226 u32 level;
234}; 227};
228
235static const struct cpuid_dependent_feature __cpuinitconst 229static const struct cpuid_dependent_feature __cpuinitconst
236cpuid_dependent_features[] = { 230cpuid_dependent_features[] = {
237 { X86_FEATURE_MWAIT, 0x00000005 }, 231 { X86_FEATURE_MWAIT, 0x00000005 },
@@ -243,7 +237,11 @@ cpuid_dependent_features[] = {
243static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) 237static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
244{ 238{
245 const struct cpuid_dependent_feature *df; 239 const struct cpuid_dependent_feature *df;
240
246 for (df = cpuid_dependent_features; df->feature; df++) { 241 for (df = cpuid_dependent_features; df->feature; df++) {
242
243 if (!cpu_has(c, df->feature))
244 continue;
247 /* 245 /*
248 * Note: cpuid_level is set to -1 if unavailable, but 246 * Note: cpuid_level is set to -1 if unavailable, but
249 * extended_cpuid_level is set to 0 if unavailable 247
@@ -251,32 +249,32 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
251 * when signed; hence the weird messing around with 249 * when signed; hence the weird messing around with
252 * signs here... 250 * signs here...
253 */ 251 */
254 if (cpu_has(c, df->feature) && 252 if (!((s32)df->level < 0 ?
255 ((s32)df->level < 0 ?
256 (u32)df->level > (u32)c->extended_cpuid_level : 253 (u32)df->level > (u32)c->extended_cpuid_level :
257 (s32)df->level > (s32)c->cpuid_level)) { 254 (s32)df->level > (s32)c->cpuid_level))
258 clear_cpu_cap(c, df->feature); 255 continue;
259 if (warn) 256
260 printk(KERN_WARNING 257 clear_cpu_cap(c, df->feature);
261 "CPU: CPU feature %s disabled " 258 if (!warn)
262 "due to lack of CPUID level 0x%x\n", 259 continue;
263 x86_cap_flags[df->feature], 260
264 df->level); 261 printk(KERN_WARNING
265 } 262 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
263 x86_cap_flags[df->feature], df->level);
266 } 264 }
267} 265}
268 266
269/* 267/*
270 * Naming convention should be: <Name> [(<Codename>)] 268 * Naming convention should be: <Name> [(<Codename>)]
271 * This table is only used if init_<vendor>() below doesn't set it; 269 * This table is only used if init_<vendor>() below doesn't set it;
272 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 270 * in particular, if CPUID levels 0x80000002..4 are supported, this
273 * 271 * isn't used
274 */ 272 */
275 273
276/* Look up CPU names by table lookup. */ 274/* Look up CPU names by table lookup. */
277static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) 275static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
278{ 276{
279 struct cpu_model_info *info; 277 const struct cpu_model_info *info;
280 278
281 if (c->x86_model >= 16) 279 if (c->x86_model >= 16)
282 return NULL; /* Range check */ 280 return NULL; /* Range check */
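
Aside: the filter_cpuid_features() rewrite above keeps the signed/unsigned comparison trick: dependent-feature levels in the extended range (0x8000xxxx) are negative as s32 and are compared unsigned against extended_cpuid_level, while basic leaves compare signed, since cpuid_level is -1 when CPUID is absent. A compilable model of that predicate, with made-up levels:

    #include <stdio.h>
    #include <stdint.h>

    /* Returns nonzero when the CPU lacks the CPUID level a feature needs. */
    static int level_missing(int32_t level, int32_t cpuid_level,
                             uint32_t extended_cpuid_level)
    {
            return level < 0 ?
                    (uint32_t)level > extended_cpuid_level :
                    level > cpuid_level;
    }

    int main(void)
    {
            /* 0x80000007 is negative as s32, so it takes the unsigned path: */
            printf("%d\n", level_missing((int32_t)0x80000007u, 5, 0x80000004u));
            /* basic leaf 6 against cpuid_level 5 -> missing: */
            printf("%d\n", level_missing(6, 5, 0x80000008u));
            return 0;
    }
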
@@ -307,8 +305,10 @@ void load_percpu_segment(int cpu)
307 load_stack_canary_segment(); 305 load_stack_canary_segment();
308} 306}
309 307
310/* Current gdt points %fs at the "master" per-cpu area: after this, 308/*
311 * it's on the real one. */ 309 * Current gdt points %fs at the "master" per-cpu area: after this,
310 * it's on the real one.
311 */
312void switch_to_new_gdt(int cpu) 312void switch_to_new_gdt(int cpu)
313{ 313{
314 struct desc_ptr gdt_descr; 314 struct desc_ptr gdt_descr;
@@ -321,7 +321,7 @@ void switch_to_new_gdt(int cpu)
321 load_percpu_segment(cpu); 321 load_percpu_segment(cpu);
322} 322}
323 323
324static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 324static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
325 325
326static void __cpuinit default_init(struct cpuinfo_x86 *c) 326static void __cpuinit default_init(struct cpuinfo_x86 *c)
327{ 327{
@@ -340,7 +340,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
340#endif 340#endif
341} 341}
342 342
343static struct cpu_dev __cpuinitdata default_cpu = { 343static const struct cpu_dev __cpuinitconst default_cpu = {
344 .c_init = default_init, 344 .c_init = default_init,
345 .c_vendor = "Unknown", 345 .c_vendor = "Unknown",
346 .c_x86_vendor = X86_VENDOR_UNKNOWN, 346 .c_x86_vendor = X86_VENDOR_UNKNOWN,
@@ -354,22 +354,24 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
354 if (c->extended_cpuid_level < 0x80000004) 354 if (c->extended_cpuid_level < 0x80000004)
355 return; 355 return;
356 356
357 v = (unsigned int *) c->x86_model_id; 357 v = (unsigned int *)c->x86_model_id;
358 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 358 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
359 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 359 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
360 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 360 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
361 c->x86_model_id[48] = 0; 361 c->x86_model_id[48] = 0;
362 362
363 /* Intel chips right-justify this string for some dumb reason; 363 /*
364 undo that brain damage */ 364 * Intel chips right-justify this string for some dumb reason;
365 * undo that brain damage:
366 */
365 p = q = &c->x86_model_id[0]; 367 p = q = &c->x86_model_id[0];
366 while (*p == ' ') 368 while (*p == ' ')
367 p++; 369 p++;
368 if (p != q) { 370 if (p != q) {
369 while (*p) 371 while (*p)
370 *q++ = *p++; 372 *q++ = *p++;
371 while (q <= &c->x86_model_id[48]) 373 while (q <= &c->x86_model_id[48])
372 *q++ = '\0'; /* Zero-pad the rest */ 374 *q++ = '\0'; /* Zero-pad the rest */
373 } 375 }
374} 376}
375 377
@@ -438,27 +440,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
438 440
439 if (smp_num_siblings == 1) { 441 if (smp_num_siblings == 1) {
440 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 442 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
441 } else if (smp_num_siblings > 1) { 443 goto out;
444 }
442 445
443 if (smp_num_siblings > nr_cpu_ids) { 446 if (smp_num_siblings <= 1)
444 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", 447 goto out;
445 smp_num_siblings);
446 smp_num_siblings = 1;
447 return;
448 }
449 448
450 index_msb = get_count_order(smp_num_siblings); 449 if (smp_num_siblings > nr_cpu_ids) {
451 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); 450 pr_warning("CPU: Unsupported number of siblings %d",
451 smp_num_siblings);
452 smp_num_siblings = 1;
453 return;
454 }
452 455
453 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 456 index_msb = get_count_order(smp_num_siblings);
457 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
454 458
455 index_msb = get_count_order(smp_num_siblings); 459 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
456 460
457 core_bits = get_count_order(c->x86_max_cores); 461 index_msb = get_count_order(smp_num_siblings);
458 462
459 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & 463 core_bits = get_count_order(c->x86_max_cores);
460 ((1 << core_bits) - 1); 464
461 } 465 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
466 ((1 << core_bits) - 1);
462 467
463out: 468out:
464 if ((c->x86_max_cores * smp_num_siblings) > 1) { 469 if ((c->x86_max_cores * smp_num_siblings) > 1) {
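
Aside: the detect_ht() restructuring above converts the nested branches into early exits; the topology math is unchanged: get_count_order() (ceil(log2)) gives the width of the sibling field in the APIC id, the package id is the bits above it, and the core id is masked out after dividing siblings by x86_max_cores. A compilable model with invented numbers:

    #include <stdio.h>

    /* count_order(n): bits needed for a field holding n values, ceil(log2(n)). */
    static int count_order(unsigned int n)
    {
            int order = 0;

            while ((1u << order) < n)
                    order++;
            return order;
    }

    int main(void)
    {
            unsigned int apicid = 0x5;      /* invented APIC id */
            unsigned int siblings = 4;      /* 2 cores x 2 threads per package */
            unsigned int max_cores = 2;
            int index_msb = count_order(siblings);
            unsigned int phys_proc_id = apicid >> index_msb;
            int core_bits;

            siblings /= max_cores;          /* now threads per core */
            index_msb = count_order(siblings);
            core_bits = count_order(max_cores);

            printf("package %u, core %u\n", phys_proc_id,
                   (apicid >> index_msb) & ((1u << core_bits) - 1));
            return 0;
    }
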
@@ -473,8 +478,8 @@ out:
473static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 478static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
474{ 479{
475 char *v = c->x86_vendor_id; 480 char *v = c->x86_vendor_id;
476 int i;
477 static int printed; 481 static int printed;
482 int i;
478 483
479 for (i = 0; i < X86_VENDOR_NUM; i++) { 484 for (i = 0; i < X86_VENDOR_NUM; i++) {
480 if (!cpu_devs[i]) 485 if (!cpu_devs[i])
@@ -483,6 +488,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
483 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 488 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
484 (cpu_devs[i]->c_ident[1] && 489 (cpu_devs[i]->c_ident[1] &&
485 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 490 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
491
486 this_cpu = cpu_devs[i]; 492 this_cpu = cpu_devs[i];
487 c->x86_vendor = this_cpu->c_x86_vendor; 493 c->x86_vendor = this_cpu->c_x86_vendor;
488 return; 494 return;
@@ -491,7 +497,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
491 497
492 if (!printed) { 498 if (!printed) {
493 printed++; 499 printed++;
494 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); 500 printk(KERN_ERR
501 "CPU: vendor_id '%s' unknown, using generic init.\n", v);
502
495 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 503 printk(KERN_ERR "CPU: Your system may be unstable.\n");
496 } 504 }
497 505
@@ -511,14 +519,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
511 /* Intel-defined flags: level 0x00000001 */ 519 /* Intel-defined flags: level 0x00000001 */
512 if (c->cpuid_level >= 0x00000001) { 520 if (c->cpuid_level >= 0x00000001) {
513 u32 junk, tfms, cap0, misc; 521 u32 junk, tfms, cap0, misc;
522
514 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 523 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
515 c->x86 = (tfms >> 8) & 0xf; 524 c->x86 = (tfms >> 8) & 0xf;
516 c->x86_model = (tfms >> 4) & 0xf; 525 c->x86_model = (tfms >> 4) & 0xf;
517 c->x86_mask = tfms & 0xf; 526 c->x86_mask = tfms & 0xf;
527
518 if (c->x86 == 0xf) 528 if (c->x86 == 0xf)
519 c->x86 += (tfms >> 20) & 0xff; 529 c->x86 += (tfms >> 20) & 0xff;
520 if (c->x86 >= 0x6) 530 if (c->x86 >= 0x6)
521 c->x86_model += ((tfms >> 16) & 0xf) << 4; 531 c->x86_model += ((tfms >> 16) & 0xf) << 4;
532
522 if (cap0 & (1<<19)) { 533 if (cap0 & (1<<19)) {
523 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 534 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
524 c->x86_cache_alignment = c->x86_clflush_size; 535 c->x86_cache_alignment = c->x86_clflush_size;
@@ -534,6 +545,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
534 /* Intel-defined flags: level 0x00000001 */ 545 /* Intel-defined flags: level 0x00000001 */
535 if (c->cpuid_level >= 0x00000001) { 546 if (c->cpuid_level >= 0x00000001) {
536 u32 capability, excap; 547 u32 capability, excap;
548
537 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 549 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
538 c->x86_capability[0] = capability; 550 c->x86_capability[0] = capability;
539 c->x86_capability[4] = excap; 551 c->x86_capability[4] = excap;
@@ -542,6 +554,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
542 /* AMD-defined flags: level 0x80000001 */ 554 /* AMD-defined flags: level 0x80000001 */
543 xlvl = cpuid_eax(0x80000000); 555 xlvl = cpuid_eax(0x80000000);
544 c->extended_cpuid_level = xlvl; 556 c->extended_cpuid_level = xlvl;
557
545 if ((xlvl & 0xffff0000) == 0x80000000) { 558 if ((xlvl & 0xffff0000) == 0x80000000) {
546 if (xlvl >= 0x80000001) { 559 if (xlvl >= 0x80000001) {
547 c->x86_capability[1] = cpuid_edx(0x80000001); 560 c->x86_capability[1] = cpuid_edx(0x80000001);
@@ -549,13 +562,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
549 } 562 }
550 } 563 }
551 564
552#ifdef CONFIG_X86_64
553 if (c->extended_cpuid_level >= 0x80000008) { 565 if (c->extended_cpuid_level >= 0x80000008) {
554 u32 eax = cpuid_eax(0x80000008); 566 u32 eax = cpuid_eax(0x80000008);
555 567
556 c->x86_virt_bits = (eax >> 8) & 0xff; 568 c->x86_virt_bits = (eax >> 8) & 0xff;
557 c->x86_phys_bits = eax & 0xff; 569 c->x86_phys_bits = eax & 0xff;
558 } 570 }
571#ifdef CONFIG_X86_32
572 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
573 c->x86_phys_bits = 36;
559#endif 574#endif
560 575
561 if (c->extended_cpuid_level >= 0x80000007) 576 if (c->extended_cpuid_level >= 0x80000007)
@@ -602,8 +617,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
602{ 617{
603#ifdef CONFIG_X86_64 618#ifdef CONFIG_X86_64
604 c->x86_clflush_size = 64; 619 c->x86_clflush_size = 64;
620 c->x86_phys_bits = 36;
621 c->x86_virt_bits = 48;
605#else 622#else
606 c->x86_clflush_size = 32; 623 c->x86_clflush_size = 32;
624 c->x86_phys_bits = 32;
625 c->x86_virt_bits = 32;
607#endif 626#endif
608 c->x86_cache_alignment = c->x86_clflush_size; 627 c->x86_cache_alignment = c->x86_clflush_size;
609 628
@@ -634,12 +653,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
634 653
635void __init early_cpu_init(void) 654void __init early_cpu_init(void)
636{ 655{
637 struct cpu_dev **cdev; 656 const struct cpu_dev *const *cdev;
638 int count = 0; 657 int count = 0;
639 658
640 printk("KERNEL supported cpus:\n"); 659 printk(KERN_INFO "KERNEL supported cpus:\n");
641 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { 660 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
642 struct cpu_dev *cpudev = *cdev; 661 const struct cpu_dev *cpudev = *cdev;
643 unsigned int j; 662 unsigned int j;
644 663
645 if (count >= X86_VENDOR_NUM) 664 if (count >= X86_VENDOR_NUM)
@@ -650,7 +669,7 @@ void __init early_cpu_init(void)
650 for (j = 0; j < 2; j++) { 669 for (j = 0; j < 2; j++) {
651 if (!cpudev->c_ident[j]) 670 if (!cpudev->c_ident[j])
652 continue; 671 continue;
653 printk(" %s %s\n", cpudev->c_vendor, 672 printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
654 cpudev->c_ident[j]); 673 cpudev->c_ident[j]);
655 } 674 }
656 } 675 }
@@ -726,9 +745,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
726 c->x86_coreid_bits = 0; 745 c->x86_coreid_bits = 0;
727#ifdef CONFIG_X86_64 746#ifdef CONFIG_X86_64
728 c->x86_clflush_size = 64; 747 c->x86_clflush_size = 64;
748 c->x86_phys_bits = 36;
749 c->x86_virt_bits = 48;
729#else 750#else
730 c->cpuid_level = -1; /* CPUID not detected */ 751 c->cpuid_level = -1; /* CPUID not detected */
731 c->x86_clflush_size = 32; 752 c->x86_clflush_size = 32;
753 c->x86_phys_bits = 32;
754 c->x86_virt_bits = 32;
732#endif 755#endif
733 c->x86_cache_alignment = c->x86_clflush_size; 756 c->x86_cache_alignment = c->x86_clflush_size;
734 memset(&c->x86_capability, 0, sizeof c->x86_capability); 757 memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@ -759,8 +782,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
759 squash_the_stupid_serial_number(c); 782 squash_the_stupid_serial_number(c);
760 783
761 /* 784 /*
762 * The vendor-specific functions might have changed features. Now 785 * The vendor-specific functions might have changed features.
763 * we do "generic changes." 786 * Now we do "generic changes."
764 */ 787 */
765 788
766 /* Filter out anything that depends on CPUID levels we don't have */ 789 /* Filter out anything that depends on CPUID levels we don't have */
@@ -768,7 +791,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
768 791
769 /* If the model name is still unset, do table lookup. */ 792 /* If the model name is still unset, do table lookup. */
770 if (!c->x86_model_id[0]) { 793 if (!c->x86_model_id[0]) {
771 char *p; 794 const char *p;
772 p = table_lookup_model(c); 795 p = table_lookup_model(c);
773 if (p) 796 if (p)
774 strcpy(c->x86_model_id, p); 797 strcpy(c->x86_model_id, p);
@@ -824,6 +847,7 @@ static void vgetcpu_set_mode(void)
824void __init identify_boot_cpu(void) 847void __init identify_boot_cpu(void)
825{ 848{
826 identify_cpu(&boot_cpu_data); 849 identify_cpu(&boot_cpu_data);
850 init_c1e_mask();
827#ifdef CONFIG_X86_32 851#ifdef CONFIG_X86_32
828 sysenter_setup(); 852 sysenter_setup();
829 enable_sep_cpu(); 853 enable_sep_cpu();
@@ -843,11 +867,11 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
843} 867}
844 868
845struct msr_range { 869struct msr_range {
846 unsigned min; 870 unsigned min;
847 unsigned max; 871 unsigned max;
848}; 872};
849 873
850static struct msr_range msr_range_array[] __cpuinitdata = { 874static const struct msr_range msr_range_array[] __cpuinitconst = {
851 { 0x00000000, 0x00000418}, 875 { 0x00000000, 0x00000418},
852 { 0xc0000000, 0xc000040b}, 876 { 0xc0000000, 0xc000040b},
853 { 0xc0010000, 0xc0010142}, 877 { 0xc0010000, 0xc0010142},
@@ -856,14 +880,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = {
856 880
857static void __cpuinit print_cpu_msr(void) 881static void __cpuinit print_cpu_msr(void)
858{ 882{
883 unsigned index_min, index_max;
859 unsigned index; 884 unsigned index;
860 u64 val; 885 u64 val;
861 int i; 886 int i;
862 unsigned index_min, index_max;
863 887
864 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { 888 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
865 index_min = msr_range_array[i].min; 889 index_min = msr_range_array[i].min;
866 index_max = msr_range_array[i].max; 890 index_max = msr_range_array[i].max;
891
867 for (index = index_min; index < index_max; index++) { 892 for (index = index_min; index < index_max; index++) {
868 if (rdmsrl_amd_safe(index, &val)) 893 if (rdmsrl_amd_safe(index, &val))
869 continue; 894 continue;
@@ -873,6 +898,7 @@ static void __cpuinit print_cpu_msr(void)
873} 898}
874 899
875static int show_msr __cpuinitdata; 900static int show_msr __cpuinitdata;
901
876static __init int setup_show_msr(char *arg) 902static __init int setup_show_msr(char *arg)
877{ 903{
878 int num; 904 int num;
@@ -894,12 +920,14 @@ __setup("noclflush", setup_noclflush);
894 920
895void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 921void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
896{ 922{
897 char *vendor = NULL; 923 const char *vendor = NULL;
898 924
899 if (c->x86_vendor < X86_VENDOR_NUM) 925 if (c->x86_vendor < X86_VENDOR_NUM) {
900 vendor = this_cpu->c_vendor; 926 vendor = this_cpu->c_vendor;
901 else if (c->cpuid_level >= 0) 927 } else {
902 vendor = c->x86_vendor_id; 928 if (c->cpuid_level >= 0)
929 vendor = c->x86_vendor_id;
930 }
903 931
904 if (vendor && !strstr(c->x86_model_id, vendor)) 932 if (vendor && !strstr(c->x86_model_id, vendor))
905 printk(KERN_CONT "%s ", vendor); 933 printk(KERN_CONT "%s ", vendor);
@@ -926,10 +954,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
926static __init int setup_disablecpuid(char *arg) 954static __init int setup_disablecpuid(char *arg)
927{ 955{
928 int bit; 956 int bit;
957
929 if (get_option(&arg, &bit) && bit < NCAPINTS*32) 958 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
930 setup_clear_cpu_cap(bit); 959 setup_clear_cpu_cap(bit);
931 else 960 else
932 return 0; 961 return 0;
962
933 return 1; 963 return 1;
934} 964}
935__setup("clearcpuid=", setup_disablecpuid); 965__setup("clearcpuid=", setup_disablecpuid);
@@ -939,6 +969,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
939 969
940DEFINE_PER_CPU_FIRST(union irq_stack_union, 970DEFINE_PER_CPU_FIRST(union irq_stack_union,
941 irq_stack_union) __aligned(PAGE_SIZE); 971 irq_stack_union) __aligned(PAGE_SIZE);
972
942DEFINE_PER_CPU(char *, irq_stack_ptr) = 973DEFINE_PER_CPU(char *, irq_stack_ptr) =
943 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 974 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
944 975
@@ -948,12 +979,21 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
948 979
949DEFINE_PER_CPU(unsigned int, irq_count) = -1; 980DEFINE_PER_CPU(unsigned int, irq_count) = -1;
950 981
982/*
983 * Special IST stacks which the CPU switches to when it calls
984 * an IST-marked descriptor entry. Up to 7 stacks (hardware
985 * limit), all of them are 4K, except the debug stack which
986 * is 8K.
987 */
988static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
989 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
990 [DEBUG_STACK - 1] = DEBUG_STKSZ
991};
992
951static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 993static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
952 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) 994 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
953 __aligned(PAGE_SIZE); 995 __aligned(PAGE_SIZE);
954 996
955extern asmlinkage void ignore_sysret(void);
956
957/* May not be marked __init: used by software suspend */ 997/* May not be marked __init: used by software suspend */
958void syscall_init(void) 998void syscall_init(void)
959{ 999{
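
Aside: the hunks above hoist the per-vector IST stack sizes out of cpu_init() into the file-scope exception_stack_sizes[] table: every stack is EXCEPTION_STKSZ (4K) except the DEBUG_STACK slot (8K), and cpu_init() walks the table recording the top of each stack (estacks is advanced before being stored). A compilable model of that carving; the constants and the DEBUG_STACK index here are stand-ins:

    #include <stdio.h>

    #define N_EXCEPTION_STACKS      4       /* stand-in count */
    #define EXCEPTION_STKSZ         4096
    #define DEBUG_STKSZ             8192
    #define DEBUG_STACK             2       /* stand-in index */

    int main(void)
    {
            /* GNU range initializer, as in exception_stack_sizes[] above: */
            static const unsigned int sizes[N_EXCEPTION_STACKS] = {
                    [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
                    [DEBUG_STACK - 1] = DEBUG_STKSZ,
            };
            unsigned long estacks = 0;      /* base of the backing buffer */
            int v;

            for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                    estacks += sizes[v];    /* advance first: store the *top* */
                    printf("ist[%d] top offset = %lu\n", v, estacks);
            }
            return 0;
    }
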
@@ -983,7 +1023,7 @@ unsigned long kernel_eflags;
983 */ 1023 */
984DEFINE_PER_CPU(struct orig_ist, orig_ist); 1024DEFINE_PER_CPU(struct orig_ist, orig_ist);
985 1025
986#else /* x86_64 */ 1026#else /* CONFIG_X86_64 */
987 1027
988#ifdef CONFIG_CC_STACKPROTECTOR 1028#ifdef CONFIG_CC_STACKPROTECTOR
989DEFINE_PER_CPU(unsigned long, stack_canary); 1029DEFINE_PER_CPU(unsigned long, stack_canary);
@@ -995,9 +1035,26 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
995 memset(regs, 0, sizeof(struct pt_regs)); 1035 memset(regs, 0, sizeof(struct pt_regs));
996 regs->fs = __KERNEL_PERCPU; 1036 regs->fs = __KERNEL_PERCPU;
997 regs->gs = __KERNEL_STACK_CANARY; 1037 regs->gs = __KERNEL_STACK_CANARY;
1038
998 return regs; 1039 return regs;
999} 1040}
1000#endif /* x86_64 */ 1041#endif /* CONFIG_X86_64 */
1042
1043/*
1044 * Clear all 6 debug registers:
1045 */
1046static void clear_all_debug_regs(void)
1047{
1048 int i;
1049
1050 for (i = 0; i < 8; i++) {
1051 /* Ignore db4, db5 */
1052 if ((i == 4) || (i == 5))
1053 continue;
1054
1055 set_debugreg(0, i);
1056 }
1057}
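
Aside: clear_all_debug_regs() above replaces two copy-pasted six-call sequences: it loops over register numbers 0..7 and skips 4 and 5 (aliases or reserved, depending on CR4.DE), which is why the comment still speaks of six registers. A compilable model with a stub in place of set_debugreg():

    #include <stdio.h>

    /* Stub standing in for the kernel's set_debugreg(): */
    static void set_debugreg_stub(unsigned long val, int reg)
    {
            printf("dr%d <- %lu\n", reg, val);
    }

    int main(void)
    {
            int i;

            for (i = 0; i < 8; i++) {
                    if (i == 4 || i == 5)   /* ignore db4, db5 */
                            continue;
                    set_debugreg_stub(0, i);
            }
            return 0;
    }
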
1001 1058
1002/* 1059/*
1003 * cpu_init() initializes state that is per-CPU. Some data is already 1060 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -1007,15 +1064,20 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
1007 * A lot of state is already set up in PDA init for 64 bit 1064 * A lot of state is already set up in PDA init for 64 bit
1008 */ 1065 */
1009#ifdef CONFIG_X86_64 1066#ifdef CONFIG_X86_64
1067
1010void __cpuinit cpu_init(void) 1068void __cpuinit cpu_init(void)
1011{ 1069{
1012 int cpu = stack_smp_processor_id(); 1070 struct orig_ist *orig_ist;
1013 struct tss_struct *t = &per_cpu(init_tss, cpu);
1014 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
1015 unsigned long v;
1016 struct task_struct *me; 1071 struct task_struct *me;
1072 struct tss_struct *t;
1073 unsigned long v;
1074 int cpu;
1017 int i; 1075 int i;
1018 1076
1077 cpu = stack_smp_processor_id();
1078 t = &per_cpu(init_tss, cpu);
1079 orig_ist = &per_cpu(orig_ist, cpu);
1080
1019#ifdef CONFIG_NUMA 1081#ifdef CONFIG_NUMA
1020 if (cpu != 0 && percpu_read(node_number) == 0 && 1082 if (cpu != 0 && percpu_read(node_number) == 0 &&
1021 cpu_to_node(cpu) != NUMA_NO_NODE) 1083 cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -1056,19 +1118,17 @@ void __cpuinit cpu_init(void)
1056 * set up and load the per-CPU TSS 1118 * set up and load the per-CPU TSS
1057 */ 1119 */
1058 if (!orig_ist->ist[0]) { 1120 if (!orig_ist->ist[0]) {
1059 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1060 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1061 [DEBUG_STACK - 1] = DEBUG_STKSZ
1062 };
1063 char *estacks = per_cpu(exception_stacks, cpu); 1121 char *estacks = per_cpu(exception_stacks, cpu);
1122
1064 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1123 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1065 estacks += sizes[v]; 1124 estacks += exception_stack_sizes[v];
1066 orig_ist->ist[v] = t->x86_tss.ist[v] = 1125 orig_ist->ist[v] = t->x86_tss.ist[v] =
1067 (unsigned long)estacks; 1126 (unsigned long)estacks;
1068 } 1127 }
1069 } 1128 }
1070 1129
1071 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1130 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1131
1072 /* 1132 /*
1073 * <= is required because the CPU will access up to 1133 * <= is required because the CPU will access up to
1074 * 8 bits beyond the end of the IO permission bitmap. 1134 * 8 bits beyond the end of the IO permission bitmap.
@@ -1078,8 +1138,7 @@ void __cpuinit cpu_init(void)
1078 1138
1079 atomic_inc(&init_mm.mm_count); 1139 atomic_inc(&init_mm.mm_count);
1080 me->active_mm = &init_mm; 1140 me->active_mm = &init_mm;
1081 if (me->mm) 1141 BUG_ON(me->mm);
1082 BUG();
1083 enter_lazy_tlb(&init_mm, me); 1142 enter_lazy_tlb(&init_mm, me);
1084 1143
1085 load_sp0(t, &current->thread); 1144 load_sp0(t, &current->thread);
@@ -1098,17 +1157,7 @@ void __cpuinit cpu_init(void)
1098 arch_kgdb_ops.correct_hw_break(); 1157 arch_kgdb_ops.correct_hw_break();
1099 else 1158 else
1100#endif 1159#endif
1101 { 1160 clear_all_debug_regs();
1102 /*
1103 * Clear all 6 debug registers:
1104 */
1105 set_debugreg(0UL, 0);
1106 set_debugreg(0UL, 1);
1107 set_debugreg(0UL, 2);
1108 set_debugreg(0UL, 3);
1109 set_debugreg(0UL, 6);
1110 set_debugreg(0UL, 7);
1111 }
1112 1161
1113 fpu_init(); 1162 fpu_init();
1114 1163
@@ -1129,7 +1178,8 @@ void __cpuinit cpu_init(void)
1129 1178
1130 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1179 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1131 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1180 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1132 for (;;) local_irq_enable(); 1181 for (;;)
1182 local_irq_enable();
1133 } 1183 }
1134 1184
1135 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1185 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1145,8 +1195,7 @@ void __cpuinit cpu_init(void)
1145 */ 1195 */
1146 atomic_inc(&init_mm.mm_count); 1196 atomic_inc(&init_mm.mm_count);
1147 curr->active_mm = &init_mm; 1197 curr->active_mm = &init_mm;
1148 if (curr->mm) 1198 BUG_ON(curr->mm);
1149 BUG();
1150 enter_lazy_tlb(&init_mm, curr); 1199 enter_lazy_tlb(&init_mm, curr);
1151 1200
1152 load_sp0(t, thread); 1201 load_sp0(t, thread);
@@ -1159,13 +1208,7 @@ void __cpuinit cpu_init(void)
1159 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1208 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1160#endif 1209#endif
1161 1210
1162 /* Clear all 6 debug registers: */ 1211 clear_all_debug_regs();
1163 set_debugreg(0, 0);
1164 set_debugreg(0, 1);
1165 set_debugreg(0, 2);
1166 set_debugreg(0, 3);
1167 set_debugreg(0, 6);
1168 set_debugreg(0, 7);
1169 1212
1170 /* 1213 /*
1171 * Force FPU initialization: 1214 * Force FPU initialization:
@@ -1185,6 +1228,4 @@ void __cpuinit cpu_init(void)
1185 1228
1186 xsave_init(); 1229 xsave_init();
1187} 1230}
1188
1189
1190#endif 1231#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index de4094a39210..6de9a908e400 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -3,33 +3,34 @@
3#define ARCH_X86_CPU_H 3#define ARCH_X86_CPU_H
4 4
5struct cpu_model_info { 5struct cpu_model_info {
6 int vendor; 6 int vendor;
7 int family; 7 int family;
8 char *model_names[16]; 8 const char *model_names[16];
9}; 9};
10 10
11/* attempt to consolidate cpu attributes */ 11/* attempt to consolidate cpu attributes */
12struct cpu_dev { 12struct cpu_dev {
13 char * c_vendor; 13 const char *c_vendor;
14 14
15 /* some have two possibilities for cpuid string */ 15 /* some have two possibilities for cpuid string */
16 char * c_ident[2]; 16 const char *c_ident[2];
17 17
18 struct cpu_model_info c_models[4]; 18 struct cpu_model_info c_models[4];
19 19
20 void (*c_early_init)(struct cpuinfo_x86 *c); 20 void (*c_early_init)(struct cpuinfo_x86 *);
21 void (*c_init)(struct cpuinfo_x86 * c); 21 void (*c_init)(struct cpuinfo_x86 *);
22 void (*c_identify)(struct cpuinfo_x86 * c); 22 void (*c_identify)(struct cpuinfo_x86 *);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); 23 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
24 int c_x86_vendor; 24 int c_x86_vendor;
25}; 25};
26 26
27#define cpu_dev_register(cpu_devX) \ 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \ 28 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \ 29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX; 30 &cpu_devX;
31 31
32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[];
33 34
34extern void display_cacheinfo(struct cpuinfo_x86 *c); 35extern void display_cacheinfo(struct cpuinfo_x86 *c);
35 36
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 000000000000..46e29ab96c6a
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,901 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licensing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
34static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
35static DEFINE_PER_CPU(unsigned, cpu_modelflag);
36static DEFINE_PER_CPU(int, cpu_priv_count);
37static DEFINE_PER_CPU(unsigned, cpu_model);
38
39static DEFINE_MUTEX(cpu_debug_lock);
40
41static struct dentry *cpu_debugfs_dir;
42
43static struct cpu_debug_base cpu_base[] = {
44 { "mc", CPU_MC, 0 },
45 { "monitor", CPU_MONITOR, 0 },
46 { "time", CPU_TIME, 0 },
47 { "pmc", CPU_PMC, 1 },
48 { "platform", CPU_PLATFORM, 0 },
49 { "apic", CPU_APIC, 0 },
50 { "poweron", CPU_POWERON, 0 },
51 { "control", CPU_CONTROL, 0 },
52 { "features", CPU_FEATURES, 0 },
53 { "lastbranch", CPU_LBRANCH, 0 },
54 { "bios", CPU_BIOS, 0 },
55 { "freq", CPU_FREQ, 0 },
56 { "mtrr", CPU_MTRR, 0 },
57 { "perf", CPU_PERF, 0 },
58 { "cache", CPU_CACHE, 0 },
59 { "sysenter", CPU_SYSENTER, 0 },
60 { "therm", CPU_THERM, 0 },
61 { "misc", CPU_MISC, 0 },
62 { "debug", CPU_DEBUG, 0 },
63 { "pat", CPU_PAT, 0 },
64 { "vmx", CPU_VMX, 0 },
65 { "call", CPU_CALL, 0 },
66 { "base", CPU_BASE, 0 },
67 { "ver", CPU_VER, 0 },
68 { "conf", CPU_CONF, 0 },
69 { "smm", CPU_SMM, 0 },
70 { "svm", CPU_SVM, 0 },
71 { "osvm", CPU_OSVM, 0 },
72 { "tss", CPU_TSS, 0 },
73 { "cr", CPU_CR, 0 },
74 { "dt", CPU_DT, 0 },
75 { "registers", CPU_REG_ALL, 0 },
76};
77
78static struct cpu_file_base cpu_file[] = {
79 { "index", CPU_REG_ALL, 0 },
80 { "value", CPU_REG_ALL, 1 },
81};
82
83/* Intel Registers Range */
84static struct cpu_debug_range cpu_intel_range[] = {
85 { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
86 { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
87 { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
88 { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
89 { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
90 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
91
92 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
93 { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
94 { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
95 { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
96
97 { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
98 { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
99 { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
100 { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
101
102 { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
103 { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
104 { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
105 { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
106
107 { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
108 { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
109 { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
110 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
111
112 { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
113 { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
114 { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
115 { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
116 { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
117
118 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
119 { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
120 { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
121 { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
122 { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
123 { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
124 { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
125 { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
126
127 { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
128 { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
129 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
130 { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
131 { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
132 { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
133 { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
134 { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
135
136 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
137 { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
138 { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
139 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
140 { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
141 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
142
143 { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
144 { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
145 { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
146 { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
147 { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
148 { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
149 { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
150 { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
151 { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
152 { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
153 { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
154 { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
155
156 { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
157 { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
158 { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
159 { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
160 { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
161 { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
162 { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
163 { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
164 { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
165 { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
166 { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
167
168 { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
169 { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
170 { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
171
172 { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
173
174 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
175 { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
176 { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
177 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
178};
179
180/* AMD Registers Range */
181static struct cpu_debug_range cpu_amd_range[] = {
182 { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, },
183 { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, },
184 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, },
185 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS },
186 { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS },
187 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, },
188
189 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, },
190 { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, },
191 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, },
192 { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, },
193
194 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, },
195 { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, },
196 { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, },
197 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, },
198 { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, },
199 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, },
200
201 { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, },
202
203 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, },
204 { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, },
205 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, },
206 { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, },
207
208 { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, },
209 { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, },
210 { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, },
211 { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, },
212 { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, },
213 { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, },
214 { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, },
215 { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, },
216 { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, },
217 { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, },
218 { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, },
219 { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, },
220 { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, },
221 { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, },
222 { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, },
223 { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, },
224 { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, },
225 { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, },
226};
227
228
229/* Intel */
230static int get_intel_modelflag(unsigned model)
231{
232 int flag;
233
234 switch (model) {
235 case 0x0501:
236 case 0x0502:
237 case 0x0504:
238 flag = CPU_INTEL_PENTIUM;
239 break;
240 case 0x0601:
241 case 0x0603:
242 case 0x0605:
243 case 0x0607:
244 case 0x0608:
245 case 0x060A:
246 case 0x060B:
247 flag = CPU_INTEL_P6;
248 break;
249 case 0x0609:
250 case 0x060D:
251 flag = CPU_INTEL_PENTIUM_M;
252 break;
253 case 0x060E:
254 flag = CPU_INTEL_CORE;
255 break;
256 case 0x060F:
257 case 0x0617:
258 flag = CPU_INTEL_CORE2;
259 break;
260 case 0x061C:
261 flag = CPU_INTEL_ATOM;
262 break;
263 case 0x0F00:
264 case 0x0F01:
265 case 0x0F02:
266 case 0x0F03:
267 case 0x0F04:
268 flag = CPU_INTEL_XEON_P4;
269 break;
270 case 0x0F06:
271 flag = CPU_INTEL_XEON_MP;
272 break;
273 default:
274 flag = CPU_NONE;
275 break;
276 }
277
278 return flag;
279}
280
281/* AMD */
282static int get_amd_modelflag(unsigned model)
283{
284 int flag;
285
286 switch (model >> 8) {
287 case 0x6:
288 flag = CPU_AMD_K6;
289 break;
290 case 0x7:
291 flag = CPU_AMD_K7;
292 break;
293 case 0x8:
294 flag = CPU_AMD_K8;
295 break;
296 case 0xf:
297 flag = CPU_AMD_0F;
298 break;
299 case 0x10:
300 flag = CPU_AMD_10;
301 break;
302 case 0x11:
303 flag = CPU_AMD_11;
304 break;
305 default:
306 flag = CPU_NONE;
307 break;
308 }
309
310 return flag;
311}
312
313static int get_cpu_modelflag(unsigned cpu)
314{
315 int flag;
316
317 flag = per_cpu(cpu_model, cpu);
318
319 switch (flag >> 16) {
320 case X86_VENDOR_INTEL:
321 flag = get_intel_modelflag(flag);
322 break;
323 case X86_VENDOR_AMD:
324 flag = get_amd_modelflag(flag & 0xffff);
325 break;
326 default:
327 flag = CPU_NONE;
328 break;
329 }
330
331 return flag;
332}
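/*
 * per_cpu(cpu_model) is packed by cpu_init_cpu() below as
 * (vendor << 16) | (family << 8) | model, so e.g. a Core 2 (family 6,
 * model 15) reaches get_intel_modelflag() as 0x060F.  Passing the
 * unmasked value works only because X86_VENDOR_INTEL is 0 and the
 * vendor bits vanish; the AMD path masks them off explicitly.
 */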
333
334static int get_cpu_range_count(unsigned cpu)
335{
336 int index;
337
338 switch (per_cpu(cpu_model, cpu) >> 16) {
339 case X86_VENDOR_INTEL:
340 index = ARRAY_SIZE(cpu_intel_range);
341 break;
342 case X86_VENDOR_AMD:
343 index = ARRAY_SIZE(cpu_amd_range);
344 break;
345 default:
346 index = 0;
347 break;
348 }
349
350 return index;
351}
352
353static int is_typeflag_valid(unsigned cpu, unsigned flag)
354{
355 unsigned vendor, modelflag;
356 int i, index;
357
358	/* Standard registers should always be valid */
359 if (flag >= CPU_TSS)
360 return 1;
361
362 modelflag = per_cpu(cpu_modelflag, cpu);
363 vendor = per_cpu(cpu_model, cpu) >> 16;
364 index = get_cpu_range_count(cpu);
365
366 for (i = 0; i < index; i++) {
367 switch (vendor) {
368 case X86_VENDOR_INTEL:
369 if ((cpu_intel_range[i].model & modelflag) &&
370 (cpu_intel_range[i].flag & flag))
371 return 1;
372 break;
373 case X86_VENDOR_AMD:
374 if ((cpu_amd_range[i].model & modelflag) &&
375 (cpu_amd_range[i].flag & flag))
376 return 1;
377 break;
378 }
379 }
380
381 /* Invalid */
382 return 0;
383}
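/*
 * An MSR-backed register class is valid for a CPU when some row of the
 * vendor table matches both the CPU's model bit and the requested
 * flag; classes at or above CPU_TSS are not MSR-backed and are
 * accepted unconditionally above.
 */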
384
385static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
386 int index, unsigned flag)
387{
388 unsigned modelflag;
389
390 modelflag = per_cpu(cpu_modelflag, cpu);
391 *max = 0;
392 switch (per_cpu(cpu_model, cpu) >> 16) {
393 case X86_VENDOR_INTEL:
394 if ((cpu_intel_range[index].model & modelflag) &&
395 (cpu_intel_range[index].flag & flag)) {
396 *min = cpu_intel_range[index].min;
397 *max = cpu_intel_range[index].max;
398 }
399 break;
400 case X86_VENDOR_AMD:
401 if ((cpu_amd_range[index].model & modelflag) &&
402 (cpu_amd_range[index].flag & flag)) {
403 *min = cpu_amd_range[index].min;
404 *max = cpu_amd_range[index].max;
405 }
406 break;
407 }
408
409 return *max;
410}
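/*
 * get_cpu_range() reports through *min/*max and returns *max, which it
 * pre-clears, so a zero return means table row 'index' does not apply
 * to this CPU and flag and callers skip it.  E.g. CPU_MTRR on a K8
 * matches the row { 0x200, 0x20F, CPU_MTRR, CPU_K8_PLUS } and yields
 * *min = 0x200, *max = 0x20F.
 */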
411
412/* This function can also be called with seq = NULL for printk */
413static void print_cpu_data(struct seq_file *seq, unsigned type,
414 u32 low, u32 high)
415{
416 struct cpu_private *priv;
417 u64 val = high;
418
419 if (seq) {
420 priv = seq->private;
421 if (priv->file) {
422 val = (val << 32) | low;
423 seq_printf(seq, "0x%llx\n", val);
424 } else
425 seq_printf(seq, " %08x: %08x_%08x\n",
426 type, high, low);
427 } else
428 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
429}
430
431/* This function can also be called with seq = NULL for printk */
432static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
433{
434 unsigned msr, msr_min, msr_max;
435 struct cpu_private *priv;
436 u32 low, high;
437 int i, range;
438
439 if (seq) {
440 priv = seq->private;
441 if (priv->file) {
442 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
443 &low, &high))
444 print_cpu_data(seq, priv->reg, low, high);
445 return;
446 }
447 }
448
449 range = get_cpu_range_count(cpu);
450
451 for (i = 0; i < range; i++) {
452 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
453 continue;
454
455 for (msr = msr_min; msr <= msr_max; msr++) {
456 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
457 continue;
458 print_cpu_data(seq, msr, low, high);
459 }
460 }
461}
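/*
 * rdmsr_safe_on_cpu() performs the read on the target CPU and returns
 * non-zero if the access faults, so unimplemented MSRs inside a table
 * range are skipped silently instead of oopsing.
 */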
462
463static void print_tss(void *arg)
464{
465 struct pt_regs *regs = task_pt_regs(current);
466 struct seq_file *seq = arg;
467 unsigned int seg;
468
469 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
470 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
471 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
472 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
473
474 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
475 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
476 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
477	seq_printf(seq, " RSP\t: %016lx\n", regs->sp);
478
479#ifdef CONFIG_X86_64
480 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
481 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
482 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
483 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
484 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
485 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
486 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
487 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
488#endif
489
490 asm("movl %%cs,%0" : "=r" (seg));
491 seq_printf(seq, " CS\t: %04x\n", seg);
492 asm("movl %%ds,%0" : "=r" (seg));
493 seq_printf(seq, " DS\t: %04x\n", seg);
494 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
495 asm("movl %%es,%0" : "=r" (seg));
496 seq_printf(seq, " ES\t: %04x\n", seg);
497 asm("movl %%fs,%0" : "=r" (seg));
498 seq_printf(seq, " FS\t: %04x\n", seg);
499 asm("movl %%gs,%0" : "=r" (seg));
500 seq_printf(seq, " GS\t: %04x\n", seg);
501
502	seq_printf(seq, " RFLAGS\t: %016lx\n", regs->flags);
503
504	seq_printf(seq, " RIP\t: %016lx\n", regs->ip);
505}
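/*
 * Despite its name, print_tss() dumps the interrupted task's saved
 * pt_regs and the live segment selectors rather than the hardware TSS;
 * the 64-bit register names are printed even on 32-bit kernels, where
 * the fields hold the 32-bit e-registers.
 */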
506
507static void print_cr(void *arg)
508{
509 struct seq_file *seq = arg;
510
511 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
512 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
513 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
514 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
515#ifdef CONFIG_X86_64
516 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
517#endif
518}
519
520static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
521{
522	seq_printf(seq, " %s\t: %016llx (limit %04x)\n", str, (u64)dt.address, dt.size);
523}
524
525static void print_dt(void *seq)
526{
527 struct desc_ptr dt;
528 unsigned long ldt;
529
530 /* IDT */
531 store_idt((struct desc_ptr *)&dt);
532 print_desc_ptr("IDT", seq, dt);
533
534 /* GDT */
535 store_gdt((struct desc_ptr *)&dt);
536 print_desc_ptr("GDT", seq, dt);
537
538 /* LDT */
539 store_ldt(ldt);
540 seq_printf(seq, " LDT\t: %016lx\n", ldt);
541
542 /* TR */
543 store_tr(ldt);
544 seq_printf(seq, " TR\t: %016lx\n", ldt);
545}
546
547static void print_dr(void *arg)
548{
549 struct seq_file *seq = arg;
550 unsigned long dr;
551 int i;
552
553 for (i = 0; i < 8; i++) {
554 /* Ignore db4, db5 */
555 if ((i == 4) || (i == 5))
556 continue;
557 get_debugreg(dr, i);
558 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
559 }
560
561 seq_printf(seq, "\n MSR\t:\n");
562}
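/*
 * DR4 and DR5 are legacy aliases of DR6/DR7 (and fault when CR4.DE is
 * set), hence the loop above skips them.
 */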
563
564static void print_apic(void *arg)
565{
566 struct seq_file *seq = arg;
567
568#ifdef CONFIG_X86_LOCAL_APIC
569 seq_printf(seq, " LAPIC\t:\n");
570 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
571 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
572 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
573 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
574 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
575 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
576 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
577 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
578 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
579 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
580 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
581 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
582 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
583 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
584 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
585 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
586 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
587 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
588 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
589 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
590 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
591#endif /* CONFIG_X86_LOCAL_APIC */
592
593 seq_printf(seq, "\n MSR\t:\n");
594}
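/*
 * APIC_ISR names only the first 32-bit chunk of the 256-bit in-service
 * register, so apic_read(APIC_ISR) shows vectors 0-31; the remaining
 * chunks live at APIC_ISR + 0x10 * n.
 */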
595
596static int cpu_seq_show(struct seq_file *seq, void *v)
597{
598 struct cpu_private *priv = seq->private;
599
600 if (priv == NULL)
601 return -EINVAL;
602
603 switch (cpu_base[priv->type].flag) {
604 case CPU_TSS:
605 smp_call_function_single(priv->cpu, print_tss, seq, 1);
606 break;
607 case CPU_CR:
608 smp_call_function_single(priv->cpu, print_cr, seq, 1);
609 break;
610 case CPU_DT:
611 smp_call_function_single(priv->cpu, print_dt, seq, 1);
612 break;
613 case CPU_DEBUG:
614 if (priv->file == CPU_INDEX_BIT)
615 smp_call_function_single(priv->cpu, print_dr, seq, 1);
616 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
617 break;
618 case CPU_APIC:
619 if (priv->file == CPU_INDEX_BIT)
620 smp_call_function_single(priv->cpu, print_apic, seq, 1);
621 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
622 break;
623
624 default:
625 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
626 break;
627 }
628 seq_printf(seq, "\n");
629
630 return 0;
631}
632
633static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
634{
635 if (*pos == 0) /* One time is enough ;-) */
636 return seq;
637
638 return NULL;
639}
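/*
 * Single-record seq_file idiom: start() yields one opaque record on
 * the first pass and NULL afterwards, so show() runs exactly once per
 * read of the file.
 */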
640
641static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
642{
643 (*pos)++;
644
645 return cpu_seq_start(seq, pos);
646}
647
648static void cpu_seq_stop(struct seq_file *seq, void *v)
649{
650}
651
652static const struct seq_operations cpu_seq_ops = {
653 .start = cpu_seq_start,
654 .next = cpu_seq_next,
655 .stop = cpu_seq_stop,
656 .show = cpu_seq_show,
657};
658
659static int cpu_seq_open(struct inode *inode, struct file *file)
660{
661 struct cpu_private *priv = inode->i_private;
662 struct seq_file *seq;
663 int err;
664
665 err = seq_open(file, &cpu_seq_ops);
666 if (!err) {
667 seq = file->private_data;
668 seq->private = priv;
669 }
670
671 return err;
672}
673
674static int write_msr(struct cpu_private *priv, u64 val)
675{
676 u32 low, high;
677
678 high = (val >> 32) & 0xffffffff;
679 low = val & 0xffffffff;
680
681 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
682 return 0;
683
684 return -EPERM;
685}
686
687static int write_cpu_register(struct cpu_private *priv, const char *buf)
688{
689	int ret;
690	u64 val;
691
692	ret = strict_strtoull(buf, 0, &val);
693	if (ret < 0)
694		return ret;
695
696	/* Only MSR-backed types are writable */
697	if (priv->type < CPU_TSS_BIT)
698		return write_msr(priv, val);
699
700	return -EPERM;
701}
702
703static ssize_t cpu_write(struct file *file, const char __user *ubuf,
704 size_t count, loff_t *off)
705{
706 struct seq_file *seq = file->private_data;
707 struct cpu_private *priv = seq->private;
708 char buf[19];
709
710 if ((priv == NULL) || (count >= sizeof(buf)))
711 return -EINVAL;
712
713 if (copy_from_user(&buf, ubuf, count))
714 return -EFAULT;
715
716 buf[count] = 0;
717
718 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
719 if (!write_cpu_register(priv, buf))
720 return count;
721
722 return -EACCES;
723}
724
725static const struct file_operations cpu_fops = {
726 .owner = THIS_MODULE,
727 .open = cpu_seq_open,
728 .read = seq_read,
729 .write = cpu_write,
730 .llseek = seq_lseek,
731 .release = seq_release,
732};
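/*
 * Resulting interface, assuming debugfs is mounted at /sys/kernel/debug
 * (arch_debugfs_dir is the "x86" directory there) and that the
 * cpu_file[] table earlier in this file names the per-register files
 * "index" and "value":
 *
 *   cat /sys/kernel/debug/x86/cpu/cpu0/apic/0x1b/value
 *   echo 0xfee00900 > /sys/kernel/debug/x86/cpu/cpu0/apic/0x1b/value
 *
 * Writes are honoured only where both cpu_base[].write and
 * cpu_file[].write are set, and only for MSR-backed types.
 */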
733
734static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
735 unsigned file, struct dentry *dentry)
736{
737 struct cpu_private *priv = NULL;
738
739	/* Already initialized */
740 if (file == CPU_INDEX_BIT)
741 if (per_cpu(cpu_arr[type].init, cpu))
742 return 0;
743
744 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
745 if (priv == NULL)
746 return -ENOMEM;
747
748 priv->cpu = cpu;
749 priv->type = type;
750 priv->reg = reg;
751 priv->file = file;
752 mutex_lock(&cpu_debug_lock);
753 per_cpu(priv_arr[type], cpu) = priv;
754 per_cpu(cpu_priv_count, cpu)++;
755 mutex_unlock(&cpu_debug_lock);
756
757 if (file)
758 debugfs_create_file(cpu_file[file].name, S_IRUGO,
759 dentry, (void *)priv, &cpu_fops);
760 else {
761 debugfs_create_file(cpu_base[type].name, S_IRUGO,
762 per_cpu(cpu_arr[type].dentry, cpu),
763 (void *)priv, &cpu_fops);
764 mutex_lock(&cpu_debug_lock);
765 per_cpu(cpu_arr[type].init, cpu) = 1;
766 mutex_unlock(&cpu_debug_lock);
767 }
768
769 return 0;
770}
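/*
 * cpu_debug_lock only serializes the per-cpu bookkeeping
 * (priv_arr/cpu_priv_count and the init flag); the priv structures
 * allocated here are freed in cpu_debug_exit() below.
 */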
771
772static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
773 struct dentry *dentry)
774{
775 unsigned file;
776 int err = 0;
777
778 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
779 err = cpu_create_file(cpu, type, reg, file, dentry);
780 if (err)
781 return err;
782 }
783
784 return err;
785}
786
787static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
788{
789 struct dentry *cpu_dentry = NULL;
790 unsigned reg, reg_min, reg_max;
791 int i, range, err = 0;
792 char reg_dir[12];
793 u32 low, high;
794
795 range = get_cpu_range_count(cpu);
796
797 for (i = 0; i < range; i++) {
798 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
799 cpu_base[type].flag))
800 continue;
801
802 for (reg = reg_min; reg <= reg_max; reg++) {
803 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
804 continue;
805
806 sprintf(reg_dir, "0x%x", reg);
807 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
808 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
809 if (err)
810 return err;
811 }
812 }
813
814 return err;
815}
816
817static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
818{
819 struct dentry *cpu_dentry = NULL;
820 unsigned type;
821 int err = 0;
822
823 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
824 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
825 continue;
826 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
827 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
828
829 if (type < CPU_TSS_BIT)
830 err = cpu_init_msr(cpu, type, cpu_dentry);
831 else
832 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
833 cpu_dentry);
834 if (err)
835 return err;
836 }
837
838 return err;
839}
840
841static int cpu_init_cpu(void)
842{
843 struct dentry *cpu_dentry = NULL;
844 struct cpuinfo_x86 *cpui;
845 char cpu_dir[12];
846 unsigned cpu;
847 int err = 0;
848
849 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
850 cpui = &cpu_data(cpu);
851 if (!cpu_has(cpui, X86_FEATURE_MSR))
852 continue;
853 per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
854 (cpui->x86 << 8) |
855 (cpui->x86_model));
856 per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
857
858 sprintf(cpu_dir, "cpu%d", cpu);
859 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
860 err = cpu_init_allreg(cpu, cpu_dentry);
861
862 pr_info("cpu%d(%d) debug files %d\n",
863 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
864 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
865 pr_err("Register files count %d exceeds limit %d\n",
866 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
867 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
868 err = -ENFILE;
869 }
870 if (err)
871 return err;
872 }
873
874 return err;
875}
876
877static int __init cpu_debug_init(void)
878{
879 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
880
881 return cpu_init_cpu();
882}
883
884static void __exit cpu_debug_exit(void)
885{
886 int i, cpu;
887
888 if (cpu_debugfs_dir)
889 debugfs_remove_recursive(cpu_debugfs_dir);
890
891 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
892 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
893 kfree(per_cpu(priv_arr[i], cpu));
894}
895
896module_init(cpu_debug_init);
897module_exit(cpu_debug_exit);
898
899MODULE_AUTHOR("Jaswinder Singh Rajput");
900MODULE_DESCRIPTION("CPU Debug module");
901MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 65792c2cc462..52c839875478 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -87,30 +87,15 @@ config X86_POWERNOW_K7_ACPI
87config X86_POWERNOW_K8 87config X86_POWERNOW_K8
88 tristate "AMD Opteron/Athlon64 PowerNow!" 88 tristate "AMD Opteron/Athlon64 PowerNow!"
89 select CPU_FREQ_TABLE 89 select CPU_FREQ_TABLE
90 depends on ACPI && ACPI_PROCESSOR
90 help 91 help
91 This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. 92 This adds the CPUFreq driver for K8/K10 Opteron/Athlon64 processors.
92 93
93 To compile this driver as a module, choose M here: the 94 To compile this driver as a module, choose M here: the
94 module will be called powernow-k8. 95 module will be called powernow-k8.
95 96
96 For details, take a look at <file:Documentation/cpu-freq/>. 97 For details, take a look at <file:Documentation/cpu-freq/>.
97 98
98 If in doubt, say N.
99
100config X86_POWERNOW_K8_ACPI
101 bool
102 prompt "ACPI Support" if X86_32
103 depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR
104 depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m)
105 default y
106 help
107 This provides access to the K8s Processor Performance States via ACPI.
108 This driver is probably required for CPUFreq to work with multi-socket and
109 SMP systems. It is not required on at least some single-socket yet
110 multi-core systems, even if SMP is enabled.
111
112 It is safe to say Y here.
113
114config X86_GX_SUSPMOD 99config X86_GX_SUSPMOD
115 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" 100 tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
116 depends on X86_32 && PCI 101 depends on X86_32 && PCI
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 560f7760dae5..509296df294d 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -1,6 +1,11 @@
1# Link order matters. K8 is preferred to ACPI because of firmware bugs in early
2# K8 systems. ACPI is preferred to all other hardware-specific drivers.
3# speedstep-* is preferred over p4-clockmod.
4
5obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
6obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
1obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o 7obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 8obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 9obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
5obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o 10obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
6obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o 11obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
@@ -10,7 +15,6 @@ obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o
10obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o 15obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o
11obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o 16obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o
12obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o 17obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o
13obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o
14obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o 18obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
15obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o 19obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
16obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o 20obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 5e40f54171e7..19f6b9d27e83 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) 2 * acpi-cpufreq.c - ACPI Processor P-States Driver
3 * 3 *
4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 4 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 5 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
@@ -36,16 +36,18 @@
36#include <trace/power.h> 36#include <trace/power.h>
37 37
38#include <linux/acpi.h> 38#include <linux/acpi.h>
39#include <linux/io.h>
40#include <linux/delay.h>
41#include <linux/uaccess.h>
42
39#include <acpi/processor.h> 43#include <acpi/processor.h>
40 44
41#include <asm/io.h>
42#include <asm/msr.h> 45#include <asm/msr.h>
43#include <asm/processor.h> 46#include <asm/processor.h>
44#include <asm/cpufeature.h> 47#include <asm/cpufeature.h>
45#include <asm/delay.h>
46#include <asm/uaccess.h>
47 48
48#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) 49#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
50 "acpi-cpufreq", msg)
49 51
50MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); 52MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
51MODULE_DESCRIPTION("ACPI Processor P-States Driver"); 53MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@ -97,7 +99,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
97 99
98 perf = data->acpi_data; 100 perf = data->acpi_data;
99 101
100 for (i=0; i<perf->state_count; i++) { 102 for (i = 0; i < perf->state_count; i++) {
101 if (value == perf->states[i].status) 103 if (value == perf->states[i].status)
102 return data->freq_table[i].frequency; 104 return data->freq_table[i].frequency;
103 } 105 }
@@ -112,7 +114,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
112 msr &= INTEL_MSR_RANGE; 114 msr &= INTEL_MSR_RANGE;
113 perf = data->acpi_data; 115 perf = data->acpi_data;
114 116
115 for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { 117 for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
116 if (msr == perf->states[data->freq_table[i].index].status) 118 if (msr == perf->states[data->freq_table[i].index].status)
117 return data->freq_table[i].frequency; 119 return data->freq_table[i].frequency;
118 } 120 }
@@ -140,15 +142,13 @@ struct io_addr {
140 u8 bit_width; 142 u8 bit_width;
141}; 143};
142 144
143typedef union {
144 struct msr_addr msr;
145 struct io_addr io;
146} drv_addr_union;
147
148struct drv_cmd { 145struct drv_cmd {
149 unsigned int type; 146 unsigned int type;
150 const struct cpumask *mask; 147 const struct cpumask *mask;
151 drv_addr_union addr; 148 union {
149 struct msr_addr msr;
150 struct io_addr io;
151 } addr;
152 u32 val; 152 u32 val;
153}; 153};
154 154
@@ -371,7 +371,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
371 unsigned int cur_freq; 371 unsigned int cur_freq;
372 unsigned int i; 372 unsigned int i;
373 373
374 for (i=0; i<100; i++) { 374 for (i = 0; i < 100; i++) {
375 cur_freq = extract_freq(get_cur_val(mask), data); 375 cur_freq = extract_freq(get_cur_val(mask), data);
376 if (cur_freq == freq) 376 if (cur_freq == freq)
377 return 1; 377 return 1;
@@ -496,7 +496,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
496 unsigned long freq; 496 unsigned long freq;
497 unsigned long freqn = perf->states[0].core_frequency * 1000; 497 unsigned long freqn = perf->states[0].core_frequency * 1000;
498 498
499 for (i=0; i<(perf->state_count-1); i++) { 499 for (i = 0; i < (perf->state_count-1); i++) {
500 freq = freqn; 500 freq = freqn;
501 freqn = perf->states[i+1].core_frequency * 1000; 501 freqn = perf->states[i+1].core_frequency * 1000;
502 if ((2 * cpu_khz) > (freqn + freq)) { 502 if ((2 * cpu_khz) > (freqn + freq)) {
@@ -675,17 +675,29 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
675 675
676 /* detect transition latency */ 676 /* detect transition latency */
677 policy->cpuinfo.transition_latency = 0; 677 policy->cpuinfo.transition_latency = 0;
678 for (i=0; i<perf->state_count; i++) { 678 for (i = 0; i < perf->state_count; i++) {
679 if ((perf->states[i].transition_latency * 1000) > 679 if ((perf->states[i].transition_latency * 1000) >
680 policy->cpuinfo.transition_latency) 680 policy->cpuinfo.transition_latency)
681 policy->cpuinfo.transition_latency = 681 policy->cpuinfo.transition_latency =
682 perf->states[i].transition_latency * 1000; 682 perf->states[i].transition_latency * 1000;
683 } 683 }
684 684
685	/* Check for high latency (>20 us) from buggy BIOSes, like on T42 */
686 if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
687 policy->cpuinfo.transition_latency > 20 * 1000) {
688 static int print_once;
689 policy->cpuinfo.transition_latency = 20 * 1000;
690 if (!print_once) {
691 print_once = 1;
692 printk(KERN_INFO "Capping off P-state tranision latency"
693 " at 20 uS\n");
694 }
695 }
696
685 data->max_freq = perf->states[0].core_frequency * 1000; 697 data->max_freq = perf->states[0].core_frequency * 1000;
686 /* table init */ 698 /* table init */
687 for (i=0; i<perf->state_count; i++) { 699 for (i = 0; i < perf->state_count; i++) {
688 if (i>0 && perf->states[i].core_frequency >= 700 if (i > 0 && perf->states[i].core_frequency >=
689 data->freq_table[valid_states-1].frequency / 1000) 701 data->freq_table[valid_states-1].frequency / 1000)
690 continue; 702 continue;
691 703
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
index 965ea52767ac..733093d60436 100644
--- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -32,7 +32,7 @@
32 * nforce2_chipset: 32 * nforce2_chipset:
33 * FSB is changed using the chipset 33 * FSB is changed using the chipset
34 */ 34 */
35static struct pci_dev *nforce2_chipset_dev; 35static struct pci_dev *nforce2_dev;
36 36
37/* fid: 37/* fid:
38 * multiplier * 10 38 * multiplier * 10
@@ -56,7 +56,9 @@ MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)");
56MODULE_PARM_DESC(min_fsb, 56MODULE_PARM_DESC(min_fsb,
57 "Minimum FSB to use, if not defined: current FSB - 50"); 57 "Minimum FSB to use, if not defined: current FSB - 50");
58 58
59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) 59#define PFX "cpufreq-nforce2: "
60#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
61 "cpufreq-nforce2", msg)
60 62
61/** 63/**
62 * nforce2_calc_fsb - calculate FSB 64 * nforce2_calc_fsb - calculate FSB
@@ -118,11 +120,11 @@ static void nforce2_write_pll(int pll)
118 int temp; 120 int temp;
119 121
120 /* Set the pll addr. to 0x00 */ 122 /* Set the pll addr. to 0x00 */
121 pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); 123 pci_write_config_dword(nforce2_dev, NFORCE2_PLLADR, 0);
122 124
123 /* Now write the value in all 64 registers */ 125 /* Now write the value in all 64 registers */
124 for (temp = 0; temp <= 0x3f; temp++) 126 for (temp = 0; temp <= 0x3f; temp++)
125 pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); 127 pci_write_config_dword(nforce2_dev, NFORCE2_PLLREG, pll);
126 128
127 return; 129 return;
128} 130}
@@ -139,8 +141,8 @@ static unsigned int nforce2_fsb_read(int bootfsb)
139 u32 fsb, temp = 0; 141 u32 fsb, temp = 0;
140 142
141 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ 143 /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */
142 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 144 nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 0x01EF,
143 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); 145 PCI_ANY_ID, PCI_ANY_ID, NULL);
144 if (!nforce2_sub5) 146 if (!nforce2_sub5)
145 return 0; 147 return 0;
146 148
@@ -148,13 +150,13 @@ static unsigned int nforce2_fsb_read(int bootfsb)
148 fsb /= 1000000; 150 fsb /= 1000000;
149 151
150 /* Check if PLL register is already set */ 152 /* Check if PLL register is already set */
151 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); 153 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
152 154
153 if (bootfsb || !temp) 155 if (bootfsb || !temp)
154 return fsb; 156 return fsb;
155 157
156 /* Use PLL register FSB value */ 158 /* Use PLL register FSB value */
157 pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); 159 pci_read_config_dword(nforce2_dev, NFORCE2_PLLREG, &temp);
158 fsb = nforce2_calc_fsb(temp); 160 fsb = nforce2_calc_fsb(temp);
159 161
160 return fsb; 162 return fsb;
@@ -174,18 +176,18 @@ static int nforce2_set_fsb(unsigned int fsb)
174 int pll = 0; 176 int pll = 0;
175 177
176 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { 178 if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
177 printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); 179 printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
178 return -EINVAL; 180 return -EINVAL;
179 } 181 }
180 182
181 tfsb = nforce2_fsb_read(0); 183 tfsb = nforce2_fsb_read(0);
182 if (!tfsb) { 184 if (!tfsb) {
183 printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); 185 printk(KERN_ERR PFX "Error while reading the FSB\n");
184 return -EINVAL; 186 return -EINVAL;
185 } 187 }
186 188
187 /* First write? Then set actual value */ 189 /* First write? Then set actual value */
188 pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); 190 pci_read_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8 *)&temp);
189 if (!temp) { 191 if (!temp) {
190 pll = nforce2_calc_pll(tfsb); 192 pll = nforce2_calc_pll(tfsb);
191 193
@@ -197,7 +199,7 @@ static int nforce2_set_fsb(unsigned int fsb)
197 199
198 /* Enable write access */ 200 /* Enable write access */
199 temp = 0x01; 201 temp = 0x01;
200 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); 202 pci_write_config_byte(nforce2_dev, NFORCE2_PLLENABLE, (u8)temp);
201 203
202 diff = tfsb - fsb; 204 diff = tfsb - fsb;
203 205
@@ -222,7 +224,7 @@ static int nforce2_set_fsb(unsigned int fsb)
222 } 224 }
223 225
224 temp = 0x40; 226 temp = 0x40;
225 pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); 227 pci_write_config_byte(nforce2_dev, NFORCE2_PLLADR, (u8)temp);
226 228
227 return 0; 229 return 0;
228} 230}
@@ -244,7 +246,8 @@ static unsigned int nforce2_get(unsigned int cpu)
244 * nforce2_target - set a new CPUFreq policy 246 * nforce2_target - set a new CPUFreq policy
245 * @policy: new policy 247 * @policy: new policy
246 * @target_freq: the target frequency 248 * @target_freq: the target frequency
247 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 249 * @relation: how that frequency relates to achieved frequency
250 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
248 * 251 *
249 * Sets a new CPUFreq policy. 252 * Sets a new CPUFreq policy.
250 */ 253 */
@@ -276,7 +279,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
276 /* local_irq_save(flags); */ 279 /* local_irq_save(flags); */
277 280
278 if (nforce2_set_fsb(target_fsb) < 0) 281 if (nforce2_set_fsb(target_fsb) < 0)
279 printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", 282 printk(KERN_ERR PFX "Changing FSB to %d failed\n",
280 target_fsb); 283 target_fsb);
281 else 284 else
282 dprintk("Changed FSB successfully to %d\n", 285 dprintk("Changed FSB successfully to %d\n",
@@ -327,8 +330,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
327 /* FIX: Get FID from CPU */ 330 /* FIX: Get FID from CPU */
328 if (!fid) { 331 if (!fid) {
329 if (!cpu_khz) { 332 if (!cpu_khz) {
330 printk(KERN_WARNING 333 printk(KERN_WARNING PFX
331 "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); 334 "cpu_khz not set, can't calculate multiplier!\n");
332 return -ENODEV; 335 return -ENODEV;
333 } 336 }
334 337
@@ -343,7 +346,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
343 } 346 }
344 } 347 }
345 348
346 printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, 349 printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
347 fid / 10, fid % 10); 350 fid / 10, fid % 10);
348 351
349 /* Set maximum FSB to FSB at boot time */ 352 /* Set maximum FSB to FSB at boot time */
@@ -392,17 +395,18 @@ static struct cpufreq_driver nforce2_driver = {
392 */ 395 */
393static unsigned int nforce2_detect_chipset(void) 396static unsigned int nforce2_detect_chipset(void)
394{ 397{
395 nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, 398 nforce2_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA,
396 PCI_DEVICE_ID_NVIDIA_NFORCE2, 399 PCI_DEVICE_ID_NVIDIA_NFORCE2,
397 PCI_ANY_ID, PCI_ANY_ID, NULL); 400 PCI_ANY_ID, PCI_ANY_ID, NULL);
398 401
399 if (nforce2_chipset_dev == NULL) 402 if (nforce2_dev == NULL)
400 return -ENODEV; 403 return -ENODEV;
401 404
402 printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", 405 printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
403 nforce2_chipset_dev->revision); 406 nforce2_dev->revision);
404 printk(KERN_INFO 407 printk(KERN_INFO PFX
405 "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); 408 "FSB changing is maybe unstable and can lead to "
409 "crashes and data loss.\n");
406 410
407 return 0; 411 return 0;
408} 412}
@@ -420,7 +424,7 @@ static int __init nforce2_init(void)
420 424
421 /* detect chipset */ 425 /* detect chipset */
422 if (nforce2_detect_chipset()) { 426 if (nforce2_detect_chipset()) {
423 printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); 427 printk(KERN_INFO PFX "No nForce2 chipset.\n");
424 return -ENODEV; 428 return -ENODEV;
425 } 429 }
426 430
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 41ab3f064cb1..35a257dd4bb7 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -12,12 +12,12 @@
12#include <linux/cpufreq.h> 12#include <linux/cpufreq.h>
13#include <linux/ioport.h> 13#include <linux/ioport.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/timex.h>
16#include <linux/io.h>
17#include <linux/delay.h>
15 18
16#include <asm/msr.h> 19#include <asm/msr.h>
17#include <asm/tsc.h> 20#include <asm/tsc.h>
18#include <asm/timex.h>
19#include <asm/io.h>
20#include <asm/delay.h>
21 21
22#define EPS_BRAND_C7M 0 22#define EPS_BRAND_C7M 0
23#define EPS_BRAND_C7 1 23#define EPS_BRAND_C7 1
@@ -184,7 +184,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
184 break; 184 break;
185 } 185 }
186 186
187 switch(brand) { 187 switch (brand) {
188 case EPS_BRAND_C7M: 188 case EPS_BRAND_C7M:
189 printk(KERN_CONT "C7-M\n"); 189 printk(KERN_CONT "C7-M\n");
190 break; 190 break;
@@ -218,17 +218,20 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
218 /* Print voltage and multiplier */ 218 /* Print voltage and multiplier */
219 rdmsr(MSR_IA32_PERF_STATUS, lo, hi); 219 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
220 current_voltage = lo & 0xff; 220 current_voltage = lo & 0xff;
221 printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700); 221 printk(KERN_INFO "eps: Current voltage = %dmV\n",
222 current_voltage * 16 + 700);
222 current_multiplier = (lo >> 8) & 0xff; 223 current_multiplier = (lo >> 8) & 0xff;
223 printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier); 224 printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
224 225
225 /* Print limits */ 226 /* Print limits */
226 max_voltage = hi & 0xff; 227 max_voltage = hi & 0xff;
227 printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); 228 printk(KERN_INFO "eps: Highest voltage = %dmV\n",
229 max_voltage * 16 + 700);
228 max_multiplier = (hi >> 8) & 0xff; 230 max_multiplier = (hi >> 8) & 0xff;
229 printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier); 231 printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
230 min_voltage = (hi >> 16) & 0xff; 232 min_voltage = (hi >> 16) & 0xff;
231 printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); 233 printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
234 min_voltage * 16 + 700);
232 min_multiplier = (hi >> 24) & 0xff; 235 min_multiplier = (hi >> 24) & 0xff;
233 printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier); 236 printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
234 237
@@ -318,7 +321,7 @@ static int eps_cpu_exit(struct cpufreq_policy *policy)
318 return 0; 321 return 0;
319} 322}
320 323
321static struct freq_attr* eps_attr[] = { 324static struct freq_attr *eps_attr[] = {
322 &cpufreq_freq_attr_scaling_available_freqs, 325 &cpufreq_freq_attr_scaling_available_freqs,
323 NULL, 326 NULL,
324}; 327};
@@ -356,7 +359,7 @@ static void __exit eps_exit(void)
356 cpufreq_unregister_driver(&eps_driver); 359 cpufreq_unregister_driver(&eps_driver);
357} 360}
358 361
359MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>"); 362MODULE_AUTHOR("Rafal Bilski <rafalbilski@interia.pl>");
360MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPUs."); 363MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPUs.");
361MODULE_LICENSE("GPL"); 364MODULE_LICENSE("GPL");
362 365
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index fe613c93b366..006b278b0d5d 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -184,7 +184,8 @@ static int elanfreq_target(struct cpufreq_policy *policy,
184{ 184{
185 unsigned int newstate = 0; 185 unsigned int newstate = 0;
186 186
187 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) 187 if (cpufreq_frequency_table_target(policy, &elanfreq_table[0],
188 target_freq, relation, &newstate))
188 return -EINVAL; 189 return -EINVAL;
189 190
190 elanfreq_set_cpu_state(newstate); 191 elanfreq_set_cpu_state(newstate);
@@ -301,7 +302,8 @@ static void __exit elanfreq_exit(void)
301module_param(max_freq, int, 0444); 302module_param(max_freq, int, 0444);
302 303
303MODULE_LICENSE("GPL"); 304MODULE_LICENSE("GPL");
304MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); 305MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, "
306 "Sven Geggus <sven@geggus.net>");
305MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); 307MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs");
306 308
307module_init(elanfreq_init); 309module_init(elanfreq_init);
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 9d9eae82e60f..ac27ec2264d5 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -79,8 +79,9 @@
79#include <linux/smp.h> 79#include <linux/smp.h>
80#include <linux/cpufreq.h> 80#include <linux/cpufreq.h>
81#include <linux/pci.h> 81#include <linux/pci.h>
82#include <linux/errno.h>
83
82#include <asm/processor-cyrix.h> 84#include <asm/processor-cyrix.h>
83#include <asm/errno.h>
84 85
85/* PCI config registers, all at F0 */ 86/* PCI config registers, all at F0 */
86#define PCI_PMER1 0x80 /* power management enable register 1 */ 87#define PCI_PMER1 0x80 /* power management enable register 1 */
@@ -122,8 +123,8 @@ static struct gxfreq_params *gx_params;
122static int stock_freq; 123static int stock_freq;
123 124
124/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ 125/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */
125static int pci_busclk = 0; 126static int pci_busclk;
126module_param (pci_busclk, int, 0444); 127module_param(pci_busclk, int, 0444);
127 128
128/* maximum duration for which the cpu may be suspended 129/* maximum duration for which the cpu may be suspended
129 * (32us * MAX_DURATION). If no parameter is given, this defaults 130 * (32us * MAX_DURATION). If no parameter is given, this defaults
@@ -132,7 +133,7 @@ module_param (pci_busclk, int, 0444);
132 * is suspended -- processing power is just 0.39% of what it used to be, 133 * is suspended -- processing power is just 0.39% of what it used to be,
133 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ 134 * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */
134static int max_duration = 255; 135static int max_duration = 255;
135module_param (max_duration, int, 0444); 136module_param(max_duration, int, 0444);
136 137
137/* For the default policy, we want at least some processing power 138/* For the default policy, we want at least some processing power
138 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) 139 * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV)
@@ -140,7 +141,8 @@ module_param (max_duration, int, 0444);
140#define POLICY_MIN_DIV 20 141#define POLICY_MIN_DIV 20
141 142
142 143
143#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) 144#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
145 "gx-suspmod", msg)
144 146
145/** 147/**
146 * we can detect a core multiplier from dir0_lsb 148 * we can detect a core multiplier from dir0_lsb
@@ -166,12 +168,20 @@ static int gx_freq_mult[16] = {
166 * Low Level chipset interface * 168 * Low Level chipset interface *
167 ****************************************************************/ 169 ****************************************************************/
168static struct pci_device_id gx_chipset_tbl[] __initdata = { 170static struct pci_device_id gx_chipset_tbl[] __initdata = {
169 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, 171 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
170 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, 172 PCI_ANY_ID, PCI_ANY_ID },
171 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, 173 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520,
174 PCI_ANY_ID, PCI_ANY_ID },
175 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510,
176 PCI_ANY_ID, PCI_ANY_ID },
172 { 0, }, 177 { 0, },
173}; 178};
174 179
180static void gx_write_byte(int reg, int value)
181{
182 pci_write_config_byte(gx_params->cs55x0, reg, value);
183}
184
175/** 185/**
176 * gx_detect_chipset: 186 * gx_detect_chipset:
177 * 187 *
@@ -200,7 +210,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
200/** 210/**
201 * gx_get_cpuspeed: 211 * gx_get_cpuspeed:
202 * 212 *
203 * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. 213 * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi
214 * Geode CPU runs.
204 */ 215 */
205static unsigned int gx_get_cpuspeed(unsigned int cpu) 216static unsigned int gx_get_cpuspeed(unsigned int cpu)
206{ 217{
@@ -217,17 +228,18 @@ static unsigned int gx_get_cpuspeed(unsigned int cpu)
217 * 228 *
218 **/ 229 **/
219 230
220static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) 231static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration,
232 u8 *off_duration)
221{ 233{
222 unsigned int i; 234 unsigned int i;
223 u8 tmp_on, tmp_off; 235 u8 tmp_on, tmp_off;
224 int old_tmp_freq = stock_freq; 236 int old_tmp_freq = stock_freq;
225 int tmp_freq; 237 int tmp_freq;
226 238
227 *off_duration=1; 239 *off_duration = 1;
228 *on_duration=0; 240 *on_duration = 0;
229 241
230 for (i=max_duration; i>0; i--) { 242 for (i = max_duration; i > 0; i--) {
231 tmp_off = ((khz * i) / stock_freq) & 0xff; 243 tmp_off = ((khz * i) / stock_freq) & 0xff;
232 tmp_on = i - tmp_off; 244 tmp_on = i - tmp_off;
233 tmp_freq = (stock_freq * tmp_off) / i; 245 tmp_freq = (stock_freq * tmp_off) / i;
@@ -259,26 +271,34 @@ static void gx_set_cpuspeed(unsigned int khz)
259 freqs.cpu = 0; 271 freqs.cpu = 0;
260 freqs.old = gx_get_cpuspeed(0); 272 freqs.old = gx_get_cpuspeed(0);
261 273
262 new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); 274 new_khz = gx_validate_speed(khz, &gx_params->on_duration,
275 &gx_params->off_duration);
263 276
264 freqs.new = new_khz; 277 freqs.new = new_khz;
265 278
266 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 279 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
267 local_irq_save(flags); 280 local_irq_save(flags);
268 281
269 if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ 282
283
284 if (new_khz != stock_freq) {
285 /* if new khz == 100% of CPU speed, it is special case */
270 switch (gx_params->cs55x0->device) { 286 switch (gx_params->cs55x0->device) {
271 case PCI_DEVICE_ID_CYRIX_5530_LEGACY: 287 case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
272 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; 288 pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP;
273 /* FIXME: need to test other values -- Zwane,Miura */ 289 /* FIXME: need to test other values -- Zwane,Miura */
274 pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ 290 /* typical 2 to 4ms */
275 pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ 291 gx_write_byte(PCI_IRQTC, 4);
276 pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); 292 /* typical 50 to 100ms */
277 293 gx_write_byte(PCI_VIDTC, 100);
278 if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ 294 gx_write_byte(PCI_PMER1, pmer1);
279 suscfg = gx_params->pci_suscfg | SUSMOD; 295
280 } else { /* CS5530A,B.. */ 296 if (gx_params->cs55x0->revision < 0x10) {
281 suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; 297 /* CS5530(rev 1.2, 1.3) */
298 suscfg = gx_params->pci_suscfg|SUSMOD;
299 } else {
300 /* CS5530A,B.. */
301 suscfg = gx_params->pci_suscfg|SUSMOD|PWRSVE;
282 } 302 }
283 break; 303 break;
284 case PCI_DEVICE_ID_CYRIX_5520: 304 case PCI_DEVICE_ID_CYRIX_5520:
@@ -294,13 +314,13 @@ static void gx_set_cpuspeed(unsigned int khz)
294 suscfg = gx_params->pci_suscfg & ~(SUSMOD); 314 suscfg = gx_params->pci_suscfg & ~(SUSMOD);
295 gx_params->off_duration = 0; 315 gx_params->off_duration = 0;
296 gx_params->on_duration = 0; 316 gx_params->on_duration = 0;
297 dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); 317 dprintk("suspend modulation disabled: cpu runs 100%% speed.\n");
298 } 318 }
299 319
300 pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); 320 gx_write_byte(PCI_MODOFF, gx_params->off_duration);
301 pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); 321 gx_write_byte(PCI_MODON, gx_params->on_duration);
302 322
303 pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); 323 gx_write_byte(PCI_SUSCFG, suscfg);
304 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); 324 pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg);
305 325
306 local_irq_restore(flags); 326 local_irq_restore(flags);
@@ -334,7 +354,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
334 return -EINVAL; 354 return -EINVAL;
335 355
336 policy->cpu = 0; 356 policy->cpu = 0;
337 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); 357 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
358 stock_freq);
338 359
339 /* it needs to be assured that at least one supported frequency is 360 /* it needs to be assured that at least one supported frequency is
340 * within policy->min and policy->max. If it is not, policy->max 361 * within policy->min and policy->max. If it is not, policy->max
@@ -354,7 +375,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy)
354 policy->max = tmp_freq; 375 policy->max = tmp_freq;
355 if (policy->max < policy->min) 376 if (policy->max < policy->min)
356 policy->max = policy->min; 377 policy->max = policy->min;
357 cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); 378 cpufreq_verify_within_limits(policy, (stock_freq / max_duration),
379 stock_freq);
358 380
359 return 0; 381 return 0;
360} 382}
@@ -398,18 +420,18 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
398 return -ENODEV; 420 return -ENODEV;
399 421
400 /* determine maximum frequency */ 422 /* determine maximum frequency */
401 if (pci_busclk) { 423 if (pci_busclk)
402 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; 424 maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
403 } else if (cpu_khz) { 425 else if (cpu_khz)
404 maxfreq = cpu_khz; 426 maxfreq = cpu_khz;
405 } else { 427 else
406 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; 428 maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f];
407 } 429
408 stock_freq = maxfreq; 430 stock_freq = maxfreq;
409 curfreq = gx_get_cpuspeed(0); 431 curfreq = gx_get_cpuspeed(0);
410 432
411 dprintk("cpu max frequency is %d.\n", maxfreq); 433 dprintk("cpu max frequency is %d.\n", maxfreq);
412 dprintk("cpu current frequency is %dkHz.\n",curfreq); 434 dprintk("cpu current frequency is %dkHz.\n", curfreq);
413 435
414 /* setup basic struct for cpufreq API */ 436 /* setup basic struct for cpufreq API */
415 policy->cpu = 0; 437 policy->cpu = 0;
@@ -447,7 +469,8 @@ static int __init cpufreq_gx_init(void)
447 struct pci_dev *gx_pci; 469 struct pci_dev *gx_pci;
448 470
449 /* Test if we have the right hardware */ 471 /* Test if we have the right hardware */
450 if ((gx_pci = gx_detect_chipset()) == NULL) 472 gx_pci = gx_detect_chipset();
473 if (gx_pci == NULL)
451 return -ENODEV; 474 return -ENODEV;
452 475
453 /* check whether module parameters are sane */ 476 /* check whether module parameters are sane */
@@ -468,9 +491,11 @@ static int __init cpufreq_gx_init(void)
468 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); 491 pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1));
469 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); 492 pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2));
470 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); 493 pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration));
471 pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); 494 pci_read_config_byte(params->cs55x0, PCI_MODOFF,
495 &(params->off_duration));
472 496
473 if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { 497 ret = cpufreq_register_driver(&gx_suspmod_driver);
498 if (ret) {
474 kfree(params); 499 kfree(params);
475 return ret; /* register error! */ 500 return ret; /* register error! */
476 } 501 }
@@ -485,9 +510,9 @@ static void __exit cpufreq_gx_exit(void)
485 kfree(gx_params); 510 kfree(gx_params);
486} 511}
487 512
488MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>"); 513MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
489MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); 514MODULE_DESCRIPTION("Cpufreq driver for Cyrix MediaGX and NatSemi Geode");
490MODULE_LICENSE ("GPL"); 515MODULE_LICENSE("GPL");
491 516
492module_init(cpufreq_gx_init); 517module_init(cpufreq_gx_init);
493module_exit(cpufreq_gx_exit); 518module_exit(cpufreq_gx_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index a4cff5d6e380..0bd48e65a0ca 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -30,12 +30,12 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/string.h> 31#include <linux/string.h>
32#include <linux/delay.h> 32#include <linux/delay.h>
33#include <linux/timex.h>
34#include <linux/io.h>
35#include <linux/acpi.h>
36#include <linux/kernel.h>
33 37
34#include <asm/msr.h> 38#include <asm/msr.h>
35#include <asm/timex.h>
36#include <asm/io.h>
37#include <asm/acpi.h>
38#include <linux/acpi.h>
39#include <acpi/processor.h> 39#include <acpi/processor.h>
40 40
41#include "longhaul.h" 41#include "longhaul.h"
@@ -58,7 +58,7 @@
58#define USE_NORTHBRIDGE (1 << 2) 58#define USE_NORTHBRIDGE (1 << 2)
59 59
60static int cpu_model; 60static int cpu_model;
61static unsigned int numscales=16; 61static unsigned int numscales = 16;
62static unsigned int fsb; 62static unsigned int fsb;
63 63
64static const struct mV_pos *vrm_mV_table; 64static const struct mV_pos *vrm_mV_table;
@@ -67,8 +67,8 @@ static const unsigned char *mV_vrm_table;
67static unsigned int highest_speed, lowest_speed; /* kHz */ 67static unsigned int highest_speed, lowest_speed; /* kHz */
68static unsigned int minmult, maxmult; 68static unsigned int minmult, maxmult;
69static int can_scale_voltage; 69static int can_scale_voltage;
70static struct acpi_processor *pr = NULL; 70static struct acpi_processor *pr;
71static struct acpi_processor_cx *cx = NULL; 71static struct acpi_processor_cx *cx;
72static u32 acpi_regs_addr; 72static u32 acpi_regs_addr;
73static u8 longhaul_flags; 73static u8 longhaul_flags;
74static unsigned int longhaul_index; 74static unsigned int longhaul_index;
@@ -78,12 +78,13 @@ static int scale_voltage;
78static int disable_acpi_c3; 78static int disable_acpi_c3;
79static int revid_errata; 79static int revid_errata;
80 80
81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
82 "longhaul", msg)
82 83
83 84
84/* Clock ratios multiplied by 10 */ 85/* Clock ratios multiplied by 10 */
85static int clock_ratio[32]; 86static int mults[32];
86static int eblcr_table[32]; 87static int eblcr[32];
87static int longhaul_version; 88static int longhaul_version;
88static struct cpufreq_frequency_table *longhaul_table; 89static struct cpufreq_frequency_table *longhaul_table;
89 90
@@ -93,7 +94,7 @@ static char speedbuffer[8];
93static char *print_speed(int speed) 94static char *print_speed(int speed)
94{ 95{
95 if (speed < 1000) { 96 if (speed < 1000) {
96 snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); 97 snprintf(speedbuffer, sizeof(speedbuffer), "%dMHz", speed);
97 return speedbuffer; 98 return speedbuffer;
98 } 99 }
99 100
@@ -122,27 +123,28 @@ static unsigned int calc_speed(int mult)
122 123
123static int longhaul_get_cpu_mult(void) 124static int longhaul_get_cpu_mult(void)
124{ 125{
125 unsigned long invalue=0,lo, hi; 126 unsigned long invalue = 0, lo, hi;
126 127
127 rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); 128 rdmsr(MSR_IA32_EBL_CR_POWERON, lo, hi);
128 invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; 129 invalue = (lo & (1<<22|1<<23|1<<24|1<<25))>>22;
129 if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { 130 if (longhaul_version == TYPE_LONGHAUL_V2 ||
131 longhaul_version == TYPE_POWERSAVER) {
130 if (lo & (1<<27)) 132 if (lo & (1<<27))
131 invalue+=16; 133 invalue += 16;
132 } 134 }
133 return eblcr_table[invalue]; 135 return eblcr[invalue];
134} 136}
135 137
136/* For processor with BCR2 MSR */ 138/* For processor with BCR2 MSR */
137 139
138static void do_longhaul1(unsigned int clock_ratio_index) 140static void do_longhaul1(unsigned int mults_index)
139{ 141{
140 union msr_bcr2 bcr2; 142 union msr_bcr2 bcr2;
141 143
142 rdmsrl(MSR_VIA_BCR2, bcr2.val); 144 rdmsrl(MSR_VIA_BCR2, bcr2.val);
143 /* Enable software clock multiplier */ 145 /* Enable software clock multiplier */
144 bcr2.bits.ESOFTBF = 1; 146 bcr2.bits.ESOFTBF = 1;
145 bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; 147 bcr2.bits.CLOCKMUL = mults_index & 0xff;
146 148
147 /* Sync to timer tick */ 149 /* Sync to timer tick */
148 safe_halt(); 150 safe_halt();
@@ -161,7 +163,7 @@ static void do_longhaul1(unsigned int clock_ratio_index)
161 163
162/* For processor with Longhaul MSR */ 164/* For processor with Longhaul MSR */
163 165
164static void do_powersaver(int cx_address, unsigned int clock_ratio_index, 166static void do_powersaver(int cx_address, unsigned int mults_index,
165 unsigned int dir) 167 unsigned int dir)
166{ 168{
167 union msr_longhaul longhaul; 169 union msr_longhaul longhaul;
@@ -173,11 +175,11 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
173 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; 175 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
174 else 176 else
175 longhaul.bits.RevisionKey = 0; 177 longhaul.bits.RevisionKey = 0;
176 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; 178 longhaul.bits.SoftBusRatio = mults_index & 0xf;
177 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; 179 longhaul.bits.SoftBusRatio4 = (mults_index & 0x10) >> 4;
178 /* Setup new voltage */ 180 /* Setup new voltage */
179 if (can_scale_voltage) 181 if (can_scale_voltage)
180 longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; 182 longhaul.bits.SoftVID = (mults_index >> 8) & 0x1f;
181 /* Sync to timer tick */ 183 /* Sync to timer tick */
182 safe_halt(); 184 safe_halt();
183 /* Raise voltage if necessary */ 185 /* Raise voltage if necessary */
@@ -240,14 +242,14 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index,
240 242
241/** 243/**
242 * longhaul_set_cpu_frequency() 244 * longhaul_set_cpu_frequency()
243 * @clock_ratio_index : bitpattern of the new multiplier. 245 * @mults_index : bitpattern of the new multiplier.
244 * 246 *
245 * Sets a new clock ratio. 247 * Sets a new clock ratio.
246 */ 248 */
247 249
248static void longhaul_setstate(unsigned int table_index) 250static void longhaul_setstate(unsigned int table_index)
249{ 251{
250 unsigned int clock_ratio_index; 252 unsigned int mults_index;
251 int speed, mult; 253 int speed, mult;
252 struct cpufreq_freqs freqs; 254 struct cpufreq_freqs freqs;
253 unsigned long flags; 255 unsigned long flags;
@@ -256,9 +258,9 @@ static void longhaul_setstate(unsigned int table_index)
256 u32 bm_timeout = 1000; 258 u32 bm_timeout = 1000;
257 unsigned int dir = 0; 259 unsigned int dir = 0;
258 260
259 clock_ratio_index = longhaul_table[table_index].index; 261 mults_index = longhaul_table[table_index].index;
260 /* Safety precautions */ 262 /* Safety precautions */
261 mult = clock_ratio[clock_ratio_index & 0x1f]; 263 mult = mults[mults_index & 0x1f];
262 if (mult == -1) 264 if (mult == -1)
263 return; 265 return;
264 speed = calc_speed(mult); 266 speed = calc_speed(mult);
@@ -274,7 +276,7 @@ static void longhaul_setstate(unsigned int table_index)
274 276
275 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 277 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
276 278
277 dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", 279 dprintk("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n",
278 fsb, mult/10, mult%10, print_speed(speed/1000)); 280 fsb, mult/10, mult%10, print_speed(speed/1000));
279retry_loop: 281retry_loop:
280 preempt_disable(); 282 preempt_disable();
@@ -282,8 +284,8 @@ retry_loop:
282 284
283 pic2_mask = inb(0xA1); 285 pic2_mask = inb(0xA1);
284 pic1_mask = inb(0x21); /* works on C3. save mask. */ 286 pic1_mask = inb(0x21); /* works on C3. save mask. */
285 outb(0xFF,0xA1); /* Overkill */ 287 outb(0xFF, 0xA1); /* Overkill */
286 outb(0xFE,0x21); /* TMR0 only */ 288 outb(0xFE, 0x21); /* TMR0 only */
287 289
288 /* Wait while PCI bus is busy. */ 290 /* Wait while PCI bus is busy. */
289 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE 291 if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE
@@ -303,7 +305,7 @@ retry_loop:
303 outb(3, 0x22); 305 outb(3, 0x22);
304 } else if ((pr != NULL) && pr->flags.bm_control) { 306 } else if ((pr != NULL) && pr->flags.bm_control) {
305 /* Disable bus master arbitration */ 307 /* Disable bus master arbitration */
306 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); 308 acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
307 } 309 }
308 switch (longhaul_version) { 310 switch (longhaul_version) {
309 311
@@ -312,7 +314,7 @@ retry_loop:
312 * Software controlled multipliers only. 314 * Software controlled multipliers only.
313 */ 315 */
314 case TYPE_LONGHAUL_V1: 316 case TYPE_LONGHAUL_V1:
315 do_longhaul1(clock_ratio_index); 317 do_longhaul1(mults_index);
316 break; 318 break;
317 319
318 /* 320 /*
@@ -326,10 +328,10 @@ retry_loop:
326 case TYPE_POWERSAVER: 328 case TYPE_POWERSAVER:
327 if (longhaul_flags & USE_ACPI_C3) { 329 if (longhaul_flags & USE_ACPI_C3) {
328 /* Don't allow wakeup */ 330 /* Don't allow wakeup */
329 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 331 acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
330 do_powersaver(cx->address, clock_ratio_index, dir); 332 do_powersaver(cx->address, mults_index, dir);
331 } else { 333 } else {
332 do_powersaver(0, clock_ratio_index, dir); 334 do_powersaver(0, mults_index, dir);
333 } 335 }
334 break; 336 break;
335 } 337 }
@@ -339,10 +341,10 @@ retry_loop:
339 outb(0, 0x22); 341 outb(0, 0x22);
340 } else if ((pr != NULL) && pr->flags.bm_control) { 342 } else if ((pr != NULL) && pr->flags.bm_control) {
341 /* Enable bus master arbitration */ 343 /* Enable bus master arbitration */
342 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); 344 acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
343 } 345 }
344 outb(pic2_mask,0xA1); /* restore mask */ 346 outb(pic2_mask, 0xA1); /* restore mask */
345 outb(pic1_mask,0x21); 347 outb(pic1_mask, 0x21);
346 348
347 local_irq_restore(flags); 349 local_irq_restore(flags);
348 preempt_enable(); 350 preempt_enable();
@@ -392,7 +394,8 @@ retry_loop:
392 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 394 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
393 395
394 if (!bm_timeout) 396 if (!bm_timeout)
395 printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); 397 printk(KERN_INFO PFX "Warning: Timeout while waiting for "
398 "idle PCI bus.\n");
396} 399}
397 400
398/* 401/*
@@ -458,31 +461,32 @@ static int __init longhaul_get_ranges(void)
458 break; 461 break;
459 } 462 }
460 463
461 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", 464 dprintk("MinMult:%d.%dx MaxMult:%d.%dx\n",
462 minmult/10, minmult%10, maxmult/10, maxmult%10); 465 minmult/10, minmult%10, maxmult/10, maxmult%10);
463 466
464 highest_speed = calc_speed(maxmult); 467 highest_speed = calc_speed(maxmult);
465 lowest_speed = calc_speed(minmult); 468 lowest_speed = calc_speed(minmult);
466 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, 469 dprintk("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
467 print_speed(lowest_speed/1000), 470 print_speed(lowest_speed/1000),
468 print_speed(highest_speed/1000)); 471 print_speed(highest_speed/1000));
469 472
470 if (lowest_speed == highest_speed) { 473 if (lowest_speed == highest_speed) {
471 printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); 474 printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
472 return -EINVAL; 475 return -EINVAL;
473 } 476 }
474 if (lowest_speed > highest_speed) { 477 if (lowest_speed > highest_speed) {
475 printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", 478 printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
476 lowest_speed, highest_speed); 479 lowest_speed, highest_speed);
477 return -EINVAL; 480 return -EINVAL;
478 } 481 }
479 482
480 longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); 483 longhaul_table = kmalloc((numscales + 1) * sizeof(*longhaul_table),
481 if(!longhaul_table) 484 GFP_KERNEL);
485 if (!longhaul_table)
482 return -ENOMEM; 486 return -ENOMEM;
483 487
484 for (j = 0; j < numscales; j++) { 488 for (j = 0; j < numscales; j++) {
485 ratio = clock_ratio[j]; 489 ratio = mults[j];
486 if (ratio == -1) 490 if (ratio == -1)
487 continue; 491 continue;
488 if (ratio > maxmult || ratio < minmult) 492 if (ratio > maxmult || ratio < minmult)
@@ -507,13 +511,10 @@ static int __init longhaul_get_ranges(void)
507 } 511 }
508 } 512 }
509 if (min_i != j) { 513 if (min_i != j) {
510 unsigned int temp; 514 swap(longhaul_table[j].frequency,
511 temp = longhaul_table[j].frequency; 515 longhaul_table[min_i].frequency);
512 longhaul_table[j].frequency = longhaul_table[min_i].frequency; 516 swap(longhaul_table[j].index,
513 longhaul_table[min_i].frequency = temp; 517 longhaul_table[min_i].index);
514 temp = longhaul_table[j].index;
515 longhaul_table[j].index = longhaul_table[min_i].index;
516 longhaul_table[min_i].index = temp;
517 } 518 }
518 } 519 }
519 520
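
The swap() calls introduced in the hunk above are the kernel's generic exchange macro from <linux/kernel.h>; it expands to the same three-line temporary dance the old code spelled out. A minimal standalone sketch of the idiom, with illustrative values (typeof is a GCC extension, as in the kernel):

#include <stdio.h>

/* Same shape as the kernel's swap() macro in <linux/kernel.h>. */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

int main(void)
{
	unsigned int freq_a = 400000, freq_b = 600000;	/* kHz, made up */

	swap(freq_a, freq_b);	/* replaces the open-coded temp variable */
	printf("a=%u b=%u\n", freq_a, freq_b);
	return 0;
}
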
@@ -521,7 +522,7 @@ static int __init longhaul_get_ranges(void)
521 522
522 /* Find index we are running on */ 523 /* Find index we are running on */
523 for (j = 0; j < k; j++) { 524 for (j = 0; j < k; j++) {
524 if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { 525 if (mults[longhaul_table[j].index & 0x1f] == mult) {
525 longhaul_index = j; 526 longhaul_index = j;
526 break; 527 break;
527 } 528 }
@@ -559,20 +560,22 @@ static void __init longhaul_setup_voltagescaling(void)
559 maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; 560 maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
560 561
561 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { 562 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
562 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " 563 printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
563 "Voltage scaling disabled.\n", 564 "Voltage scaling disabled.\n",
564 minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); 565 minvid.mV/1000, minvid.mV%1000,
566 maxvid.mV/1000, maxvid.mV%1000);
565 return; 567 return;
566 } 568 }
567 569
568 if (minvid.mV == maxvid.mV) { 570 if (minvid.mV == maxvid.mV) {
569 printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " 571 printk(KERN_INFO PFX "Claims to support voltage scaling but "
570 "both %d.%03d. Voltage scaling disabled\n", 572 "min & max are both %d.%03d. "
573 "Voltage scaling disabled\n",
571 maxvid.mV/1000, maxvid.mV%1000); 574 maxvid.mV/1000, maxvid.mV%1000);
572 return; 575 return;
573 } 576 }
574 577
 575 /* How many voltage steps */ 578 /* How many voltage steps */
576 numvscales = maxvid.pos - minvid.pos + 1; 579 numvscales = maxvid.pos - minvid.pos + 1;
577 printk(KERN_INFO PFX 580 printk(KERN_INFO PFX
578 "Max VID=%d.%03d " 581 "Max VID=%d.%03d "
@@ -586,7 +589,7 @@ static void __init longhaul_setup_voltagescaling(void)
586 j = longhaul.bits.MinMHzBR; 589 j = longhaul.bits.MinMHzBR;
587 if (longhaul.bits.MinMHzBR4) 590 if (longhaul.bits.MinMHzBR4)
588 j += 16; 591 j += 16;
589 min_vid_speed = eblcr_table[j]; 592 min_vid_speed = eblcr[j];
590 if (min_vid_speed == -1) 593 if (min_vid_speed == -1)
591 return; 594 return;
592 switch (longhaul.bits.MinMHzFSB) { 595 switch (longhaul.bits.MinMHzFSB) {
@@ -617,7 +620,8 @@ static void __init longhaul_setup_voltagescaling(void)
617 pos = minvid.pos; 620 pos = minvid.pos;
618 longhaul_table[j].index |= mV_vrm_table[pos] << 8; 621 longhaul_table[j].index |= mV_vrm_table[pos] << 8;
619 vid = vrm_mV_table[mV_vrm_table[pos]]; 622 vid = vrm_mV_table[mV_vrm_table[pos]];
620 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); 623 printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
624 speed, j, vid.mV);
621 j++; 625 j++;
622 } 626 }
623 627
@@ -640,7 +644,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
640 unsigned int dir = 0; 644 unsigned int dir = 0;
641 u8 vid, current_vid; 645 u8 vid, current_vid;
642 646
643 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) 647 if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq,
648 relation, &table_index))
644 return -EINVAL; 649 return -EINVAL;
645 650
646 /* Don't set same frequency again */ 651 /* Don't set same frequency again */
@@ -656,7 +661,8 @@ static int longhaul_target(struct cpufreq_policy *policy,
656 * this in hardware, C3 is old and we need to do this 661 * this in hardware, C3 is old and we need to do this
657 * in software. */ 662 * in software. */
658 i = longhaul_index; 663 i = longhaul_index;
659 current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; 664 current_vid = (longhaul_table[longhaul_index].index >> 8);
665 current_vid &= 0x1f;
660 if (table_index > longhaul_index) 666 if (table_index > longhaul_index)
661 dir = 1; 667 dir = 1;
662 while (i != table_index) { 668 while (i != table_index) {
@@ -691,9 +697,9 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
691{ 697{
692 struct acpi_device *d; 698 struct acpi_device *d;
693 699
694 if ( acpi_bus_get_device(obj_handle, &d) ) { 700 if (acpi_bus_get_device(obj_handle, &d))
695 return 0; 701 return 0;
696 } 702
697 *return_value = acpi_driver_data(d); 703 *return_value = acpi_driver_data(d);
698 return 1; 704 return 1;
699} 705}
@@ -750,7 +756,7 @@ static int longhaul_setup_southbridge(void)
750 /* Find VT8235 southbridge */ 756 /* Find VT8235 southbridge */
751 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); 757 dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
752 if (dev == NULL) 758 if (dev == NULL)
753 /* Find VT8237 southbridge */ 759 /* Find VT8237 southbridge */
754 dev = pci_get_device(PCI_VENDOR_ID_VIA, 760 dev = pci_get_device(PCI_VENDOR_ID_VIA,
755 PCI_DEVICE_ID_VIA_8237, NULL); 761 PCI_DEVICE_ID_VIA_8237, NULL);
756 if (dev != NULL) { 762 if (dev != NULL) {
@@ -769,7 +775,8 @@ static int longhaul_setup_southbridge(void)
769 if (pci_cmd & 1 << 7) { 775 if (pci_cmd & 1 << 7) {
770 pci_read_config_dword(dev, 0x88, &acpi_regs_addr); 776 pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
771 acpi_regs_addr &= 0xff00; 777 acpi_regs_addr &= 0xff00;
772 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); 778 printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
779 acpi_regs_addr);
773 } 780 }
774 781
775 pci_dev_put(dev); 782 pci_dev_put(dev);
@@ -781,7 +788,7 @@ static int longhaul_setup_southbridge(void)
781static int __init longhaul_cpu_init(struct cpufreq_policy *policy) 788static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
782{ 789{
783 struct cpuinfo_x86 *c = &cpu_data(0); 790 struct cpuinfo_x86 *c = &cpu_data(0);
784 char *cpuname=NULL; 791 char *cpuname = NULL;
785 int ret; 792 int ret;
786 u32 lo, hi; 793 u32 lo, hi;
787 794
@@ -791,8 +798,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
791 cpu_model = CPU_SAMUEL; 798 cpu_model = CPU_SAMUEL;
792 cpuname = "C3 'Samuel' [C5A]"; 799 cpuname = "C3 'Samuel' [C5A]";
793 longhaul_version = TYPE_LONGHAUL_V1; 800 longhaul_version = TYPE_LONGHAUL_V1;
794 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); 801 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
795 memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); 802 memcpy(eblcr, samuel1_eblcr, sizeof(samuel1_eblcr));
796 break; 803 break;
797 804
798 case 7: 805 case 7:
@@ -803,10 +810,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
803 cpuname = "C3 'Samuel 2' [C5B]"; 810 cpuname = "C3 'Samuel 2' [C5B]";
 804 /* Note, this is not a typo, early Samuel2s had 811
805 * Samuel1 ratios. */ 812 * Samuel1 ratios. */
806 memcpy(clock_ratio, samuel1_clock_ratio, 813 memcpy(mults, samuel1_mults, sizeof(samuel1_mults));
807 sizeof(samuel1_clock_ratio)); 814 memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
808 memcpy(eblcr_table, samuel2_eblcr,
809 sizeof(samuel2_eblcr));
810 break; 815 break;
811 case 1 ... 15: 816 case 1 ... 15:
812 longhaul_version = TYPE_LONGHAUL_V1; 817 longhaul_version = TYPE_LONGHAUL_V1;
@@ -817,10 +822,8 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
817 cpu_model = CPU_EZRA; 822 cpu_model = CPU_EZRA;
818 cpuname = "C3 'Ezra' [C5C]"; 823 cpuname = "C3 'Ezra' [C5C]";
819 } 824 }
820 memcpy(clock_ratio, ezra_clock_ratio, 825 memcpy(mults, ezra_mults, sizeof(ezra_mults));
821 sizeof(ezra_clock_ratio)); 826 memcpy(eblcr, ezra_eblcr, sizeof(ezra_eblcr));
822 memcpy(eblcr_table, ezra_eblcr,
823 sizeof(ezra_eblcr));
824 break; 827 break;
825 } 828 }
826 break; 829 break;
@@ -829,18 +832,16 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
829 cpu_model = CPU_EZRA_T; 832 cpu_model = CPU_EZRA_T;
830 cpuname = "C3 'Ezra-T' [C5M]"; 833 cpuname = "C3 'Ezra-T' [C5M]";
831 longhaul_version = TYPE_POWERSAVER; 834 longhaul_version = TYPE_POWERSAVER;
832 numscales=32; 835 numscales = 32;
833 memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); 836 memcpy(mults, ezrat_mults, sizeof(ezrat_mults));
834 memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); 837 memcpy(eblcr, ezrat_eblcr, sizeof(ezrat_eblcr));
835 break; 838 break;
836 839
837 case 9: 840 case 9:
838 longhaul_version = TYPE_POWERSAVER; 841 longhaul_version = TYPE_POWERSAVER;
839 numscales = 32; 842 numscales = 32;
840 memcpy(clock_ratio, 843 memcpy(mults, nehemiah_mults, sizeof(nehemiah_mults));
841 nehemiah_clock_ratio, 844 memcpy(eblcr, nehemiah_eblcr, sizeof(nehemiah_eblcr));
842 sizeof(nehemiah_clock_ratio));
843 memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
844 switch (c->x86_mask) { 845 switch (c->x86_mask) {
845 case 0 ... 1: 846 case 0 ... 1:
846 cpu_model = CPU_NEHEMIAH; 847 cpu_model = CPU_NEHEMIAH;
@@ -869,14 +870,14 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
869 longhaul_version = TYPE_LONGHAUL_V1; 870 longhaul_version = TYPE_LONGHAUL_V1;
870 } 871 }
871 872
872 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); 873 printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
873 switch (longhaul_version) { 874 switch (longhaul_version) {
874 case TYPE_LONGHAUL_V1: 875 case TYPE_LONGHAUL_V1:
875 case TYPE_LONGHAUL_V2: 876 case TYPE_LONGHAUL_V2:
876 printk ("Longhaul v%d supported.\n", longhaul_version); 877 printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
877 break; 878 break;
878 case TYPE_POWERSAVER: 879 case TYPE_POWERSAVER:
879 printk ("Powersaver supported.\n"); 880 printk(KERN_CONT "Powersaver supported.\n");
880 break; 881 break;
881 }; 882 };
882 883
@@ -940,7 +941,7 @@ static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy)
940 return 0; 941 return 0;
941} 942}
942 943
943static struct freq_attr* longhaul_attr[] = { 944static struct freq_attr *longhaul_attr[] = {
944 &cpufreq_freq_attr_scaling_available_freqs, 945 &cpufreq_freq_attr_scaling_available_freqs,
945 NULL, 946 NULL,
946}; 947};
@@ -966,13 +967,15 @@ static int __init longhaul_init(void)
966 967
967#ifdef CONFIG_SMP 968#ifdef CONFIG_SMP
968 if (num_online_cpus() > 1) { 969 if (num_online_cpus() > 1) {
969 printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); 970 printk(KERN_ERR PFX "More than 1 CPU detected, "
971 "longhaul disabled.\n");
970 return -ENODEV; 972 return -ENODEV;
971 } 973 }
972#endif 974#endif
973#ifdef CONFIG_X86_IO_APIC 975#ifdef CONFIG_X86_IO_APIC
974 if (cpu_has_apic) { 976 if (cpu_has_apic) {
975 printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); 977 printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
978 "broken in this configuration.\n");
976 return -ENODEV; 979 return -ENODEV;
977 } 980 }
978#endif 981#endif
@@ -993,8 +996,8 @@ static void __exit longhaul_exit(void)
993{ 996{
994 int i; 997 int i;
995 998
996 for (i=0; i < numscales; i++) { 999 for (i = 0; i < numscales; i++) {
997 if (clock_ratio[i] == maxmult) { 1000 if (mults[i] == maxmult) {
998 longhaul_setstate(i); 1001 longhaul_setstate(i);
999 break; 1002 break;
1000 } 1003 }
@@ -1007,11 +1010,11 @@ static void __exit longhaul_exit(void)
1007/* Even if the BIOS exports an ACPI C3 state, and it works 1010
1008 * when the CPU is idle, this state doesn't trigger a 1011
1009 * frequency transition in some cases. */ 1012
1010module_param (disable_acpi_c3, int, 0644); 1013module_param(disable_acpi_c3, int, 0644);
1011MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); 1014MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support");
1012/* Change CPU voltage with frequency. Very useful to save 1015
1013 * power, but most VIA C3 processors don't support it. */ 1016
1014module_param (scale_voltage, int, 0644); 1017module_param(scale_voltage, int, 0644);
1015MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 1018MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1016/* Force revision key to 0 for processors which don't 1019
1017 * support voltage scaling, but introduce themselves as 1020
@@ -1019,9 +1022,9 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1019module_param(revid_errata, int, 0644); 1022module_param(revid_errata, int, 0644);
1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); 1023MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
1021 1024
1022MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); 1025MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 1026MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
1024MODULE_LICENSE ("GPL"); 1027MODULE_LICENSE("GPL");
1025 1028
1026late_initcall(longhaul_init); 1029late_initcall(longhaul_init);
1027module_exit(longhaul_exit); 1030module_exit(longhaul_exit);
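
A note on the renamed mults_index value threaded through longhaul_setstate() and do_powersaver() above: the masks in the diff imply one packed integer per table entry, with the multiplier selector in the low five bits and the VID in bits 8-12. The sketch below decodes that layout in userspace; the bit positions are inferred from the masks shown here, not taken from a VIA datasheet, and the input value is made up.

#include <stdio.h>

static void decode_mults_index(unsigned int idx)
{
	unsigned int mult_idx = idx & 0x1f;	  /* index into mults[] */
	unsigned int ratio = idx & 0x0f;	  /* SoftBusRatio */
	unsigned int ratio4 = (idx & 0x10) >> 4;  /* SoftBusRatio4 */
	unsigned int vid = (idx >> 8) & 0x1f;	  /* SoftVID */

	printf("mults[%u]: ratio=%u ratio4=%u vid=0x%02x\n",
	       mult_idx, ratio, ratio4, vid);
}

int main(void)
{
	decode_mults_index(0x0a1f);	/* arbitrary example value */
	return 0;
}
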
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index 4fcc320997df..e2360a469f79 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -49,14 +49,14 @@ union msr_longhaul {
49 49
50/* 50/*
51 * Clock ratio tables. Div/Mod by 10 to get ratio. 51 * Clock ratio tables. Div/Mod by 10 to get ratio.
52 * The eblcr ones specify the ratio read from the CPU. 52 * The eblcr values specify the ratio read from the CPU.
53 * The clock_ratio ones specify what to write to the CPU. 53 * The mults values specify what to write to the CPU.
54 */ 54 */
55 55
56/* 56/*
57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0) 57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
58 */ 58 */
59static const int __initdata samuel1_clock_ratio[16] = { 59static const int __initdata samuel1_mults[16] = {
60 -1, /* 0000 -> RESERVED */ 60 -1, /* 0000 -> RESERVED */
61 30, /* 0001 -> 3.0x */ 61 30, /* 0001 -> 3.0x */
62 40, /* 0010 -> 4.0x */ 62 40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {
119/* 119/*
120 * VIA C3 Ezra 120 * VIA C3 Ezra
121 */ 121 */
122static const int __initdata ezra_clock_ratio[16] = { 122static const int __initdata ezra_mults[16] = {
123 100, /* 0000 -> 10.0x */ 123 100, /* 0000 -> 10.0x */
124 30, /* 0001 -> 3.0x */ 124 30, /* 0001 -> 3.0x */
125 40, /* 0010 -> 4.0x */ 125 40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {
160/* 160/*
161 * VIA C3 (Ezra-T) [C5M]. 161 * VIA C3 (Ezra-T) [C5M].
162 */ 162 */
163static const int __initdata ezrat_clock_ratio[32] = { 163static const int __initdata ezrat_mults[32] = {
164 100, /* 0000 -> 10.0x */ 164 100, /* 0000 -> 10.0x */
165 30, /* 0001 -> 3.0x */ 165 30, /* 0001 -> 3.0x */
166 40, /* 0010 -> 4.0x */ 166 40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {
235/* 235/*
236 * VIA C3 Nehemiah */ 236 * VIA C3 Nehemiah */
237 237
238static const int __initdata nehemiah_clock_ratio[32] = { 238static const int __initdata nehemiah_mults[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 -1, /* 0001 -> 16.0x */ 240 -1, /* 0001 -> 16.0x */
241 40, /* 0010 -> 4.0x */ 241 40, /* 0010 -> 4.0x */
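
The header comment above says the tables store ratio * 10 ("Div/Mod by 10 to get ratio") with -1 marking reserved encodings. A standalone illustration using the first three samuel1_mults entries from the diff:

#include <stdio.h>

/* First entries of samuel1_mults[], copied from the table above. */
static const int mults_sample[] = { -1, 30, 40 };

int main(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(mults_sample) / sizeof(mults_sample[0]); i++) {
		if (mults_sample[i] == -1) {
			printf("%u: reserved\n", i);
			continue;
		}
		/* Div/Mod by 10 to get the ratio, as the header says */
		printf("%u: %d.%dx\n", i,
		       mults_sample[i] / 10, mults_sample[i] % 10);
	}
	return 0;
}
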
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index 777a7ff075de..da5f70fcb766 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -11,12 +11,13 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/cpufreq.h> 13#include <linux/cpufreq.h>
14#include <linux/timex.h>
14 15
15#include <asm/msr.h> 16#include <asm/msr.h>
16#include <asm/processor.h> 17#include <asm/processor.h>
17#include <asm/timex.h>
18 18
19#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) 19#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
20 "longrun", msg)
20 21
21static struct cpufreq_driver longrun_driver; 22static struct cpufreq_driver longrun_driver;
22 23
@@ -51,7 +52,7 @@ static void __init longrun_get_policy(struct cpufreq_policy *policy)
51 msr_lo &= 0x0000007F; 52 msr_lo &= 0x0000007F;
52 msr_hi &= 0x0000007F; 53 msr_hi &= 0x0000007F;
53 54
54 if ( longrun_high_freq <= longrun_low_freq ) { 55 if (longrun_high_freq <= longrun_low_freq) {
55 /* Assume degenerate Longrun table */ 56 /* Assume degenerate Longrun table */
56 policy->min = policy->max = longrun_high_freq; 57 policy->min = policy->max = longrun_high_freq;
57 } else { 58 } else {
@@ -79,7 +80,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy)
79 if (!policy) 80 if (!policy)
80 return -EINVAL; 81 return -EINVAL;
81 82
82 if ( longrun_high_freq <= longrun_low_freq ) { 83 if (longrun_high_freq <= longrun_low_freq) {
83 /* Assume degenerate Longrun table */ 84 /* Assume degenerate Longrun table */
84 pctg_lo = pctg_hi = 100; 85 pctg_lo = pctg_hi = 100;
85 } else { 86 } else {
@@ -152,7 +153,7 @@ static unsigned int longrun_get(unsigned int cpu)
152 cpuid(0x80860007, &eax, &ebx, &ecx, &edx); 153 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
153 dprintk("cpuid eax is %u\n", eax); 154 dprintk("cpuid eax is %u\n", eax);
154 155
155 return (eax * 1000); 156 return eax * 1000;
156} 157}
157 158
158/** 159/**
@@ -196,7 +197,8 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
196 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); 197 rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
197 *high_freq = msr_lo * 1000; /* to kHz */ 198 *high_freq = msr_lo * 1000; /* to kHz */
198 199
199 dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); 200 dprintk("longrun table interface told %u - %u kHz\n",
201 *low_freq, *high_freq);
200 202
201 if (*low_freq > *high_freq) 203 if (*low_freq > *high_freq)
202 *low_freq = *high_freq; 204 *low_freq = *high_freq;
@@ -219,7 +221,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
219 cpuid(0x80860007, &eax, &ebx, &ecx, &edx); 221 cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
220 /* try decreasing in 10% steps, some processors react only 222 /* try decreasing in 10% steps, some processors react only
221 * on some barrier values */ 223 * on some barrier values */
222 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { 224 for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
223 /* set to 0 to try_hi perf_pctg */ 225 /* set to 0 to try_hi perf_pctg */
224 msr_lo &= 0xFFFFFF80; 226 msr_lo &= 0xFFFFFF80;
225 msr_hi &= 0xFFFFFF80; 227 msr_hi &= 0xFFFFFF80;
@@ -236,7 +238,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
236 238
237 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) 239 /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
 238 * equals 240
239 * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) 241 * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
240 * 242 *
 241 * high_freq * perf_pctg is stored temporarily into "ebx". 243
242 */ 244 */
@@ -317,9 +319,10 @@ static void __exit longrun_exit(void)
317} 319}
318 320
319 321
320MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); 322MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
321MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); 323MODULE_DESCRIPTION("LongRun driver for Transmeta Crusoe and "
322MODULE_LICENSE ("GPL"); 324 "Efficeon processors.");
325MODULE_LICENSE("GPL");
323 326
324module_init(longrun_init); 327module_init(longrun_init);
325module_exit(longrun_exit); 328module_exit(longrun_exit);
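
The comment inside longrun_determine_freqs() rearranges perf_pctg = (cur_freq - low_freq) / (high_freq - low_freq) to solve for low_freq. A userspace sketch of that algebra with integer percentages; the frequencies are invented, and it assumes cur_freq lies between low_freq and high_freq so the subtraction cannot underflow:

#include <stdio.h>

/*
 * Solve low_freq from the equation in the comment above:
 *   low_freq * (1 - p) = cur_freq - high_freq * p
 * with p given as an integer percentage 0..100.
 */
static unsigned int solve_low_freq(unsigned int cur_khz,
				   unsigned int high_khz,
				   unsigned int pctg)
{
	if (pctg >= 100)	/* degenerate table: cur == high */
		return high_khz;
	return (cur_khz * 100 - high_khz * pctg) / (100 - pctg);
}

int main(void)
{
	/* e.g. running at 700 MHz, high is 1000 MHz, at 50% performance */
	printf("low_freq = %u kHz\n", solve_low_freq(700000, 1000000, 50));
	return 0;
}
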
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 3178c3acd97e..6ac55bd341ae 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -27,15 +27,17 @@
27#include <linux/cpufreq.h> 27#include <linux/cpufreq.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/cpumask.h> 29#include <linux/cpumask.h>
30#include <linux/timex.h>
30 31
31#include <asm/processor.h> 32#include <asm/processor.h>
32#include <asm/msr.h> 33#include <asm/msr.h>
33#include <asm/timex.h> 34#include <asm/timer.h>
34 35
35#include "speedstep-lib.h" 36#include "speedstep-lib.h"
36 37
37#define PFX "p4-clockmod: " 38#define PFX "p4-clockmod: "
38#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) 39#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
40 "p4-clockmod", msg)
39 41
40/* 42/*
41 * Duty Cycle (3bits), note DC_DISABLE is not specified in 43 * Duty Cycle (3bits), note DC_DISABLE is not specified in
@@ -58,7 +60,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
58{ 60{
59 u32 l, h; 61 u32 l, h;
60 62
61 if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) 63 if (!cpu_online(cpu) ||
64 (newstate > DC_DISABLE) || (newstate == DC_RESV))
62 return -EINVAL; 65 return -EINVAL;
63 66
64 rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); 67 rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h);
@@ -66,7 +69,8 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate)
66 if (l & 0x01) 69 if (l & 0x01)
67 dprintk("CPU#%d currently thermal throttled\n", cpu); 70 dprintk("CPU#%d currently thermal throttled\n", cpu);
68 71
69 if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) 72 if (has_N44_O17_errata[cpu] &&
73 (newstate == DC_25PT || newstate == DC_DFLT))
70 newstate = DC_38PT; 74 newstate = DC_38PT;
71 75
72 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); 76 rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h);
@@ -112,7 +116,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
112 struct cpufreq_freqs freqs; 116 struct cpufreq_freqs freqs;
113 int i; 117 int i;
114 118
115 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) 119 if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0],
120 target_freq, relation, &newstate))
116 return -EINVAL; 121 return -EINVAL;
117 122
118 freqs.old = cpufreq_p4_get(policy->cpu); 123 freqs.old = cpufreq_p4_get(policy->cpu);
@@ -127,7 +132,8 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
127 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 132 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
128 } 133 }
129 134
130 /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software 135 /* run on each logical CPU,
136 * see section 13.15.3 of IA32 Intel Architecture Software
131 * Developer's Manual, Volume 3 137 * Developer's Manual, Volume 3
132 */ 138 */
133 for_each_cpu(i, policy->cpus) 139 for_each_cpu(i, policy->cpus)
@@ -153,28 +159,30 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
153{ 159{
154 if (c->x86 == 0x06) { 160 if (c->x86 == 0x06) {
155 if (cpu_has(c, X86_FEATURE_EST)) 161 if (cpu_has(c, X86_FEATURE_EST))
156 printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " 162 printk(KERN_WARNING PFX "Warning: EST-capable CPU "
157 "The acpi-cpufreq module offers voltage scaling" 163 "detected. The acpi-cpufreq module offers "
158 " in addition of frequency scaling. You should use " 164 "voltage scaling in addition of frequency "
159 "that instead of p4-clockmod, if possible.\n"); 165 "scaling. You should use that instead of "
166 "p4-clockmod, if possible.\n");
160 switch (c->x86_model) { 167 switch (c->x86_model) {
161 case 0x0E: /* Core */ 168 case 0x0E: /* Core */
162 case 0x0F: /* Core Duo */ 169 case 0x0F: /* Core Duo */
163 case 0x16: /* Celeron Core */ 170 case 0x16: /* Celeron Core */
164 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 171 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
165 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); 172 return speedstep_get_frequency(SPEEDSTEP_CPU_PCORE);
166 case 0x0D: /* Pentium M (Dothan) */ 173 case 0x0D: /* Pentium M (Dothan) */
167 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 174 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
168 /* fall through */ 175 /* fall through */
169 case 0x09: /* Pentium M (Banias) */ 176 case 0x09: /* Pentium M (Banias) */
170 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); 177 return speedstep_get_frequency(SPEEDSTEP_CPU_PM);
171 } 178 }
172 } 179 }
173 180
174 if (c->x86 != 0xF) { 181 if (c->x86 != 0xF) {
175 if (!cpu_has(c, X86_FEATURE_EST)) 182 if (!cpu_has(c, X86_FEATURE_EST))
176 printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " 183 printk(KERN_WARNING PFX "Unknown CPU. "
177 "Please send an e-mail to <cpufreq@vger.kernel.org>\n"); 184 "Please send an e-mail to "
185 "<cpufreq@vger.kernel.org>\n");
178 return 0; 186 return 0;
179 } 187 }
180 188
@@ -182,16 +190,16 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
182 * throttling is active or not. */ 190 * throttling is active or not. */
183 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; 191 p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
184 192
185 if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { 193 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
186 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " 194 printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
187 "The speedstep-ich or acpi cpufreq modules offer " 195 "The speedstep-ich or acpi cpufreq modules offer "
188 "voltage scaling in addition of frequency scaling. " 196 "voltage scaling in addition of frequency scaling. "
189 "You should use either one instead of p4-clockmod, " 197 "You should use either one instead of p4-clockmod, "
190 "if possible.\n"); 198 "if possible.\n");
191 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); 199 return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
192 } 200 }
193 201
194 return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); 202 return speedstep_get_frequency(SPEEDSTEP_CPU_P4D);
195} 203}
196 204
197 205
@@ -203,7 +211,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
203 unsigned int i; 211 unsigned int i;
204 212
205#ifdef CONFIG_SMP 213#ifdef CONFIG_SMP
206 cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); 214 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
207#endif 215#endif
208 216
209 /* Errata workaround */ 217 /* Errata workaround */
@@ -217,14 +225,20 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
217 dprintk("has errata -- disabling low frequencies\n"); 225 dprintk("has errata -- disabling low frequencies\n");
218 } 226 }
219 227
228 if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4D &&
229 c->x86_model < 2) {
230 /* switch to maximum frequency and measure result */
231 cpufreq_p4_setdc(policy->cpu, DC_DISABLE);
232 recalibrate_cpu_khz();
233 }
220 /* get max frequency */ 234 /* get max frequency */
221 stock_freq = cpufreq_p4_get_frequency(c); 235 stock_freq = cpufreq_p4_get_frequency(c);
222 if (!stock_freq) 236 if (!stock_freq)
223 return -EINVAL; 237 return -EINVAL;
224 238
225 /* table init */ 239 /* table init */
226 for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { 240 for (i = 1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) {
227 if ((i<2) && (has_N44_O17_errata[policy->cpu])) 241 if ((i < 2) && (has_N44_O17_errata[policy->cpu]))
228 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; 242 p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID;
229 else 243 else
230 p4clockmod_table[i].frequency = (stock_freq * i)/8; 244 p4clockmod_table[i].frequency = (stock_freq * i)/8;
@@ -232,7 +246,10 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
232 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); 246 cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu);
233 247
234 /* cpuinfo and default policy values */ 248 /* cpuinfo and default policy values */
235 policy->cpuinfo.transition_latency = 1000000; /* assumed */ 249
250 /* the transition latency is set to be 1 higher than the maximum
251 * transition latency of the ondemand governor */
252 policy->cpuinfo.transition_latency = 10000001;
236 policy->cur = stock_freq; 253 policy->cur = stock_freq;
237 254
238 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); 255 return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]);
@@ -258,12 +275,12 @@ static unsigned int cpufreq_p4_get(unsigned int cpu)
258 l = DC_DISABLE; 275 l = DC_DISABLE;
259 276
260 if (l != DC_DISABLE) 277 if (l != DC_DISABLE)
261 return (stock_freq * l / 8); 278 return stock_freq * l / 8;
262 279
263 return stock_freq; 280 return stock_freq;
264} 281}
265 282
266static struct freq_attr* p4clockmod_attr[] = { 283static struct freq_attr *p4clockmod_attr[] = {
267 &cpufreq_freq_attr_scaling_available_freqs, 284 &cpufreq_freq_attr_scaling_available_freqs,
268 NULL, 285 NULL,
269}; 286};
@@ -298,9 +315,10 @@ static int __init cpufreq_p4_init(void)
298 315
299 ret = cpufreq_register_driver(&p4clockmod_driver); 316 ret = cpufreq_register_driver(&p4clockmod_driver);
300 if (!ret) 317 if (!ret)
301 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); 318 printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
319 "Modulation available\n");
302 320
303 return (ret); 321 return ret;
304} 322}
305 323
306 324
@@ -310,9 +328,9 @@ static void __exit cpufreq_p4_exit(void)
310} 328}
311 329
312 330
313MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>"); 331MODULE_AUTHOR("Zwane Mwaikambo <zwane@commfireservices.com>");
314MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); 332MODULE_DESCRIPTION("cpufreq driver for Pentium(TM) 4/Xeon(TM)");
315MODULE_LICENSE ("GPL"); 333MODULE_LICENSE("GPL");
316 334
317late_initcall(cpufreq_p4_init); 335late_initcall(cpufreq_p4_init);
318module_exit(cpufreq_p4_exit); 336module_exit(cpufreq_p4_exit);
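
The table init above derives each modulation step as (stock_freq * i)/8, i.e. eighths of the stock frequency, and cpufreq_p4_get() inverts that with stock_freq * l / 8. A minimal sketch with an invented stock frequency:

#include <stdio.h>

int main(void)
{
	unsigned int stock_freq = 2400000;	/* 2.4 GHz in kHz, made up */
	unsigned int i;

	/* duty-cycle steps, mirroring p4clockmod_table init above */
	for (i = 1; i <= 8; i++)
		printf("DC %u/8 -> %u kHz\n", i, stock_freq * i / 8);
	return 0;
}
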
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index c1ac5790c63e..f10dea409f40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) 2 * This file was based upon code in Powertweak Linux (http://powertweak.sf.net)
3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. 3 * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä,
4 * Dominik Brodowski.
4 * 5 *
5 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
6 * 7 *
@@ -13,14 +14,15 @@
13#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
14#include <linux/ioport.h> 15#include <linux/ioport.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
16
17#include <asm/msr.h>
18#include <linux/timex.h> 17#include <linux/timex.h>
19#include <linux/io.h> 18#include <linux/io.h>
20 19
20#include <asm/msr.h>
21
21#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long 22#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
22 as it is unused */ 23 as it is unused */
23 24
25#define PFX "powernow-k6: "
24static unsigned int busfreq; /* FSB, in 10 kHz */ 26static unsigned int busfreq; /* FSB, in 10 kHz */
25static unsigned int max_multiplier; 27static unsigned int max_multiplier;
26 28
@@ -47,8 +49,8 @@ static struct cpufreq_frequency_table clock_ratio[] = {
47 */ 49 */
48static int powernow_k6_get_cpu_multiplier(void) 50static int powernow_k6_get_cpu_multiplier(void)
49{ 51{
50 u64 invalue = 0; 52 u64 invalue = 0;
51 u32 msrval; 53 u32 msrval;
52 54
53 msrval = POWERNOW_IOPORT + 0x1; 55 msrval = POWERNOW_IOPORT + 0x1;
54 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ 56 wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */
@@ -68,12 +70,12 @@ static int powernow_k6_get_cpu_multiplier(void)
68 */ 70 */
69static void powernow_k6_set_state(unsigned int best_i) 71static void powernow_k6_set_state(unsigned int best_i)
70{ 72{
71 unsigned long outvalue = 0, invalue = 0; 73 unsigned long outvalue = 0, invalue = 0;
72 unsigned long msrval; 74 unsigned long msrval;
73 struct cpufreq_freqs freqs; 75 struct cpufreq_freqs freqs;
74 76
75 if (clock_ratio[best_i].index > max_multiplier) { 77 if (clock_ratio[best_i].index > max_multiplier) {
76 printk(KERN_ERR "cpufreq: invalid target frequency\n"); 78 printk(KERN_ERR PFX "invalid target frequency\n");
77 return; 79 return;
78 } 80 }
79 81
@@ -119,7 +121,8 @@ static int powernow_k6_verify(struct cpufreq_policy *policy)
119 * powernow_k6_setpolicy - sets a new CPUFreq policy 121 * powernow_k6_setpolicy - sets a new CPUFreq policy
120 * @policy: new policy 122 * @policy: new policy
121 * @target_freq: the target frequency 123 * @target_freq: the target frequency
122 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 124 * @relation: how that frequency relates to achieved frequency
125 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
123 * 126 *
124 * sets a new CPUFreq policy 127 * sets a new CPUFreq policy
125 */ 128 */
@@ -127,9 +130,10 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
127 unsigned int target_freq, 130 unsigned int target_freq,
128 unsigned int relation) 131 unsigned int relation)
129{ 132{
130 unsigned int newstate = 0; 133 unsigned int newstate = 0;
131 134
132 if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) 135 if (cpufreq_frequency_table_target(policy, &clock_ratio[0],
136 target_freq, relation, &newstate))
133 return -EINVAL; 137 return -EINVAL;
134 138
135 powernow_k6_set_state(newstate); 139 powernow_k6_set_state(newstate);
@@ -140,7 +144,7 @@ static int powernow_k6_target(struct cpufreq_policy *policy,
140 144
141static int powernow_k6_cpu_init(struct cpufreq_policy *policy) 145static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
142{ 146{
143 unsigned int i; 147 unsigned int i, f;
144 int result; 148 int result;
145 149
146 if (policy->cpu != 0) 150 if (policy->cpu != 0)
@@ -152,10 +156,11 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy)
152 156
153 /* table init */ 157 /* table init */
154 for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { 158 for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) {
155 if (clock_ratio[i].index > max_multiplier) 159 f = clock_ratio[i].index;
160 if (f > max_multiplier)
156 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; 161 clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID;
157 else 162 else
158 clock_ratio[i].frequency = busfreq * clock_ratio[i].index; 163 clock_ratio[i].frequency = busfreq * f;
159 } 164 }
160 165
161 /* cpuinfo and default policy values */ 166 /* cpuinfo and default policy values */
@@ -185,7 +190,9 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy)
185 190
186static unsigned int powernow_k6_get(unsigned int cpu) 191static unsigned int powernow_k6_get(unsigned int cpu)
187{ 192{
188 return busfreq * powernow_k6_get_cpu_multiplier(); 193 unsigned int ret;
194 ret = (busfreq * powernow_k6_get_cpu_multiplier());
195 return ret;
189} 196}
190 197
191static struct freq_attr *powernow_k6_attr[] = { 198static struct freq_attr *powernow_k6_attr[] = {
@@ -221,7 +228,7 @@ static int __init powernow_k6_init(void)
221 return -ENODEV; 228 return -ENODEV;
222 229
223 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { 230 if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
224 printk("cpufreq: PowerNow IOPORT region already used.\n"); 231 printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
225 return -EIO; 232 return -EIO;
226 } 233 }
227 234
@@ -246,7 +253,8 @@ static void __exit powernow_k6_exit(void)
246} 253}
247 254
248 255
249MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); 256MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
257 "Dominik Brodowski <linux@brodo.de>");
250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); 258MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
251MODULE_LICENSE("GPL"); 259MODULE_LICENSE("GPL");
252 260
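
On the units in the powernow-k6 table init: busfreq is declared above as the FSB in 10 kHz steps, so busfreq * f yields kHz if the table index f stores the multiplier scaled by ten (e.g. 45 for 4.5x). That scaling is an assumption here, since the table values themselves sit outside this hunk. A quick check:

#include <stdio.h>

int main(void)
{
	unsigned int busfreq = 10000;	/* 100 MHz FSB in 10 kHz units */
	unsigned int mult_x10 = 45;	/* hypothetical 4.5x entry */

	/* mirrors clock_ratio[i].frequency = busfreq * f above */
	printf("%u kHz\n", busfreq * mult_x10);	/* prints 450000 */
	return 0;
}
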
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 1b446d79a8fd..3c28ccd49742 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -6,10 +6,12 @@
6 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
7 * Based upon datasheets & sample CPUs kindly provided by AMD. 7 * Based upon datasheets & sample CPUs kindly provided by AMD.
8 * 8 *
9 * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. 9 * Errata 5:
10 * - We cli/sti on stepping A0 CPUs around the FID/VID transition. 10 * CPU may fail to execute a FID/VID change in presence of interrupt.
11 * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. 11 * - We cli/sti on stepping A0 CPUs around the FID/VID transition.
12 * - We disable half multipliers if ACPI is used on A0 stepping CPUs. 12 * Errata 15:
13 * CPU with half frequency multipliers may hang upon wakeup from disconnect.
14 * - We disable half multipliers if ACPI is used on A0 stepping CPUs.
13 */ 15 */
14 16
15#include <linux/kernel.h> 17#include <linux/kernel.h>
@@ -20,11 +22,11 @@
20#include <linux/slab.h> 22#include <linux/slab.h>
21#include <linux/string.h> 23#include <linux/string.h>
22#include <linux/dmi.h> 24#include <linux/dmi.h>
25#include <linux/timex.h>
26#include <linux/io.h>
23 27
28#include <asm/timer.h> /* Needed for recalibrate_cpu_khz() */
24#include <asm/msr.h> 29#include <asm/msr.h>
25#include <asm/timer.h>
26#include <asm/timex.h>
27#include <asm/io.h>
28#include <asm/system.h> 30#include <asm/system.h>
29 31
30#ifdef CONFIG_X86_POWERNOW_K7_ACPI 32#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -58,9 +60,9 @@ struct pst_s {
58union powernow_acpi_control_t { 60union powernow_acpi_control_t {
59 struct { 61 struct {
60 unsigned long fid:5, 62 unsigned long fid:5,
61 vid:5, 63 vid:5,
62 sgtc:20, 64 sgtc:20,
63 res1:2; 65 res1:2;
64 } bits; 66 } bits;
65 unsigned long val; 67 unsigned long val;
66}; 68};
@@ -94,14 +96,15 @@ static struct cpufreq_frequency_table *powernow_table;
94 96
95static unsigned int can_scale_bus; 97static unsigned int can_scale_bus;
96static unsigned int can_scale_vid; 98static unsigned int can_scale_vid;
97static unsigned int minimum_speed=-1; 99static unsigned int minimum_speed = -1;
98static unsigned int maximum_speed; 100static unsigned int maximum_speed;
99static unsigned int number_scales; 101static unsigned int number_scales;
100static unsigned int fsb; 102static unsigned int fsb;
101static unsigned int latency; 103static unsigned int latency;
102static char have_a0; 104static char have_a0;
103 105
104#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) 106#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
107 "powernow-k7", msg)
105 108
106static int check_fsb(unsigned int fsbspeed) 109static int check_fsb(unsigned int fsbspeed)
107{ 110{
@@ -109,7 +112,7 @@ static int check_fsb(unsigned int fsbspeed)
109 unsigned int f = fsb / 1000; 112 unsigned int f = fsb / 1000;
110 113
111 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; 114 delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed;
112 return (delta < 5); 115 return delta < 5;
113} 116}
114 117
115static int check_powernow(void) 118static int check_powernow(void)
@@ -117,24 +120,26 @@ static int check_powernow(void)
117 struct cpuinfo_x86 *c = &cpu_data(0); 120 struct cpuinfo_x86 *c = &cpu_data(0);
118 unsigned int maxei, eax, ebx, ecx, edx; 121 unsigned int maxei, eax, ebx, ecx, edx;
119 122
120 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { 123 if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 6)) {
121#ifdef MODULE 124#ifdef MODULE
122 printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); 125 printk(KERN_INFO PFX "This module only works with "
126 "AMD K7 CPUs\n");
123#endif 127#endif
124 return 0; 128 return 0;
125 } 129 }
126 130
127 /* Get maximum capabilities */ 131 /* Get maximum capabilities */
128 maxei = cpuid_eax (0x80000000); 132 maxei = cpuid_eax(0x80000000);
129 if (maxei < 0x80000007) { /* Any powernow info ? */ 133 if (maxei < 0x80000007) { /* Any powernow info ? */
130#ifdef MODULE 134#ifdef MODULE
131 printk (KERN_INFO PFX "No powernow capabilities detected\n"); 135 printk(KERN_INFO PFX "No powernow capabilities detected\n");
132#endif 136#endif
133 return 0; 137 return 0;
134 } 138 }
135 139
136 if ((c->x86_model == 6) && (c->x86_mask == 0)) { 140 if ((c->x86_model == 6) && (c->x86_mask == 0)) {
137 printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); 141 printk(KERN_INFO PFX "K7 660[A0] core detected, "
142 "enabling errata workarounds\n");
138 have_a0 = 1; 143 have_a0 = 1;
139 } 144 }
140 145
@@ -144,37 +149,42 @@ static int check_powernow(void)
144 if (!(edx & (1 << 1 | 1 << 2))) 149 if (!(edx & (1 << 1 | 1 << 2)))
145 return 0; 150 return 0;
146 151
147 printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); 152 printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
148 153
149 if (edx & 1 << 1) { 154 if (edx & 1 << 1) {
150 printk ("frequency"); 155 printk("frequency");
151 can_scale_bus=1; 156 can_scale_bus = 1;
152 } 157 }
153 158
154 if ((edx & (1 << 1 | 1 << 2)) == 0x6) 159 if ((edx & (1 << 1 | 1 << 2)) == 0x6)
155 printk (" and "); 160 printk(" and ");
156 161
157 if (edx & 1 << 2) { 162 if (edx & 1 << 2) {
158 printk ("voltage"); 163 printk("voltage");
159 can_scale_vid=1; 164 can_scale_vid = 1;
160 } 165 }
161 166
162 printk (".\n"); 167 printk(".\n");
163 return 1; 168 return 1;
164} 169}
165 170
171static void invalidate_entry(unsigned int entry)
172{
173 powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
174}
166 175
167static int get_ranges (unsigned char *pst) 176static int get_ranges(unsigned char *pst)
168{ 177{
169 unsigned int j; 178 unsigned int j;
170 unsigned int speed; 179 unsigned int speed;
171 u8 fid, vid; 180 u8 fid, vid;
172 181
173 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); 182 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
183 (number_scales + 1)), GFP_KERNEL);
174 if (!powernow_table) 184 if (!powernow_table)
175 return -ENOMEM; 185 return -ENOMEM;
176 186
177 for (j=0 ; j < number_scales; j++) { 187 for (j = 0 ; j < number_scales; j++) {
178 fid = *pst++; 188 fid = *pst++;
179 189
180 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; 190 powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10;
@@ -182,10 +192,10 @@ static int get_ranges (unsigned char *pst)
182 192
183 speed = powernow_table[j].frequency; 193 speed = powernow_table[j].frequency;
184 194
185 if ((fid_codes[fid] % 10)==5) { 195 if ((fid_codes[fid] % 10) == 5) {
186#ifdef CONFIG_X86_POWERNOW_K7_ACPI 196#ifdef CONFIG_X86_POWERNOW_K7_ACPI
187 if (have_a0 == 1) 197 if (have_a0 == 1)
188 powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; 198 invalidate_entry(j);
189#endif 199#endif
190 } 200 }
191 201
@@ -197,7 +207,7 @@ static int get_ranges (unsigned char *pst)
197 vid = *pst++; 207 vid = *pst++;
198 powernow_table[j].index |= (vid << 8); /* upper 8 bits */ 208 powernow_table[j].index |= (vid << 8); /* upper 8 bits */
199 209
200 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " 210 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
201 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 211 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
202 fid_codes[fid] % 10, speed/1000, vid, 212 fid_codes[fid] % 10, speed/1000, vid,
203 mobile_vid_table[vid]/1000, 213 mobile_vid_table[vid]/1000,
@@ -214,13 +224,13 @@ static void change_FID(int fid)
214{ 224{
215 union msr_fidvidctl fidvidctl; 225 union msr_fidvidctl fidvidctl;
216 226
217 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 227 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
218 if (fidvidctl.bits.FID != fid) { 228 if (fidvidctl.bits.FID != fid) {
219 fidvidctl.bits.SGTC = latency; 229 fidvidctl.bits.SGTC = latency;
220 fidvidctl.bits.FID = fid; 230 fidvidctl.bits.FID = fid;
221 fidvidctl.bits.VIDC = 0; 231 fidvidctl.bits.VIDC = 0;
222 fidvidctl.bits.FIDC = 1; 232 fidvidctl.bits.FIDC = 1;
223 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 233 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
224 } 234 }
225} 235}
226 236
@@ -229,18 +239,18 @@ static void change_VID(int vid)
229{ 239{
230 union msr_fidvidctl fidvidctl; 240 union msr_fidvidctl fidvidctl;
231 241
232 rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 242 rdmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
233 if (fidvidctl.bits.VID != vid) { 243 if (fidvidctl.bits.VID != vid) {
234 fidvidctl.bits.SGTC = latency; 244 fidvidctl.bits.SGTC = latency;
235 fidvidctl.bits.VID = vid; 245 fidvidctl.bits.VID = vid;
236 fidvidctl.bits.FIDC = 0; 246 fidvidctl.bits.FIDC = 0;
237 fidvidctl.bits.VIDC = 1; 247 fidvidctl.bits.VIDC = 1;
238 wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); 248 wrmsrl(MSR_K7_FID_VID_CTL, fidvidctl.val);
239 } 249 }
240} 250}
241 251
242 252
243static void change_speed (unsigned int index) 253static void change_speed(unsigned int index)
244{ 254{
245 u8 fid, vid; 255 u8 fid, vid;
246 struct cpufreq_freqs freqs; 256 struct cpufreq_freqs freqs;
@@ -257,7 +267,7 @@ static void change_speed (unsigned int index)
257 267
258 freqs.cpu = 0; 268 freqs.cpu = 0;
259 269
260 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 270 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
261 cfid = fidvidstatus.bits.CFID; 271 cfid = fidvidstatus.bits.CFID;
262 freqs.old = fsb * fid_codes[cfid] / 10; 272 freqs.old = fsb * fid_codes[cfid] / 10;
263 273
@@ -321,12 +331,14 @@ static int powernow_acpi_init(void)
321 goto err1; 331 goto err1;
322 } 332 }
323 333
324 if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { 334 if (acpi_processor_perf->control_register.space_id !=
335 ACPI_ADR_SPACE_FIXED_HARDWARE) {
325 retval = -ENODEV; 336 retval = -ENODEV;
326 goto err2; 337 goto err2;
327 } 338 }
328 339
329 if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { 340 if (acpi_processor_perf->status_register.space_id !=
341 ACPI_ADR_SPACE_FIXED_HARDWARE) {
330 retval = -ENODEV; 342 retval = -ENODEV;
331 goto err2; 343 goto err2;
332 } 344 }
@@ -338,7 +350,8 @@ static int powernow_acpi_init(void)
338 goto err2; 350 goto err2;
339 } 351 }
340 352
341 powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); 353 powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) *
354 (number_scales + 1)), GFP_KERNEL);
342 if (!powernow_table) { 355 if (!powernow_table) {
343 retval = -ENOMEM; 356 retval = -ENOMEM;
344 goto err2; 357 goto err2;
@@ -352,7 +365,7 @@ static int powernow_acpi_init(void)
352 unsigned int speed, speed_mhz; 365 unsigned int speed, speed_mhz;
353 366
354 pc.val = (unsigned long) state->control; 367 pc.val = (unsigned long) state->control;
355 dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", 368 dprintk("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n",
356 i, 369 i,
357 (u32) state->core_frequency, 370 (u32) state->core_frequency,
358 (u32) state->power, 371 (u32) state->power,
@@ -381,12 +394,12 @@ static int powernow_acpi_init(void)
381 if (speed % 1000 > 0) 394 if (speed % 1000 > 0)
382 speed_mhz++; 395 speed_mhz++;
383 396
384 if ((fid_codes[fid] % 10)==5) { 397 if ((fid_codes[fid] % 10) == 5) {
385 if (have_a0 == 1) 398 if (have_a0 == 1)
386 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 399 invalidate_entry(i);
387 } 400 }
388 401
389 dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " 402 dprintk(" FID: 0x%x (%d.%dx [%dMHz]) "
390 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, 403 "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10,
391 fid_codes[fid] % 10, speed_mhz, vid, 404 fid_codes[fid] % 10, speed_mhz, vid,
392 mobile_vid_table[vid]/1000, 405 mobile_vid_table[vid]/1000,
@@ -422,7 +435,8 @@ err1:
422err05: 435err05:
423 kfree(acpi_processor_perf); 436 kfree(acpi_processor_perf);
424err0: 437err0:
425 printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); 438 printk(KERN_WARNING PFX "ACPI perflib can not be used on "
439 "this platform\n");
426 acpi_processor_perf = NULL; 440 acpi_processor_perf = NULL;
427 return retval; 441 return retval;
428} 442}
@@ -435,7 +449,14 @@ static int powernow_acpi_init(void)
435} 449}
436#endif 450#endif
437 451
438static int powernow_decode_bios (int maxfid, int startvid) 452static void print_pst_entry(struct pst_s *pst, unsigned int j)
453{
454 dprintk("PST:%d (@%p)\n", j, pst);
455 dprintk(" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
456 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
457}
458
459static int powernow_decode_bios(int maxfid, int startvid)
439{ 460{
440 struct psb_s *psb; 461 struct psb_s *psb;
441 struct pst_s *pst; 462 struct pst_s *pst;
@@ -446,61 +467,67 @@ static int powernow_decode_bios (int maxfid, int startvid)
446 467
447 etuple = cpuid_eax(0x80000001); 468 etuple = cpuid_eax(0x80000001);
448 469
449 for (i=0xC0000; i < 0xffff0 ; i+=16) { 470 for (i = 0xC0000; i < 0xffff0 ; i += 16) {
450 471
451 p = phys_to_virt(i); 472 p = phys_to_virt(i);
452 473
453 if (memcmp(p, "AMDK7PNOW!", 10) == 0){ 474 if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
454 dprintk ("Found PSB header at %p\n", p); 475 dprintk("Found PSB header at %p\n", p);
455 psb = (struct psb_s *) p; 476 psb = (struct psb_s *) p;
456 dprintk ("Table version: 0x%x\n", psb->tableversion); 477 dprintk("Table version: 0x%x\n", psb->tableversion);
457 if (psb->tableversion != 0x12) { 478 if (psb->tableversion != 0x12) {
458 printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); 479 printk(KERN_INFO PFX "Sorry, only v1.2 tables"
480 " supported right now\n");
459 return -ENODEV; 481 return -ENODEV;
460 } 482 }
461 483
462 dprintk ("Flags: 0x%x\n", psb->flags); 484 dprintk("Flags: 0x%x\n", psb->flags);
463 if ((psb->flags & 1)==0) { 485 if ((psb->flags & 1) == 0)
464 dprintk ("Mobile voltage regulator\n"); 486 dprintk("Mobile voltage regulator\n");
465 } else { 487 else
466 dprintk ("Desktop voltage regulator\n"); 488 dprintk("Desktop voltage regulator\n");
467 }
468 489
469 latency = psb->settlingtime; 490 latency = psb->settlingtime;
470 if (latency < 100) { 491 if (latency < 100) {
471 printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. " 492 printk(KERN_INFO PFX "BIOS set settling time "
472 "Should be at least 100. Correcting.\n", latency); 493 "to %d microseconds. "
494 "Should be at least 100. "
495 "Correcting.\n", latency);
473 latency = 100; 496 latency = 100;
474 } 497 }
475 dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); 498 dprintk("Settling Time: %d microseconds.\n",
476 dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); 499 psb->settlingtime);
500 dprintk("Has %d PST tables. (Only dumping ones "
501 "relevant to this CPU).\n",
502 psb->numpst);
477 503
478 p += sizeof (struct psb_s); 504 p += sizeof(struct psb_s);
479 505
480 pst = (struct pst_s *) p; 506 pst = (struct pst_s *) p;
481 507
482 for (j=0; j<psb->numpst; j++) { 508 for (j = 0; j < psb->numpst; j++) {
483 pst = (struct pst_s *) p; 509 pst = (struct pst_s *) p;
484 number_scales = pst->numpstates; 510 number_scales = pst->numpstates;
485 511
486 if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && 512 if ((etuple == pst->cpuid) &&
487 (maxfid==pst->maxfid) && (startvid==pst->startvid)) 513 check_fsb(pst->fsbspeed) &&
488 { 514 (maxfid == pst->maxfid) &&
489 dprintk ("PST:%d (@%p)\n", j, pst); 515 (startvid == pst->startvid)) {
490 dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", 516 print_pst_entry(pst, j);
491 pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); 517 p = (char *)pst + sizeof(struct pst_s);
492 518 ret = get_ranges(p);
493 ret = get_ranges ((char *) pst + sizeof (struct pst_s));
494 return ret; 519 return ret;
495 } else { 520 } else {
496 unsigned int k; 521 unsigned int k;
497 p = (char *) pst + sizeof (struct pst_s); 522 p = (char *)pst + sizeof(struct pst_s);
498 for (k=0; k<number_scales; k++) 523 for (k = 0; k < number_scales; k++)
499 p+=2; 524 p += 2;
500 } 525 }
501 } 526 }
502 printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple); 527 printk(KERN_INFO PFX "No PST tables match this cpuid "
503 printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n"); 528 "(0x%x)\n", etuple);
529 printk(KERN_INFO PFX "This is indicative of a broken "
530 "BIOS.\n");
504 531
505 return -EINVAL; 532 return -EINVAL;
506 } 533 }
@@ -511,13 +538,14 @@ static int powernow_decode_bios (int maxfid, int startvid)
511} 538}
512 539
513 540
514static int powernow_target (struct cpufreq_policy *policy, 541static int powernow_target(struct cpufreq_policy *policy,
515 unsigned int target_freq, 542 unsigned int target_freq,
516 unsigned int relation) 543 unsigned int relation)
517{ 544{
518 unsigned int newstate; 545 unsigned int newstate;
519 546
520 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate)) 547 if (cpufreq_frequency_table_target(policy, powernow_table, target_freq,
548 relation, &newstate))
521 return -EINVAL; 549 return -EINVAL;
522 550
523 change_speed(newstate); 551 change_speed(newstate);
@@ -526,7 +554,7 @@ static int powernow_target (struct cpufreq_policy *policy,
526} 554}
527 555
528 556
529static int powernow_verify (struct cpufreq_policy *policy) 557static int powernow_verify(struct cpufreq_policy *policy)
530{ 558{
531 return cpufreq_frequency_table_verify(policy, powernow_table); 559 return cpufreq_frequency_table_verify(policy, powernow_table);
532} 560}
@@ -566,18 +594,23 @@ static unsigned int powernow_get(unsigned int cpu)
566 594
567 if (cpu) 595 if (cpu)
568 return 0; 596 return 0;
569 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 597 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
570 cfid = fidvidstatus.bits.CFID; 598 cfid = fidvidstatus.bits.CFID;
571 599
572 return (fsb * fid_codes[cfid] / 10); 600 return fsb * fid_codes[cfid] / 10;
573} 601}
574 602
575 603
576static int __init acer_cpufreq_pst(const struct dmi_system_id *d) 604static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
577{ 605{
578 printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); 606 printk(KERN_WARNING PFX
579 printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); 607 "%s laptop with broken PST tables in BIOS detected.\n",
580 printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); 608 d->ident);
609 printk(KERN_WARNING PFX
610 "You need to downgrade to 3A21 (09/09/2002), or try a newer "
611 "BIOS than 3A71 (01/20/2003)\n");
612 printk(KERN_WARNING PFX
613 "cpufreq scaling has been disabled as a result of this.\n");
581 return 0; 614 return 0;
582} 615}
583 616
@@ -598,7 +631,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {
598 { } 631 { }
599}; 632};
600 633
601static int __init powernow_cpu_init (struct cpufreq_policy *policy) 634static int __init powernow_cpu_init(struct cpufreq_policy *policy)
602{ 635{
603 union msr_fidvidstatus fidvidstatus; 636 union msr_fidvidstatus fidvidstatus;
604 int result; 637 int result;
@@ -606,7 +639,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
606 if (policy->cpu != 0) 639 if (policy->cpu != 0)
607 return -ENODEV; 640 return -ENODEV;
608 641
609 rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); 642 rdmsrl(MSR_K7_FID_VID_STATUS, fidvidstatus.val);
610 643
611 recalibrate_cpu_khz(); 644 recalibrate_cpu_khz();
612 645
@@ -618,19 +651,21 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
618 dprintk("FSB: %3dMHz\n", fsb/1000); 651 dprintk("FSB: %3dMHz\n", fsb/1000);
619 652
620 if (dmi_check_system(powernow_dmi_table) || acpi_force) { 653 if (dmi_check_system(powernow_dmi_table) || acpi_force) {
621 printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); 654 printk(KERN_INFO PFX "PSB/PST known to be broken. "
655 "Trying ACPI instead\n");
622 result = powernow_acpi_init(); 656 result = powernow_acpi_init();
623 } else { 657 } else {
624 result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); 658 result = powernow_decode_bios(fidvidstatus.bits.MFID,
659 fidvidstatus.bits.SVID);
625 if (result) { 660 if (result) {
626 printk (KERN_INFO PFX "Trying ACPI perflib\n"); 661 printk(KERN_INFO PFX "Trying ACPI perflib\n");
627 maximum_speed = 0; 662 maximum_speed = 0;
628 minimum_speed = -1; 663 minimum_speed = -1;
629 latency = 0; 664 latency = 0;
630 result = powernow_acpi_init(); 665 result = powernow_acpi_init();
631 if (result) { 666 if (result) {
632 printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); 667 printk(KERN_INFO PFX
633 printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); 668 "ACPI and legacy methods failed\n");
634 } 669 }
635 } else { 670 } else {
636 /* SGTC use the bus clock as timer */ 671 /* SGTC use the bus clock as timer */
@@ -642,10 +677,11 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
642 if (result) 677 if (result)
643 return result; 678 return result;
644 679
645 printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", 680 printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
646 minimum_speed/1000, maximum_speed/1000); 681 minimum_speed/1000, maximum_speed/1000);
647 682
648 policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); 683 policy->cpuinfo.transition_latency =
684 cpufreq_scale(2000000UL, fsb, latency);
649 685
650 policy->cur = powernow_get(0); 686 policy->cur = powernow_get(0);
651 687
@@ -654,7 +690,8 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
654 return cpufreq_frequency_table_cpuinfo(policy, powernow_table); 690 return cpufreq_frequency_table_cpuinfo(policy, powernow_table);
655} 691}
656 692
657static int powernow_cpu_exit (struct cpufreq_policy *policy) { 693static int powernow_cpu_exit(struct cpufreq_policy *policy)
694{
658 cpufreq_frequency_table_put_attr(policy->cpu); 695 cpufreq_frequency_table_put_attr(policy->cpu);
659 696
660#ifdef CONFIG_X86_POWERNOW_K7_ACPI 697#ifdef CONFIG_X86_POWERNOW_K7_ACPI
@@ -669,7 +706,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
669 return 0; 706 return 0;
670} 707}
671 708
672static struct freq_attr* powernow_table_attr[] = { 709static struct freq_attr *powernow_table_attr[] = {
673 &cpufreq_freq_attr_scaling_available_freqs, 710 &cpufreq_freq_attr_scaling_available_freqs,
674 NULL, 711 NULL,
675}; 712};
@@ -685,15 +722,15 @@ static struct cpufreq_driver powernow_driver = {
685 .attr = powernow_table_attr, 722 .attr = powernow_table_attr,
686}; 723};
687 724
688static int __init powernow_init (void) 725static int __init powernow_init(void)
689{ 726{
690 if (check_powernow()==0) 727 if (check_powernow() == 0)
691 return -ENODEV; 728 return -ENODEV;
692 return cpufreq_register_driver(&powernow_driver); 729 return cpufreq_register_driver(&powernow_driver);
693} 730}
694 731
695 732
696static void __exit powernow_exit (void) 733static void __exit powernow_exit(void)
697{ 734{
698 cpufreq_unregister_driver(&powernow_driver); 735 cpufreq_unregister_driver(&powernow_driver);
699} 736}
@@ -701,9 +738,9 @@ static void __exit powernow_exit (void)
701module_param(acpi_force, int, 0444); 738module_param(acpi_force, int, 0444);
702MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); 739MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
703 740
704MODULE_AUTHOR ("Dave Jones <davej@redhat.com>"); 741MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
705MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); 742MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
706MODULE_LICENSE ("GPL"); 743MODULE_LICENSE("GPL");
707 744
708late_initcall(powernow_init); 745late_initcall(powernow_init);
709module_exit(powernow_exit); 746module_exit(powernow_exit);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6428aa17b40e..4709ead2db52 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -33,16 +33,14 @@
33#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/cpumask.h> 34#include <linux/cpumask.h>
35#include <linux/sched.h> /* for current / set_cpus_allowed() */ 35#include <linux/sched.h> /* for current / set_cpus_allowed() */
36#include <linux/io.h>
37#include <linux/delay.h>
36 38
37#include <asm/msr.h> 39#include <asm/msr.h>
38#include <asm/io.h>
39#include <asm/delay.h>
40 40
41#ifdef CONFIG_X86_POWERNOW_K8_ACPI
42#include <linux/acpi.h> 41#include <linux/acpi.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <acpi/processor.h> 43#include <acpi/processor.h>
45#endif
46 44
47#define PFX "powernow-k8: " 45#define PFX "powernow-k8: "
48#define VERSION "version 2.20.00" 46#define VERSION "version 2.20.00"
@@ -56,7 +54,10 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
56static int cpu_family = CPU_OPTERON; 54static int cpu_family = CPU_OPTERON;
57 55
58#ifndef CONFIG_SMP 56#ifndef CONFIG_SMP
59DEFINE_PER_CPU(cpumask_t, cpu_core_map); 57static inline const struct cpumask *cpu_core_mask(int cpu)
58{
59 return cpumask_of(0);
60}
60#endif 61#endif
61 62
62/* Return a frequency in MHz, given an input fid */ 63/* Return a frequency in MHz, given an input fid */
@@ -71,7 +72,8 @@ static u32 find_khz_freq_from_fid(u32 fid)
71 return 1000 * find_freq_from_fid(fid); 72 return 1000 * find_freq_from_fid(fid);
72} 73}
73 74
74static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) 75static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data,
76 u32 pstate)
75{ 77{
76 return data[pstate].frequency; 78 return data[pstate].frequency;
77} 79}
@@ -186,7 +188,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
186 return 1; 188 return 1;
187 } 189 }
188 190
189 lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; 191 lo = fid;
192 lo |= (data->currvid << MSR_C_LO_VID_SHIFT);
193 lo |= MSR_C_LO_INIT_FID_VID;
190 194
191 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", 195 dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
192 fid, lo, data->plllock * PLL_LOCK_CONVERSION); 196 fid, lo, data->plllock * PLL_LOCK_CONVERSION);
@@ -194,7 +198,9 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
194 do { 198 do {
195 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); 199 wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
196 if (i++ > 100) { 200 if (i++ > 100) {
197 printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); 201 printk(KERN_ERR PFX
202 "Hardware error - pending bit very stuck - "
203 "no further pstate changes possible\n");
198 return 1; 204 return 1;
199 } 205 }
200 } while (query_current_values_with_pending_wait(data)); 206 } while (query_current_values_with_pending_wait(data));
@@ -202,14 +208,16 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
202 count_off_irt(data); 208 count_off_irt(data);
203 209
204 if (savevid != data->currvid) { 210 if (savevid != data->currvid) {
205 printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", 211 printk(KERN_ERR PFX
206 savevid, data->currvid); 212 "vid change on fid trans, old 0x%x, new 0x%x\n",
213 savevid, data->currvid);
207 return 1; 214 return 1;
208 } 215 }
209 216
210 if (fid != data->currfid) { 217 if (fid != data->currfid) {
211 printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, 218 printk(KERN_ERR PFX
212 data->currfid); 219 "fid trans failed, fid 0x%x, curr 0x%x\n", fid,
220 data->currfid);
213 return 1; 221 return 1;
214 } 222 }
215 223
@@ -228,7 +236,9 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
228 return 1; 236 return 1;
229 } 237 }
230 238
231 lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; 239 lo = data->currfid;
240 lo |= (vid << MSR_C_LO_VID_SHIFT);
241 lo |= MSR_C_LO_INIT_FID_VID;
232 242
233 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", 243 dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
234 vid, lo, STOP_GRANT_5NS); 244 vid, lo, STOP_GRANT_5NS);
@@ -236,20 +246,24 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
236 do { 246 do {
237 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); 247 wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
238 if (i++ > 100) { 248 if (i++ > 100) {
239 printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); 249 printk(KERN_ERR PFX "internal error - pending bit "
250 "very stuck - no further pstate "
251 "changes possible\n");
240 return 1; 252 return 1;
241 } 253 }
242 } while (query_current_values_with_pending_wait(data)); 254 } while (query_current_values_with_pending_wait(data));
243 255
244 if (savefid != data->currfid) { 256 if (savefid != data->currfid) {
245 printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", 257 printk(KERN_ERR PFX "fid changed on vid trans, old "
258 "0x%x new 0x%x\n",
246 savefid, data->currfid); 259 savefid, data->currfid);
247 return 1; 260 return 1;
248 } 261 }
249 262
250 if (vid != data->currvid) { 263 if (vid != data->currvid) {
251 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, 264 printk(KERN_ERR PFX "vid trans failed, vid 0x%x, "
252 data->currvid); 265 "curr 0x%x\n",
266 vid, data->currvid);
253 return 1; 267 return 1;
254 } 268 }
255 269
@@ -261,7 +275,8 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
261 * Decreasing vid codes represent increasing voltages: 275 * Decreasing vid codes represent increasing voltages:
262 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. 276 * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off.
263 */ 277 */
264static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) 278static int decrease_vid_code_by_step(struct powernow_k8_data *data,
279 u32 reqvid, u32 step)
265{ 280{
266 if ((data->currvid - reqvid) > step) 281 if ((data->currvid - reqvid) > step)
267 reqvid = data->currvid - step; 282 reqvid = data->currvid - step;
@@ -283,7 +298,8 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
283} 298}
284 299
285/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ 300/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
286static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) 301static int transition_fid_vid(struct powernow_k8_data *data,
302 u32 reqfid, u32 reqvid)
287{ 303{
288 if (core_voltage_pre_transition(data, reqvid)) 304 if (core_voltage_pre_transition(data, reqvid))
289 return 1; 305 return 1;
@@ -298,7 +314,8 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
298 return 1; 314 return 1;
299 315
300 if ((reqfid != data->currfid) || (reqvid != data->currvid)) { 316 if ((reqfid != data->currfid) || (reqvid != data->currvid)) {
301 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", 317 printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, "
318 "curr 0x%x 0x%x\n",
302 smp_processor_id(), 319 smp_processor_id(),
303 reqfid, reqvid, data->currfid, data->currvid); 320 reqfid, reqvid, data->currfid, data->currvid);
304 return 1; 321 return 1;
@@ -311,13 +328,15 @@ static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 req
311} 328}
312 329
313/* Phase 1 - core voltage transition ... setup voltage */ 330/* Phase 1 - core voltage transition ... setup voltage */
314static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) 331static int core_voltage_pre_transition(struct powernow_k8_data *data,
332 u32 reqvid)
315{ 333{
316 u32 rvosteps = data->rvo; 334 u32 rvosteps = data->rvo;
317 u32 savefid = data->currfid; 335 u32 savefid = data->currfid;
318 u32 maxvid, lo; 336 u32 maxvid, lo;
319 337
320 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", 338 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
339 "reqvid 0x%x, rvo 0x%x\n",
321 smp_processor_id(), 340 smp_processor_id(),
322 data->currfid, data->currvid, reqvid, data->rvo); 341 data->currfid, data->currvid, reqvid, data->rvo);
323 342
@@ -340,7 +359,7 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
340 } else { 359 } else {
341 dprintk("ph1: changing vid for rvo, req 0x%x\n", 360 dprintk("ph1: changing vid for rvo, req 0x%x\n",
342 data->currvid - 1); 361 data->currvid - 1);
343 if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) 362 if (decrease_vid_code_by_step(data, data->currvid-1, 1))
344 return 1; 363 return 1;
345 rvosteps--; 364 rvosteps--;
346 } 365 }
@@ -350,7 +369,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
350 return 1; 369 return 1;
351 370
352 if (savefid != data->currfid) { 371 if (savefid != data->currfid) {
353 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); 372 printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n",
373 data->currfid);
354 return 1; 374 return 1;
355 } 375 }
356 376
@@ -363,20 +383,24 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid
363/* Phase 2 - core frequency transition */ 383/* Phase 2 - core frequency transition */
364static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) 384static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
365{ 385{
366 u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; 386 u32 vcoreqfid, vcocurrfid, vcofiddiff;
387 u32 fid_interval, savevid = data->currvid;
367 388
368 if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { 389 if ((reqfid < HI_FID_TABLE_BOTTOM) &&
369 printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", 390 (data->currfid < HI_FID_TABLE_BOTTOM)) {
370 reqfid, data->currfid); 391 printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
392 "0x%x 0x%x\n", reqfid, data->currfid);
371 return 1; 393 return 1;
372 } 394 }
373 395
374 if (data->currfid == reqfid) { 396 if (data->currfid == reqfid) {
375 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); 397 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
398 data->currfid);
376 return 0; 399 return 0;
377 } 400 }
378 401
379 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", 402 dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, "
403 "reqfid 0x%x\n",
380 smp_processor_id(), 404 smp_processor_id(),
381 data->currfid, data->currvid, reqfid); 405 data->currfid, data->currvid, reqfid);
382 406
@@ -390,14 +414,14 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
390 414
391 if (reqfid > data->currfid) { 415 if (reqfid > data->currfid) {
392 if (data->currfid > LO_FID_TABLE_TOP) { 416 if (data->currfid > LO_FID_TABLE_TOP) {
393 if (write_new_fid(data, data->currfid + fid_interval)) { 417 if (write_new_fid(data,
418 data->currfid + fid_interval))
394 return 1; 419 return 1;
395 }
396 } else { 420 } else {
397 if (write_new_fid 421 if (write_new_fid
398 (data, 2 + convert_fid_to_vco_fid(data->currfid))) { 422 (data,
423 2 + convert_fid_to_vco_fid(data->currfid)))
399 return 1; 424 return 1;
400 }
401 } 425 }
402 } else { 426 } else {
403 if (write_new_fid(data, data->currfid - fid_interval)) 427 if (write_new_fid(data, data->currfid - fid_interval))
@@ -417,7 +441,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
417 441
418 if (data->currfid != reqfid) { 442 if (data->currfid != reqfid) {
419 printk(KERN_ERR PFX 443 printk(KERN_ERR PFX
420 "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", 444 "ph2: mismatch, failed fid transition, "
445 "curr 0x%x, req 0x%x\n",
421 data->currfid, reqfid); 446 data->currfid, reqfid);
422 return 1; 447 return 1;
423 } 448 }
@@ -435,7 +460,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
435} 460}
436 461
437/* Phase 3 - core voltage transition flow ... jump to the final vid. */ 462/* Phase 3 - core voltage transition flow ... jump to the final vid. */
438static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) 463static int core_voltage_post_transition(struct powernow_k8_data *data,
464 u32 reqvid)
439{ 465{
440 u32 savefid = data->currfid; 466 u32 savefid = data->currfid;
441 u32 savereqvid = reqvid; 467 u32 savereqvid = reqvid;
@@ -457,7 +483,8 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi
457 483
458 if (data->currvid != reqvid) { 484 if (data->currvid != reqvid) {
459 printk(KERN_ERR PFX 485 printk(KERN_ERR PFX
460 "ph3: failed vid transition\n, req 0x%x, curr 0x%x", 486 "ph3: failed vid transition, "
487 "req 0x%x, curr 0x%x\n",
461 reqvid, data->currvid); 488 reqvid, data->currvid);
462 return 1; 489 return 1;
463 } 490 }
@@ -508,7 +535,8 @@ static int check_supported_cpu(unsigned int cpu)
508 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { 535 if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
509 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || 536 if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
510 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { 537 ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) {
511 printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); 538 printk(KERN_INFO PFX
539 "Processor cpuid %x not supported\n", eax);
512 goto out; 540 goto out;
513 } 541 }
514 542
@@ -520,8 +548,10 @@ static int check_supported_cpu(unsigned int cpu)
520 } 548 }
521 549
522 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); 550 cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
523 if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { 551 if ((edx & P_STATE_TRANSITION_CAPABLE)
524 printk(KERN_INFO PFX "Power state transitions not supported\n"); 552 != P_STATE_TRANSITION_CAPABLE) {
553 printk(KERN_INFO PFX
554 "Power state transitions not supported\n");
525 goto out; 555 goto out;
526 } 556 }
527 } else { /* must be a HW Pstate capable processor */ 557 } else { /* must be a HW Pstate capable processor */
@@ -539,7 +569,8 @@ out:
539 return rc; 569 return rc;
540} 570}
541 571
542static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) 572static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
573 u8 maxvid)
543{ 574{
544 unsigned int j; 575 unsigned int j;
545 u8 lastfid = 0xff; 576 u8 lastfid = 0xff;
@@ -550,12 +581,14 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
550 j, pst[j].vid); 581 j, pst[j].vid);
551 return -EINVAL; 582 return -EINVAL;
552 } 583 }
553 if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ 584 if (pst[j].vid < data->rvo) {
585 /* vid + rvo >= 0 */
554 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate" 586 printk(KERN_ERR FW_BUG PFX "0 vid exceeded with pstate"
555 " %d\n", j); 587 " %d\n", j);
556 return -ENODEV; 588 return -ENODEV;
557 } 589 }
558 if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ 590 if (pst[j].vid < maxvid + data->rvo) {
591 /* vid + rvo >= maxvid */
559 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate" 592 printk(KERN_ERR FW_BUG PFX "maxvid exceeded with pstate"
560 " %d\n", j); 593 " %d\n", j);
561 return -ENODEV; 594 return -ENODEV;
@@ -579,23 +612,31 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8
579 return -EINVAL; 612 return -EINVAL;
580 } 613 }
581 if (lastfid > LO_FID_TABLE_TOP) 614 if (lastfid > LO_FID_TABLE_TOP)
582 printk(KERN_INFO FW_BUG PFX "first fid not from lo freq table\n"); 615 printk(KERN_INFO FW_BUG PFX
616 "first fid not from lo freq table\n");
583 617
584 return 0; 618 return 0;
585} 619}
586 620
621static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
622{
623 data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
624}
625
587static void print_basics(struct powernow_k8_data *data) 626static void print_basics(struct powernow_k8_data *data)
588{ 627{
589 int j; 628 int j;
590 for (j = 0; j < data->numps; j++) { 629 for (j = 0; j < data->numps; j++) {
591 if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { 630 if (data->powernow_table[j].frequency !=
631 CPUFREQ_ENTRY_INVALID) {
592 if (cpu_family == CPU_HW_PSTATE) { 632 if (cpu_family == CPU_HW_PSTATE) {
593 printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n", 633 printk(KERN_INFO PFX
594 j, 634 " %d : pstate %d (%d MHz)\n", j,
595 data->powernow_table[j].index, 635 data->powernow_table[j].index,
596 data->powernow_table[j].frequency/1000); 636 data->powernow_table[j].frequency/1000);
597 } else { 637 } else {
598 printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", 638 printk(KERN_INFO PFX
639 " %d : fid 0x%x (%d MHz), vid 0x%x\n",
599 j, 640 j,
600 data->powernow_table[j].index & 0xff, 641 data->powernow_table[j].index & 0xff,
601 data->powernow_table[j].frequency/1000, 642 data->powernow_table[j].frequency/1000,
@@ -604,20 +645,25 @@ static void print_basics(struct powernow_k8_data *data)
604 } 645 }
605 } 646 }
606 if (data->batps) 647 if (data->batps)
607 printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); 648 printk(KERN_INFO PFX "Only %d pstates on battery\n",
649 data->batps);
608} 650}
609 651
610static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) 652static int fill_powernow_table(struct powernow_k8_data *data,
653 struct pst_s *pst, u8 maxvid)
611{ 654{
612 struct cpufreq_frequency_table *powernow_table; 655 struct cpufreq_frequency_table *powernow_table;
613 unsigned int j; 656 unsigned int j;
614 657
615 if (data->batps) { /* use ACPI support to get full speed on mains power */ 658 if (data->batps) {
616 printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); 659 /* use ACPI support to get full speed on mains power */
660 printk(KERN_WARNING PFX
661 "Only %d pstates usable (use ACPI driver for full "
662 "range)\n", data->batps);
617 data->numps = data->batps; 663 data->numps = data->batps;
618 } 664 }
619 665
620 for ( j=1; j<data->numps; j++ ) { 666 for (j = 1; j < data->numps; j++) {
621 if (pst[j-1].fid >= pst[j].fid) { 667 if (pst[j-1].fid >= pst[j].fid) {
622 printk(KERN_ERR PFX "PST out of sequence\n"); 668 printk(KERN_ERR PFX "PST out of sequence\n");
623 return -EINVAL; 669 return -EINVAL;
@@ -640,9 +686,11 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
640 } 686 }
641 687
642 for (j = 0; j < data->numps; j++) { 688 for (j = 0; j < data->numps; j++) {
689 int freq;
643 powernow_table[j].index = pst[j].fid; /* lower 8 bits */ 690 powernow_table[j].index = pst[j].fid; /* lower 8 bits */
644 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ 691 powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */
645 powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); 692 freq = find_khz_freq_from_fid(pst[j].fid);
693 powernow_table[j].frequency = freq;
646 } 694 }
647 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; 695 powernow_table[data->numps].frequency = CPUFREQ_TABLE_END;
648 powernow_table[data->numps].index = 0; 696 powernow_table[data->numps].index = 0;
@@ -654,11 +702,12 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
654 702
655 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); 703 dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
656 data->powernow_table = powernow_table; 704 data->powernow_table = powernow_table;
657 if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) 705 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
658 print_basics(data); 706 print_basics(data);
659 707
660 for (j = 0; j < data->numps; j++) 708 for (j = 0; j < data->numps; j++)
661 if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) 709 if ((pst[j].fid == data->currfid) &&
710 (pst[j].vid == data->currvid))
662 return 0; 711 return 0;
663 712
664 dprintk("currfid/vid do not match PST, ignoring\n"); 713 dprintk("currfid/vid do not match PST, ignoring\n");
@@ -698,7 +747,8 @@ static int find_psb_table(struct powernow_k8_data *data)
698 } 747 }
699 748
700 data->vstable = psb->vstable; 749 data->vstable = psb->vstable;
701 dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); 750 dprintk("voltage stabilization time: %d(*20us)\n",
751 data->vstable);
702 752
703 dprintk("flags2: 0x%x\n", psb->flags2); 753 dprintk("flags2: 0x%x\n", psb->flags2);
704 data->rvo = psb->flags2 & 3; 754 data->rvo = psb->flags2 & 3;
@@ -713,11 +763,12 @@ static int find_psb_table(struct powernow_k8_data *data)
713 763
714 dprintk("numpst: 0x%x\n", psb->num_tables); 764 dprintk("numpst: 0x%x\n", psb->num_tables);
715 cpst = psb->num_tables; 765 cpst = psb->num_tables;
716 if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ 766 if ((psb->cpuid == 0x00000fc0) ||
767 (psb->cpuid == 0x00000fe0)) {
717 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 768 thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
718 if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { 769 if ((thiscpuid == 0x00000fc0) ||
770 (thiscpuid == 0x00000fe0))
719 cpst = 1; 771 cpst = 1;
720 }
721 } 772 }
722 if (cpst != 1) { 773 if (cpst != 1) {
723 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n"); 774 printk(KERN_ERR FW_BUG PFX "numpst must be 1\n");
@@ -732,7 +783,8 @@ static int find_psb_table(struct powernow_k8_data *data)
732 783
733 data->numps = psb->numps; 784 data->numps = psb->numps;
734 dprintk("numpstates: 0x%x\n", data->numps); 785 dprintk("numpstates: 0x%x\n", data->numps);
735 return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); 786 return fill_powernow_table(data,
787 (struct pst_s *)(psb+1), maxvid);
736 } 788 }
737 /* 789 /*
738 * If you see this message, complain to BIOS manufacturer. If 790 * If you see this message, complain to BIOS manufacturer. If
@@ -745,28 +797,31 @@ static int find_psb_table(struct powernow_k8_data *data)
745 * BIOS and Kernel Developer's Guide, which is available on 797 * BIOS and Kernel Developer's Guide, which is available on
746 * www.amd.com 798 * www.amd.com
747 */ 799 */
748 printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); 800 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
749 return -ENODEV; 801 return -ENODEV;
750} 802}
751 803
752#ifdef CONFIG_X86_POWERNOW_K8_ACPI 804static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
753static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) 805 unsigned int index)
754{ 806{
807 acpi_integer control;
808
755 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) 809 if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
756 return; 810 return;
757 811
758 data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; 812 control = data->acpi_data.states[index].control;
759 data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; 813 data->irt = (control >> IRT_SHIFT) & IRT_MASK;
760 data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; 814 data->rvo = (control >> RVO_SHIFT) & RVO_MASK;
761 data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; 815 data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
762 data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); 816 data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK;
763 data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; 817 data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK);
818 data->vstable = (control >> VST_SHIFT) & VST_MASK;
764 } 819 }
765 819
766static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) 820static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
767{ 821{
768 struct cpufreq_frequency_table *powernow_table; 822 struct cpufreq_frequency_table *powernow_table;
769 int ret_val = -ENODEV; 823 int ret_val = -ENODEV;
824 acpi_integer space_id;
770 825
771 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { 826 if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
772 dprintk("register performance failed: bad ACPI data\n"); 827 dprintk("register performance failed: bad ACPI data\n");
@@ -779,11 +834,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
779 goto err_out; 834 goto err_out;
780 } 835 }
781 836
782 if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || 837 space_id = data->acpi_data.control_register.space_id;
783 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { 838 if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
839 (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
784 dprintk("Invalid control/status registers (%x - %x)\n", 840 dprintk("Invalid control/status registers (%x - %x)\n",
785 data->acpi_data.control_register.space_id, 841 space_id,
786 data->acpi_data.status_register.space_id); 842 data->acpi_data.status_register.space_id);
787 goto err_out; 843 goto err_out;
788 } 844 }
789 845
@@ -802,13 +858,14 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
802 if (ret_val) 858 if (ret_val)
803 goto err_out_mem; 859 goto err_out_mem;
804 860
805 powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; 861 powernow_table[data->acpi_data.state_count].frequency =
862 CPUFREQ_TABLE_END;
806 powernow_table[data->acpi_data.state_count].index = 0; 863 powernow_table[data->acpi_data.state_count].index = 0;
807 data->powernow_table = powernow_table; 864 data->powernow_table = powernow_table;
808 865
809 /* fill in data */ 866 /* fill in data */
810 data->numps = data->acpi_data.state_count; 867 data->numps = data->acpi_data.state_count;
811 if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) 868 if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
812 print_basics(data); 869 print_basics(data);
813 powernow_k8_acpi_pst_values(data, 0); 870 powernow_k8_acpi_pst_values(data, 0);
814 871
@@ -830,13 +887,15 @@ err_out_mem:
830err_out: 887err_out:
831 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 888 acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
832 889
833 /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ 890 /* data->acpi_data.state_count informs us at ->exit()
891 * whether ACPI was used */
834 data->acpi_data.state_count = 0; 892 data->acpi_data.state_count = 0;
835 893
836 return ret_val; 894 return ret_val;
837} 895}
838 896
839static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 897static int fill_powernow_table_pstate(struct powernow_k8_data *data,
898 struct cpufreq_frequency_table *powernow_table)
840{ 899{
841 int i; 900 int i;
842 u32 hi = 0, lo = 0; 901 u32 hi = 0, lo = 0;
@@ -848,84 +907,101 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
848 907
849 index = data->acpi_data.states[i].control & HW_PSTATE_MASK; 908 index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
850 if (index > data->max_hw_pstate) { 909 if (index > data->max_hw_pstate) {
851 printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); 910 printk(KERN_ERR PFX "invalid pstate %d - "
852 printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); 911 "bad value %d.\n", i, index);
853 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 912 printk(KERN_ERR PFX "Please report to BIOS "
913 "manufacturer\n");
914 invalidate_entry(data, i);
854 continue; 915 continue;
855 } 916 }
856 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); 917 rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
857 if (!(hi & HW_PSTATE_VALID_MASK)) { 918 if (!(hi & HW_PSTATE_VALID_MASK)) {
858 dprintk("invalid pstate %d, ignoring\n", index); 919 dprintk("invalid pstate %d, ignoring\n", index);
859 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 920 invalidate_entry(data, i);
860 continue; 921 continue;
861 } 922 }
862 923
863 powernow_table[i].index = index; 924 powernow_table[i].index = index;
864 925
865 powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; 926 powernow_table[i].frequency =
927 data->acpi_data.states[i].core_frequency * 1000;
866 } 928 }
867 return 0; 929 return 0;
868} 930}
869 931
870static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) 932static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
933 struct cpufreq_frequency_table *powernow_table)
871{ 934{
872 int i; 935 int i;
873 int cntlofreq = 0; 936 int cntlofreq = 0;
937
874 for (i = 0; i < data->acpi_data.state_count; i++) { 938 for (i = 0; i < data->acpi_data.state_count; i++) {
875 u32 fid; 939 u32 fid;
876 u32 vid; 940 u32 vid;
941 u32 freq, index;
942 acpi_integer status, control;
877 943
878 if (data->exttype) { 944 if (data->exttype) {
879 fid = data->acpi_data.states[i].status & EXT_FID_MASK; 945 status = data->acpi_data.states[i].status;
880 vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; 946 fid = status & EXT_FID_MASK;
947 vid = (status >> VID_SHIFT) & EXT_VID_MASK;
881 } else { 948 } else {
882 fid = data->acpi_data.states[i].control & FID_MASK; 949 control = data->acpi_data.states[i].control;
883 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; 950 fid = control & FID_MASK;
951 vid = (control >> VID_SHIFT) & VID_MASK;
884 } 952 }
885 953
886 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); 954 dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
887 955
888 powernow_table[i].index = fid; /* lower 8 bits */ 956 index = fid | (vid<<8);
889 powernow_table[i].index |= (vid << 8); /* upper 8 bits */ 957 powernow_table[i].index = index;
890 powernow_table[i].frequency = find_khz_freq_from_fid(fid); 958
959 freq = find_khz_freq_from_fid(fid);
960 powernow_table[i].frequency = freq;
891 961
892 /* verify frequency is OK */ 962 /* verify frequency is OK */
893 if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || 963 if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
894 (powernow_table[i].frequency < (MIN_FREQ * 1000))) { 964 dprintk("invalid freq %u kHz, ignoring\n", freq);
895 dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); 965 invalidate_entry(data, i);
896 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
897 continue; 966 continue;
898 } 967 }
899 968
900 /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ 969 /* verify voltage is OK -
970 * BIOSs are using "off" to indicate invalid */
901 if (vid == VID_OFF) { 971 if (vid == VID_OFF) {
902 dprintk("invalid vid %u, ignoring\n", vid); 972 dprintk("invalid vid %u, ignoring\n", vid);
903 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 973 invalidate_entry(data, i);
904 continue; 974 continue;
905 } 975 }
906 976
907 /* verify only 1 entry from the lo frequency table */ 977 /* verify only 1 entry from the lo frequency table */
908 if (fid < HI_FID_TABLE_BOTTOM) { 978 if (fid < HI_FID_TABLE_BOTTOM) {
909 if (cntlofreq) { 979 if (cntlofreq) {
910 /* if both entries are the same, ignore this one ... */ 980 /* if both entries are the same,
911 if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || 981 * ignore this one ... */
912 (powernow_table[i].index != powernow_table[cntlofreq].index)) { 982 if ((freq != powernow_table[cntlofreq].frequency) ||
913 printk(KERN_ERR PFX "Too many lo freq table entries\n"); 983 (index != powernow_table[cntlofreq].index)) {
984 printk(KERN_ERR PFX
985 "Too many lo freq table "
986 "entries\n");
914 return 1; 987 return 1;
915 } 988 }
916 989
917 dprintk("double low frequency table entry, ignoring it.\n"); 990 dprintk("double low frequency table entry, "
918 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 991 "ignoring it.\n");
992 invalidate_entry(data, i);
919 continue; 993 continue;
920 } else 994 } else
921 cntlofreq = i; 995 cntlofreq = i;
922 } 996 }
923 997
924 if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { 998 if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
925 printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", 999 printk(KERN_INFO PFX "invalid freq entries "
926 powernow_table[i].frequency, 1000 "%u kHz vs. %u kHz\n", freq,
927 (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); 1001 (unsigned int)
928 powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; 1002 (data->acpi_data.states[i].core_frequency
1003 * 1000));
1004 invalidate_entry(data, i);
929 continue; 1005 continue;
930 } 1006 }
931 } 1007 }
@@ -935,7 +1011,8 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
935static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) 1011static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
936{ 1012{
937 if (data->acpi_data.state_count) 1013 if (data->acpi_data.state_count)
938 acpi_processor_unregister_performance(&data->acpi_data, data->cpu); 1014 acpi_processor_unregister_performance(&data->acpi_data,
1015 data->cpu);
939 free_cpumask_var(data->acpi_data.shared_cpu_map); 1016 free_cpumask_var(data->acpi_data.shared_cpu_map);
940} 1017}
941 1018
@@ -953,15 +1030,9 @@ static int get_transition_latency(struct powernow_k8_data *data)
953 return 1000 * max_latency; 1030 return 1000 * max_latency;
954} 1031}
955 1032
956#else
957static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
958static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
959static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
960static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
961#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
962
963/* Take a frequency, and issue the fid/vid transition command */ 1033/* Take a frequency, and issue the fid/vid transition command */
964static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) 1034static int transition_frequency_fidvid(struct powernow_k8_data *data,
1035 unsigned int index)
965{ 1036{
966 u32 fid = 0; 1037 u32 fid = 0;
967 u32 vid = 0; 1038 u32 vid = 0;
@@ -989,7 +1060,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
989 return 0; 1060 return 0;
990 } 1061 }
991 1062
992 if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { 1063 if ((fid < HI_FID_TABLE_BOTTOM) &&
1064 (data->currfid < HI_FID_TABLE_BOTTOM)) {
993 printk(KERN_ERR PFX 1065 printk(KERN_ERR PFX
994 "ignoring illegal change in lo freq table-%x to 0x%x\n", 1066 "ignoring illegal change in lo freq table-%x to 0x%x\n",
995 data->currfid, fid); 1067 data->currfid, fid);
@@ -1017,7 +1089,8 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
1017} 1089}
1018 1090
1019/* Take a frequency, and issue the hardware pstate transition command */ 1091/* Take a frequency, and issue the hardware pstate transition command */
1020static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) 1092static int transition_frequency_pstate(struct powernow_k8_data *data,
1093 unsigned int index)
1021{ 1094{
1022 u32 pstate = 0; 1095 u32 pstate = 0;
1023 int res, i; 1096 int res, i;
@@ -1029,7 +1102,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
1029 pstate = index & HW_PSTATE_MASK; 1102 pstate = index & HW_PSTATE_MASK;
1030 if (pstate > data->max_hw_pstate) 1103 if (pstate > data->max_hw_pstate)
1031 return 0; 1104 return 0;
1032 freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1105 freqs.old = find_khz_freq_from_pstate(data->powernow_table,
1106 data->currpstate);
1033 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); 1107 freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
1034 1108
1035 for_each_cpu_mask_nr(i, *(data->available_cores)) { 1109 for_each_cpu_mask_nr(i, *(data->available_cores)) {
@@ -1048,7 +1122,8 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
1048} 1122}
1049 1123
1050/* Driver entry point to switch to the target frequency */ 1124/* Driver entry point to switch to the target frequency */
1051static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) 1125static int powernowk8_target(struct cpufreq_policy *pol,
1126 unsigned targfreq, unsigned relation)
1052{ 1127{
1053 cpumask_t oldmask; 1128 cpumask_t oldmask;
1054 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1129 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
@@ -1087,14 +1162,18 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1087 dprintk("targ: curr fid 0x%x, vid 0x%x\n", 1162 dprintk("targ: curr fid 0x%x, vid 0x%x\n",
1088 data->currfid, data->currvid); 1163 data->currfid, data->currvid);
1089 1164
1090 if ((checkvid != data->currvid) || (checkfid != data->currfid)) { 1165 if ((checkvid != data->currvid) ||
1166 (checkfid != data->currfid)) {
1091 printk(KERN_INFO PFX 1167 printk(KERN_INFO PFX
1092 "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", 1168 "error - out of sync, fid 0x%x 0x%x, "
1093 checkfid, data->currfid, checkvid, data->currvid); 1169 "vid 0x%x 0x%x\n",
1170 checkfid, data->currfid,
1171 checkvid, data->currvid);
1094 } 1172 }
1095 } 1173 }
1096 1174
1097 if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) 1175 if (cpufreq_frequency_table_target(pol, data->powernow_table,
1176 targfreq, relation, &newstate))
1098 goto err_out; 1177 goto err_out;
1099 1178
1100 mutex_lock(&fidvid_mutex); 1179 mutex_lock(&fidvid_mutex);
@@ -1114,7 +1193,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
1114 mutex_unlock(&fidvid_mutex); 1193 mutex_unlock(&fidvid_mutex);
1115 1194
1116 if (cpu_family == CPU_HW_PSTATE) 1195 if (cpu_family == CPU_HW_PSTATE)
1117 pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); 1196 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1197 newstate);
1118 else 1198 else
1119 pol->cur = find_khz_freq_from_fid(data->currfid); 1199 pol->cur = find_khz_freq_from_fid(data->currfid);
1120 ret = 0; 1200 ret = 0;
@@ -1141,6 +1221,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1141 struct powernow_k8_data *data; 1221 struct powernow_k8_data *data;
1142 cpumask_t oldmask; 1222 cpumask_t oldmask;
1143 int rc; 1223 int rc;
1224 static int print_once;
1144 1225
1145 if (!cpu_online(pol->cpu)) 1226 if (!cpu_online(pol->cpu))
1146 return -ENODEV; 1227 return -ENODEV;
@@ -1163,33 +1244,31 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1163 * an UP version, and is deprecated by AMD. 1244 * an UP version, and is deprecated by AMD.
1164 */ 1245 */
1165 if (num_online_cpus() != 1) { 1246 if (num_online_cpus() != 1) {
1166#ifndef CONFIG_ACPI_PROCESSOR 1247 /*
1167 printk(KERN_ERR PFX "ACPI Processor support is required " 1248 * Replace this one with print_once as soon as such a
1168 "for SMP systems but is absent. Please load the " 1249 * thing gets introduced
1169 "ACPI Processor module before starting this " 1250 */
1170 "driver.\n"); 1251 if (!print_once) {
1171#else 1252 WARN_ONCE(1, KERN_ERR FW_BUG PFX "Your BIOS "
1172 printk(KERN_ERR FW_BUG PFX "Your BIOS does not provide" 1253 "does not provide ACPI _PSS objects "
1173 " ACPI _PSS objects in a way that Linux " 1254 "in a way that Linux understands. "
1174 "understands. Please report this to the Linux " 1255 "Please report this to the Linux ACPI"
1175 "ACPI maintainers and complain to your BIOS " 1256 " maintainers and complain to your "
1176 "vendor.\n"); 1257 "BIOS vendor.\n");
1177#endif 1258 print_once++;
1178 kfree(data); 1259 }
1179 return -ENODEV; 1260 goto err_out;
1180 } 1261 }
1181 if (pol->cpu != 0) { 1262 if (pol->cpu != 0) {
1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1263 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1183 "CPU other than CPU0. Complain to your BIOS " 1264 "CPU other than CPU0. Complain to your BIOS "
1184 "vendor.\n"); 1265 "vendor.\n");
1185 kfree(data); 1266 goto err_out;
1186 return -ENODEV;
1187 } 1267 }
1188 rc = find_psb_table(data); 1268 rc = find_psb_table(data);
1189 if (rc) { 1269 if (rc)
1190 kfree(data); 1270 goto err_out;
1191 return -ENODEV; 1271
1192 }
1193 /* Take a crude guess here. 1272 /* Take a crude guess here.
1194 * That guess was in microseconds, so multiply with 1000 */ 1273 * That guess was in microseconds, so multiply with 1000 */
1195 pol->cpuinfo.transition_latency = ( 1274 pol->cpuinfo.transition_latency = (
@@ -1204,16 +1283,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1204 1283
1205 if (smp_processor_id() != pol->cpu) { 1284 if (smp_processor_id() != pol->cpu) {
1206 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); 1285 printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu);
1207 goto err_out; 1286 goto err_out_unmask;
1208 } 1287 }
1209 1288
1210 if (pending_bit_stuck()) { 1289 if (pending_bit_stuck()) {
1211 printk(KERN_ERR PFX "failing init, change pending bit set\n"); 1290 printk(KERN_ERR PFX "failing init, change pending bit set\n");
1212 goto err_out; 1291 goto err_out_unmask;
1213 } 1292 }
1214 1293
1215 if (query_current_values_with_pending_wait(data)) 1294 if (query_current_values_with_pending_wait(data))
1216 goto err_out; 1295 goto err_out_unmask;
1217 1296
1218 if (cpu_family == CPU_OPTERON) 1297 if (cpu_family == CPU_OPTERON)
1219 fidvid_msr_init(); 1298 fidvid_msr_init();
@@ -1224,11 +1303,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1224 if (cpu_family == CPU_HW_PSTATE) 1303 if (cpu_family == CPU_HW_PSTATE)
1225 cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); 1304 cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
1226 else 1305 else
1227 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); 1306 cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
1228 data->available_cores = pol->cpus; 1307 data->available_cores = pol->cpus;
1229 1308
1230 if (cpu_family == CPU_HW_PSTATE) 1309 if (cpu_family == CPU_HW_PSTATE)
1231 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1310 pol->cur = find_khz_freq_from_pstate(data->powernow_table,
1311 data->currpstate);
1232 else 1312 else
1233 pol->cur = find_khz_freq_from_fid(data->currfid); 1313 pol->cur = find_khz_freq_from_fid(data->currfid);
1234 dprintk("policy current frequency %d kHz\n", pol->cur); 1314 dprintk("policy current frequency %d kHz\n", pol->cur);
@@ -1245,7 +1325,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1245 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); 1325 cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
1246 1326
1247 if (cpu_family == CPU_HW_PSTATE) 1327 if (cpu_family == CPU_HW_PSTATE)
1248 dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); 1328 dprintk("cpu_init done, current pstate 0x%x\n",
1329 data->currpstate);
1249 else 1330 else
1250 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", 1331 dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
1251 data->currfid, data->currvid); 1332 data->currfid, data->currvid);
@@ -1254,15 +1335,16 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1254 1335
1255 return 0; 1336 return 0;
1256 1337
1257err_out: 1338err_out_unmask:
1258 set_cpus_allowed_ptr(current, &oldmask); 1339 set_cpus_allowed_ptr(current, &oldmask);
1259 powernow_k8_cpu_exit_acpi(data); 1340 powernow_k8_cpu_exit_acpi(data);
1260 1341
1342err_out:
1261 kfree(data); 1343 kfree(data);
1262 return -ENODEV; 1344 return -ENODEV;
1263} 1345}
1264 1346
1265static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) 1347static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1266{ 1348{
1267 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); 1349 struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);
1268 1350
@@ -1279,14 +1361,14 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol)
1279 return 0; 1361 return 0;
1280} 1362}
1281 1363
1282static unsigned int powernowk8_get (unsigned int cpu) 1364static unsigned int powernowk8_get(unsigned int cpu)
1283{ 1365{
1284 struct powernow_k8_data *data; 1366 struct powernow_k8_data *data;
1285 cpumask_t oldmask = current->cpus_allowed; 1367 cpumask_t oldmask = current->cpus_allowed;
1286 unsigned int khz = 0; 1368 unsigned int khz = 0;
1287 unsigned int first; 1369 unsigned int first;
1288 1370
1289 first = first_cpu(per_cpu(cpu_core_map, cpu)); 1371 first = cpumask_first(cpu_core_mask(cpu));
1290 data = per_cpu(powernow_data, first); 1372 data = per_cpu(powernow_data, first);
1291 1373
1292 if (!data) 1374 if (!data)
@@ -1315,7 +1397,7 @@ out:
1315 return khz; 1397 return khz;
1316} 1398}
1317 1399
1318static struct freq_attr* powernow_k8_attr[] = { 1400static struct freq_attr *powernow_k8_attr[] = {
1319 &cpufreq_freq_attr_scaling_available_freqs, 1401 &cpufreq_freq_attr_scaling_available_freqs,
1320 NULL, 1402 NULL,
1321}; 1403};
@@ -1360,7 +1442,8 @@ static void __exit powernowk8_exit(void)
1360 cpufreq_unregister_driver(&cpufreq_amd64_driver); 1442 cpufreq_unregister_driver(&cpufreq_amd64_driver);
1361} 1443}
1362 1444
1363MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); 1445MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and "
1446 "Mark Langsdorf <mark.langsdorf@amd.com>");
1364MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); 1447MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver.");
1365MODULE_LICENSE("GPL"); 1448MODULE_LICENSE("GPL");
1366 1449
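
The cpumask hunks in this file all make the same substitution: instead of taking the address of the per-CPU cpumask_t directly, callers go through the topology accessor, which returns a struct cpumask pointer and hides how the map is stored. A minimal before/after sketch using the same variables as the hunks:

	/* before: address the per-CPU map directly */
	cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
	first = first_cpu(per_cpu(cpu_core_map, cpu));

	/* after: use the accessor and the struct-cpumask API */
	cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
	first = cpumask_first(cpu_core_mask(cpu));
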
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index 8ecc75b6c7c3..6c6698feade1 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -45,11 +45,10 @@ struct powernow_k8_data {
45 * frequency is in kHz */ 45 * frequency is in kHz */
46 struct cpufreq_frequency_table *powernow_table; 46 struct cpufreq_frequency_table *powernow_table;
47 47
48#ifdef CONFIG_X86_POWERNOW_K8_ACPI
49 /* the acpi table needs to be kept. it's only available if ACPI was 48 /* the acpi table needs to be kept. it's only available if ACPI was
50 * used to determine valid frequency/vid/fid states */ 49 * used to determine valid frequency/vid/fid states */
51 struct acpi_processor_performance acpi_data; 50 struct acpi_processor_performance acpi_data;
52#endif 51
53 /* we need to keep track of associated cores, but let cpufreq 52 /* we need to keep track of associated cores, but let cpufreq
54 * handle hotplug events - so just point at cpufreq pol->cpus 53 * handle hotplug events - so just point at cpufreq pol->cpus
55 * structure */ 54 * structure */
@@ -222,10 +221,8 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
222 221
223static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); 222static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
224 223
225#ifdef CONFIG_X86_POWERNOW_K8_ACPI
226static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 224static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
227static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); 225static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
228#endif
229 226
230#ifdef CONFIG_SMP 227#ifdef CONFIG_SMP
231static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) 228static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
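
With the CONFIG_X86_POWERNOW_K8_ACPI conditionals gone, the acpi_data member and the two fill_powernow_table_*() prototypes are always compiled in, which suggests the ACPI path is now mandatory for this driver rather than one of two build variants. Roughly, the struct ends up as (all other members elided here):

	struct powernow_k8_data {
		/* ... fid/vid and table bookkeeping elided ... */

		/* the acpi table needs to be kept. it's only available if
		 * ACPI was used to determine valid frequency/vid/fid states */
		struct acpi_processor_performance acpi_data;

		/* ... pol->cpus pointer, etc. elided ... */
	};
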
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index 42da9bd677d6..435a996a613a 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -19,17 +19,19 @@
19 19
20#include <linux/delay.h> 20#include <linux/delay.h>
21#include <linux/cpufreq.h> 21#include <linux/cpufreq.h>
22#include <linux/timex.h>
23#include <linux/io.h>
22 24
23#include <asm/msr.h> 25#include <asm/msr.h>
24#include <asm/timex.h>
25#include <asm/io.h>
26 26
27#define MMCR_BASE 0xfffef000 /* The default base address */ 27#define MMCR_BASE 0xfffef000 /* The default base address */
28#define OFFS_CPUCTL 0x2 /* CPU Control Register */ 28#define OFFS_CPUCTL 0x2 /* CPU Control Register */
29 29
30static __u8 __iomem *cpuctl; 30static __u8 __iomem *cpuctl;
31 31
32#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) 32#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
33 "sc520_freq", msg)
34#define PFX "sc520_freq: "
33 35
34static struct cpufreq_frequency_table sc520_freq_table[] = { 36static struct cpufreq_frequency_table sc520_freq_table[] = {
35 {0x01, 100000}, 37 {0x01, 100000},
@@ -43,7 +45,8 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
43 45
44 switch (clockspeed_reg & 0x03) { 46 switch (clockspeed_reg & 0x03) {
45 default: 47 default:
46 printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); 48 printk(KERN_ERR PFX "error: cpuctl register has unexpected "
49 "value %02x\n", clockspeed_reg);
47 case 0x01: 50 case 0x01:
48 return 100000; 51 return 100000;
49 case 0x02: 52 case 0x02:
@@ -51,7 +54,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
51 } 54 }
52} 55}
53 56
54static void sc520_freq_set_cpu_state (unsigned int state) 57static void sc520_freq_set_cpu_state(unsigned int state)
55{ 58{
56 59
57 struct cpufreq_freqs freqs; 60 struct cpufreq_freqs freqs;
@@ -76,18 +79,19 @@ static void sc520_freq_set_cpu_state (unsigned int state)
76 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 79 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
77}; 80};
78 81
79static int sc520_freq_verify (struct cpufreq_policy *policy) 82static int sc520_freq_verify(struct cpufreq_policy *policy)
80{ 83{
81 return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); 84 return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]);
82} 85}
83 86
84static int sc520_freq_target (struct cpufreq_policy *policy, 87static int sc520_freq_target(struct cpufreq_policy *policy,
85 unsigned int target_freq, 88 unsigned int target_freq,
86 unsigned int relation) 89 unsigned int relation)
87{ 90{
88 unsigned int newstate = 0; 91 unsigned int newstate = 0;
89 92
90 if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) 93 if (cpufreq_frequency_table_target(policy, sc520_freq_table,
94 target_freq, relation, &newstate))
91 return -EINVAL; 95 return -EINVAL;
92 96
93 sc520_freq_set_cpu_state(newstate); 97 sc520_freq_set_cpu_state(newstate);
@@ -116,7 +120,7 @@ static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
116 120
117 result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); 121 result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table);
118 if (result) 122 if (result)
119 return (result); 123 return result;
120 124
121 cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); 125 cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu);
122 126
@@ -131,7 +135,7 @@ static int sc520_freq_cpu_exit(struct cpufreq_policy *policy)
131} 135}
132 136
133 137
134static struct freq_attr* sc520_freq_attr[] = { 138static struct freq_attr *sc520_freq_attr[] = {
135 &cpufreq_freq_attr_scaling_available_freqs, 139 &cpufreq_freq_attr_scaling_available_freqs,
136 NULL, 140 NULL,
137}; 141};
@@ -155,13 +159,13 @@ static int __init sc520_freq_init(void)
155 int err; 159 int err;
156 160
157 /* Test if we have the right hardware */ 161 /* Test if we have the right hardware */
158 if(c->x86_vendor != X86_VENDOR_AMD || 162 if (c->x86_vendor != X86_VENDOR_AMD ||
159 c->x86 != 4 || c->x86_model != 9) { 163 c->x86 != 4 || c->x86_model != 9) {
160 dprintk("no Elan SC520 processor found!\n"); 164 dprintk("no Elan SC520 processor found!\n");
161 return -ENODEV; 165 return -ENODEV;
162 } 166 }
163 cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); 167 cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
164 if(!cpuctl) { 168 if (!cpuctl) {
165 printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); 169 printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
166 return -ENOMEM; 170 return -ENOMEM;
167 } 171 }
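
The PFX macro added in this file is the standard trick for wrapping long messages: adjacent C string literals are concatenated at compile time, so the log level, the prefix, and the split message parts fuse into one format string. The shape of it, using the message from the hunk above:

	#define PFX "sc520_freq: "

	/* compiles to the single string
	 * "<3>sc520_freq: error: cpuctl register has unexpected value %02x\n" */
	printk(KERN_ERR PFX "error: cpuctl register has unexpected "
			"value %02x\n", clockspeed_reg);
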
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index dedc1e98f168..016c1a4fa3fc 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev;
39 39
40/* speedstep_processor 40/* speedstep_processor
41 */ 41 */
42static unsigned int speedstep_processor = 0; 42static unsigned int speedstep_processor;
43 43
44static u32 pmbase; 44static u32 pmbase;
45 45
@@ -54,7 +54,8 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
54}; 54};
55 55
56 56
57#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) 57#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
58 "speedstep-ich", msg)
58 59
59 60
60/** 61/**
@@ -62,7 +63,7 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
62 * 63 *
63 * Returns: -ENODEV if no register could be found 64 * Returns: -ENODEV if no register could be found
64 */ 65 */
65static int speedstep_find_register (void) 66static int speedstep_find_register(void)
66{ 67{
67 if (!speedstep_chipset_dev) 68 if (!speedstep_chipset_dev)
68 return -ENODEV; 69 return -ENODEV;
@@ -90,7 +91,7 @@ static int speedstep_find_register (void)
90 * 91 *
91 * Tries to change the SpeedStep state. 92 * Tries to change the SpeedStep state.
92 */ 93 */
93static void speedstep_set_state (unsigned int state) 94static void speedstep_set_state(unsigned int state)
94{ 95{
95 u8 pm2_blk; 96 u8 pm2_blk;
96 u8 value; 97 u8 value;
@@ -133,11 +134,11 @@ static void speedstep_set_state (unsigned int state)
133 134
134 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); 135 dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value);
135 136
136 if (state == (value & 0x1)) { 137 if (state == (value & 0x1))
137 dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); 138 dprintk("change to %u MHz succeeded\n",
138 } else { 139 speedstep_get_frequency(speedstep_processor) / 1000);
139 printk (KERN_ERR "cpufreq: change failed - I/O error\n"); 140 else
140 } 141 printk(KERN_ERR "cpufreq: change failed - I/O error\n");
141 142
142 return; 143 return;
143} 144}
@@ -149,7 +150,7 @@ static void speedstep_set_state (unsigned int state)
149 * Tries to activate the SpeedStep status and control registers. 150 * Tries to activate the SpeedStep status and control registers.
150 * Returns -EINVAL on an unsupported chipset, and zero on success. 151 * Returns -EINVAL on an unsupported chipset, and zero on success.
151 */ 152 */
152static int speedstep_activate (void) 153static int speedstep_activate(void)
153{ 154{
154 u16 value = 0; 155 u16 value = 0;
155 156
@@ -175,20 +176,18 @@ static int speedstep_activate (void)
175 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected 176 * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected
176 * chipset, or zero on failure. 177 * chipset, or zero on failure.
177 */ 178 */
178static unsigned int speedstep_detect_chipset (void) 179static unsigned int speedstep_detect_chipset(void)
179{ 180{
180 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 181 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
181 PCI_DEVICE_ID_INTEL_82801DB_12, 182 PCI_DEVICE_ID_INTEL_82801DB_12,
182 PCI_ANY_ID, 183 PCI_ANY_ID, PCI_ANY_ID,
183 PCI_ANY_ID,
184 NULL); 184 NULL);
185 if (speedstep_chipset_dev) 185 if (speedstep_chipset_dev)
186 return 4; /* 4-M */ 186 return 4; /* 4-M */
187 187
188 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 188 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
189 PCI_DEVICE_ID_INTEL_82801CA_12, 189 PCI_DEVICE_ID_INTEL_82801CA_12,
190 PCI_ANY_ID, 190 PCI_ANY_ID, PCI_ANY_ID,
191 PCI_ANY_ID,
192 NULL); 191 NULL);
193 if (speedstep_chipset_dev) 192 if (speedstep_chipset_dev)
194 return 3; /* 3-M */ 193 return 3; /* 3-M */
@@ -196,8 +195,7 @@ static unsigned int speedstep_detect_chipset (void)
196 195
197 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, 196 speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL,
198 PCI_DEVICE_ID_INTEL_82801BA_10, 197 PCI_DEVICE_ID_INTEL_82801BA_10,
199 PCI_ANY_ID, 198 PCI_ANY_ID, PCI_ANY_ID,
200 PCI_ANY_ID,
201 NULL); 199 NULL);
202 if (speedstep_chipset_dev) { 200 if (speedstep_chipset_dev) {
203 /* speedstep.c causes lockups on Dell Inspirons 8000 and 201 /* speedstep.c causes lockups on Dell Inspirons 8000 and
@@ -208,8 +206,7 @@ static unsigned int speedstep_detect_chipset (void)
208 206
209 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, 207 hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL,
210 PCI_DEVICE_ID_INTEL_82815_MC, 208 PCI_DEVICE_ID_INTEL_82815_MC,
211 PCI_ANY_ID, 209 PCI_ANY_ID, PCI_ANY_ID,
212 PCI_ANY_ID,
213 NULL); 210 NULL);
214 211
215 if (!hostbridge) 212 if (!hostbridge)
@@ -236,7 +233,7 @@ static unsigned int _speedstep_get(const struct cpumask *cpus)
236 233
237 cpus_allowed = current->cpus_allowed; 234 cpus_allowed = current->cpus_allowed;
238 set_cpus_allowed_ptr(current, cpus); 235 set_cpus_allowed_ptr(current, cpus);
239 speed = speedstep_get_processor_frequency(speedstep_processor); 236 speed = speedstep_get_frequency(speedstep_processor);
240 set_cpus_allowed_ptr(current, &cpus_allowed); 237 set_cpus_allowed_ptr(current, &cpus_allowed);
241 dprintk("detected %u kHz as current frequency\n", speed); 238 dprintk("detected %u kHz as current frequency\n", speed);
242 return speed; 239 return speed;
@@ -251,11 +248,12 @@ static unsigned int speedstep_get(unsigned int cpu)
251 * speedstep_target - set a new CPUFreq policy 248 * speedstep_target - set a new CPUFreq policy
252 * @policy: new policy 249 * @policy: new policy
253 * @target_freq: the target frequency 250 * @target_freq: the target frequency
254 * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) 251 * @relation: how that frequency relates to achieved frequency
252 * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
255 * 253 *
256 * Sets a new CPUFreq policy. 254 * Sets a new CPUFreq policy.
257 */ 255 */
258static int speedstep_target (struct cpufreq_policy *policy, 256static int speedstep_target(struct cpufreq_policy *policy,
259 unsigned int target_freq, 257 unsigned int target_freq,
260 unsigned int relation) 258 unsigned int relation)
261{ 259{
@@ -264,7 +262,8 @@ static int speedstep_target (struct cpufreq_policy *policy,
264 cpumask_t cpus_allowed; 262 cpumask_t cpus_allowed;
265 int i; 263 int i;
266 264
267 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) 265 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
266 target_freq, relation, &newstate))
268 return -EINVAL; 267 return -EINVAL;
269 268
270 freqs.old = _speedstep_get(policy->cpus); 269 freqs.old = _speedstep_get(policy->cpus);
@@ -308,7 +307,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
308 * Limit must be within speedstep_low_freq and speedstep_high_freq, with 307 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
309 * at least one border included. 308 * at least one border included.
310 */ 309 */
311static int speedstep_verify (struct cpufreq_policy *policy) 310static int speedstep_verify(struct cpufreq_policy *policy)
312{ 311{
313 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 312 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
314} 313}
@@ -322,7 +321,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
322 321
323 /* only run on CPU to be set, or on its sibling */ 322 /* only run on CPU to be set, or on its sibling */
324#ifdef CONFIG_SMP 323#ifdef CONFIG_SMP
325 cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); 324 cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
326#endif 325#endif
327 326
328 cpus_allowed = current->cpus_allowed; 327 cpus_allowed = current->cpus_allowed;
@@ -344,7 +343,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
344 return -EIO; 343 return -EIO;
345 344
346 dprintk("currently at %s speed setting - %i MHz\n", 345 dprintk("currently at %s speed setting - %i MHz\n",
347 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", 346 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
347 ? "low" : "high",
348 (speed / 1000)); 348 (speed / 1000));
349 349
350 /* cpuinfo and default policy values */ 350 /* cpuinfo and default policy values */
@@ -352,9 +352,9 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
352 352
353 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); 353 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
354 if (result) 354 if (result)
355 return (result); 355 return result;
356 356
357 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); 357 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
358 358
359 return 0; 359 return 0;
360} 360}
@@ -366,7 +366,7 @@ static int speedstep_cpu_exit(struct cpufreq_policy *policy)
366 return 0; 366 return 0;
367} 367}
368 368
369static struct freq_attr* speedstep_attr[] = { 369static struct freq_attr *speedstep_attr[] = {
370 &cpufreq_freq_attr_scaling_available_freqs, 370 &cpufreq_freq_attr_scaling_available_freqs,
371 NULL, 371 NULL,
372}; 372};
@@ -396,13 +396,15 @@ static int __init speedstep_init(void)
396 /* detect processor */ 396 /* detect processor */
397 speedstep_processor = speedstep_detect_processor(); 397 speedstep_processor = speedstep_detect_processor();
398 if (!speedstep_processor) { 398 if (!speedstep_processor) {
399 dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); 399 dprintk("Intel(R) SpeedStep(TM) capable processor "
400 "not found\n");
400 return -ENODEV; 401 return -ENODEV;
401 } 402 }
402 403
403 /* detect chipset */ 404 /* detect chipset */
404 if (!speedstep_detect_chipset()) { 405 if (!speedstep_detect_chipset()) {
405 dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); 406 dprintk("Intel(R) SpeedStep(TM) for this chipset not "
407 "(yet) available.\n");
406 return -ENODEV; 408 return -ENODEV;
407 } 409 }
408 410
@@ -431,9 +433,11 @@ static void __exit speedstep_exit(void)
431} 433}
432 434
433 435
434MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>"); 436MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); 437 "Dominik Brodowski <linux@brodo.de>");
436MODULE_LICENSE ("GPL"); 438MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
439 "with ICH-M southbridges.");
440MODULE_LICENSE("GPL");
437 441
438module_init(speedstep_init); 442module_init(speedstep_init);
439module_exit(speedstep_exit); 443module_exit(speedstep_exit);
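
A note on _speedstep_get() above: the frequency has to be read on the CPU being queried, so the idiom of this era is to save the caller's affinity, pin the task onto the target CPU, read, and restore. Sketched with comments (the body matches the file; the comments are interpretation):

	static unsigned int _speedstep_get(const struct cpumask *cpus)
	{
		unsigned int speed;
		cpumask_t cpus_allowed;

		cpus_allowed = current->cpus_allowed;	/* save old affinity */
		set_cpus_allowed_ptr(current, cpus);	/* migrate to target */

		/* any rdmsr inside this call now runs on the right core */
		speed = speedstep_get_frequency(speedstep_processor);

		set_cpus_allowed_ptr(current, &cpus_allowed);	/* restore */
		dprintk("detected %u kHz as current frequency\n", speed);
		return speed;
	}
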
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index cdac7d62369b..2e3c6862657b 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -16,12 +16,16 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17 17
18#include <asm/msr.h> 18#include <asm/msr.h>
19#include <asm/tsc.h>
19#include "speedstep-lib.h" 20#include "speedstep-lib.h"
20 21
21#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) 22#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
23 "speedstep-lib", msg)
24
25#define PFX "speedstep-lib: "
22 26
23#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK 27#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
24static int relaxed_check = 0; 28static int relaxed_check;
25#else 29#else
26#define relaxed_check 0 30#define relaxed_check 0
27#endif 31#endif
@@ -30,14 +34,14 @@ static int relaxed_check = 0;
30 * GET PROCESSOR CORE SPEED IN KHZ * 34 * GET PROCESSOR CORE SPEED IN KHZ *
31 *********************************************************************/ 35 *********************************************************************/
32 36
33static unsigned int pentium3_get_frequency (unsigned int processor) 37static unsigned int pentium3_get_frequency(unsigned int processor)
34{ 38{
35 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ 39 /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */
36 struct { 40 struct {
37 unsigned int ratio; /* Frequency Multiplier (x10) */ 41 unsigned int ratio; /* Frequency Multiplier (x10) */
38 u8 bitmap; /* power on configuration bits 42 u8 bitmap; /* power on configuration bits
39 [27, 25:22] (in MSR 0x2a) */ 43 [27, 25:22] (in MSR 0x2a) */
40 } msr_decode_mult [] = { 44 } msr_decode_mult[] = {
41 { 30, 0x01 }, 45 { 30, 0x01 },
42 { 35, 0x05 }, 46 { 35, 0x05 },
43 { 40, 0x02 }, 47 { 40, 0x02 },
@@ -52,7 +56,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
52 { 85, 0x26 }, 56 { 85, 0x26 },
53 { 90, 0x20 }, 57 { 90, 0x20 },
54 { 100, 0x2b }, 58 { 100, 0x2b },
55 { 0, 0xff } /* error or unknown value */ 59 { 0, 0xff } /* error or unknown value */
56 }; 60 };
57 61
58 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ 62 /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */
@@ -60,7 +64,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
60 unsigned int value; /* Front Side Bus speed in MHz */ 64 unsigned int value; /* Front Side Bus speed in MHz */
61 u8 bitmap; /* power on configuration bits [18: 19] 65 u8 bitmap; /* power on configuration bits [18: 19]
62 (in MSR 0x2a) */ 66 (in MSR 0x2a) */
63 } msr_decode_fsb [] = { 67 } msr_decode_fsb[] = {
64 { 66, 0x0 }, 68 { 66, 0x0 },
65 { 100, 0x2 }, 69 { 100, 0x2 },
66 { 133, 0x1 }, 70 { 133, 0x1 },
@@ -85,7 +89,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
85 } 89 }
86 90
87 /* decode the multiplier */ 91 /* decode the multiplier */
88 if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { 92 if (processor == SPEEDSTEP_CPU_PIII_C_EARLY) {
89 dprintk("workaround for early PIIIs\n"); 93 dprintk("workaround for early PIIIs\n");
90 msr_lo &= 0x03c00000; 94 msr_lo &= 0x03c00000;
91 } else 95 } else
@@ -97,9 +101,10 @@ static unsigned int pentium3_get_frequency (unsigned int processor)
97 j++; 101 j++;
98 } 102 }
99 103
100 dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); 104 dprintk("speed is %u\n",
105 (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100));
101 106
102 return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); 107 return msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100;
103} 108}
104 109
105 110
@@ -112,20 +117,23 @@ static unsigned int pentiumM_get_frequency(void)
112 117
113 /* see table B-2 of 24547212.pdf */ 118 /* see table B-2 of 24547212.pdf */
114 if (msr_lo & 0x00040000) { 119 if (msr_lo & 0x00040000) {
115 printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); 120 printk(KERN_DEBUG PFX "PM - invalid FSB: 0x%x 0x%x\n",
121 msr_lo, msr_tmp);
116 return 0; 122 return 0;
117 } 123 }
118 124
119 msr_tmp = (msr_lo >> 22) & 0x1f; 125 msr_tmp = (msr_lo >> 22) & 0x1f;
120 dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); 126 dprintk("bits 22-26 are 0x%x, speed is %u\n",
127 msr_tmp, (msr_tmp * 100 * 1000));
121 128
122 return (msr_tmp * 100 * 1000); 129 return msr_tmp * 100 * 1000;
123} 130}
124 131
125static unsigned int pentium_core_get_frequency(void) 132static unsigned int pentium_core_get_frequency(void)
126{ 133{
127 u32 fsb = 0; 134 u32 fsb = 0;
128 u32 msr_lo, msr_tmp; 135 u32 msr_lo, msr_tmp;
136 int ret;
129 137
130 rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); 138 rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp);
131 /* see table B-2 of 25366920.pdf */ 139 /* see table B-2 of 25366920.pdf */
@@ -153,12 +161,15 @@ static unsigned int pentium_core_get_frequency(void)
153 } 161 }
154 162
155 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); 163 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
156 dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); 164 dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n",
165 msr_lo, msr_tmp);
157 166
158 msr_tmp = (msr_lo >> 22) & 0x1f; 167 msr_tmp = (msr_lo >> 22) & 0x1f;
159 dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); 168 dprintk("bits 22-26 are 0x%x, speed is %u\n",
169 msr_tmp, (msr_tmp * fsb));
160 170
161 return (msr_tmp * fsb); 171 ret = (msr_tmp * fsb);
172 return ret;
162} 173}
163 174
164 175
@@ -167,6 +178,16 @@ static unsigned int pentium4_get_frequency(void)
167 struct cpuinfo_x86 *c = &boot_cpu_data; 178 struct cpuinfo_x86 *c = &boot_cpu_data;
168 u32 msr_lo, msr_hi, mult; 179 u32 msr_lo, msr_hi, mult;
169 unsigned int fsb = 0; 180 unsigned int fsb = 0;
181 unsigned int ret;
182 u8 fsb_code;
183
184 /* Pentium 4 Model 0 and 1 do not have the Core Clock Frequency
185 * to System Bus Frequency Ratio Field in the Processor Frequency
186 * Configuration Register of the MSR. Therefore the current
187 * frequency cannot be calculated and has to be measured.
188 */
189 if (c->x86_model < 2)
190 return cpu_khz;
170 191
171 rdmsr(0x2c, msr_lo, msr_hi); 192 rdmsr(0x2c, msr_lo, msr_hi);
172 193
@@ -177,62 +198,61 @@ static unsigned int pentium4_get_frequency(void)
177 * revision #12 in Table B-1: MSRs in the Pentium 4 and 198 * revision #12 in Table B-1: MSRs in the Pentium 4 and
178 * Intel Xeon Processors, on page B-4 and B-5. 199 * Intel Xeon Processors, on page B-4 and B-5.
179 */ 200 */
180 if (c->x86_model < 2) 201 fsb_code = (msr_lo >> 16) & 0x7;
202 switch (fsb_code) {
203 case 0:
181 fsb = 100 * 1000; 204 fsb = 100 * 1000;
182 else { 205 break;
183 u8 fsb_code = (msr_lo >> 16) & 0x7; 206 case 1:
184 switch (fsb_code) { 207 fsb = 13333 * 10;
185 case 0: 208 break;
186 fsb = 100 * 1000; 209 case 2:
187 break; 210 fsb = 200 * 1000;
188 case 1: 211 break;
189 fsb = 13333 * 10;
190 break;
191 case 2:
192 fsb = 200 * 1000;
193 break;
194 }
195 } 212 }
196 213
197 if (!fsb) 214 if (!fsb)
198 printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); 215 printk(KERN_DEBUG PFX "couldn't detect FSB speed. "
216 "Please send an e-mail to <linux@brodo.de>\n");
199 217
200 /* Multiplier. */ 218 /* Multiplier. */
201 mult = msr_lo >> 24; 219 mult = msr_lo >> 24;
202 220
203 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); 221 dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n",
222 fsb, mult, (fsb * mult));
204 223
205 return (fsb * mult); 224 ret = (fsb * mult);
225 return ret;
206} 226}
207 227
208 228
209unsigned int speedstep_get_processor_frequency(unsigned int processor) 229unsigned int speedstep_get_frequency(unsigned int processor)
210{ 230{
211 switch (processor) { 231 switch (processor) {
212 case SPEEDSTEP_PROCESSOR_PCORE: 232 case SPEEDSTEP_CPU_PCORE:
213 return pentium_core_get_frequency(); 233 return pentium_core_get_frequency();
214 case SPEEDSTEP_PROCESSOR_PM: 234 case SPEEDSTEP_CPU_PM:
215 return pentiumM_get_frequency(); 235 return pentiumM_get_frequency();
216 case SPEEDSTEP_PROCESSOR_P4D: 236 case SPEEDSTEP_CPU_P4D:
217 case SPEEDSTEP_PROCESSOR_P4M: 237 case SPEEDSTEP_CPU_P4M:
218 return pentium4_get_frequency(); 238 return pentium4_get_frequency();
219 case SPEEDSTEP_PROCESSOR_PIII_T: 239 case SPEEDSTEP_CPU_PIII_T:
220 case SPEEDSTEP_PROCESSOR_PIII_C: 240 case SPEEDSTEP_CPU_PIII_C:
221 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: 241 case SPEEDSTEP_CPU_PIII_C_EARLY:
222 return pentium3_get_frequency(processor); 242 return pentium3_get_frequency(processor);
223 default: 243 default:
224 return 0; 244 return 0;
225 }; 245 };
226 return 0; 246 return 0;
227} 247}
228EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); 248EXPORT_SYMBOL_GPL(speedstep_get_frequency);
229 249
230 250
231/********************************************************************* 251/*********************************************************************
232 * DETECT SPEEDSTEP-CAPABLE PROCESSOR * 252 * DETECT SPEEDSTEP-CAPABLE PROCESSOR *
233 *********************************************************************/ 253 *********************************************************************/
234 254
235unsigned int speedstep_detect_processor (void) 255unsigned int speedstep_detect_processor(void)
236{ 256{
237 struct cpuinfo_x86 *c = &cpu_data(0); 257 struct cpuinfo_x86 *c = &cpu_data(0);
238 u32 ebx, msr_lo, msr_hi; 258 u32 ebx, msr_lo, msr_hi;
@@ -261,7 +281,7 @@ unsigned int speedstep_detect_processor (void)
261 * sample has ebx = 0x0f, production has 0x0e. 281 * sample has ebx = 0x0f, production has 0x0e.
262 */ 282 */
263 if ((ebx == 0x0e) || (ebx == 0x0f)) 283 if ((ebx == 0x0e) || (ebx == 0x0f))
264 return SPEEDSTEP_PROCESSOR_P4M; 284 return SPEEDSTEP_CPU_P4M;
265 break; 285 break;
266 case 7: 286 case 7:
267 /* 287 /*
@@ -272,7 +292,7 @@ unsigned int speedstep_detect_processor (void)
272 * samples are only of B-stepping... 292 * samples are only of B-stepping...
273 */ 293 */
274 if (ebx == 0x0e) 294 if (ebx == 0x0e)
275 return SPEEDSTEP_PROCESSOR_P4M; 295 return SPEEDSTEP_CPU_P4M;
276 break; 296 break;
277 case 9: 297 case 9:
278 /* 298 /*
@@ -288,10 +308,13 @@ unsigned int speedstep_detect_processor (void)
288 * M-P4-Ms may have either ebx=0xe or 0xf [see above] 308 * M-P4-Ms may have either ebx=0xe or 0xf [see above]
289 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] 309 * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf]
290 * also, M-P4M HTs have ebx=0x8, too 310 * also, M-P4M HTs have ebx=0x8, too
291 * For now, they are distinguished by the model_id string 311 * For now, they are distinguished by the model_id
312 * string
292 */ 313 */
293 if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) 314 if ((ebx == 0x0e) ||
294 return SPEEDSTEP_PROCESSOR_P4M; 315 (strstr(c->x86_model_id,
316 "Mobile Intel(R) Pentium(R) 4") != NULL))
317 return SPEEDSTEP_CPU_P4M;
295 break; 318 break;
296 default: 319 default:
297 break; 320 break;
@@ -301,7 +324,8 @@ unsigned int speedstep_detect_processor (void)
301 324
302 switch (c->x86_model) { 325 switch (c->x86_model) {
303 case 0x0B: /* Intel PIII [Tualatin] */ 326 case 0x0B: /* Intel PIII [Tualatin] */
304 /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ 327 /* cpuid_ebx(1) is 0x04 for desktop PIII,
328 * 0x06 for mobile PIII-M */
305 ebx = cpuid_ebx(0x00000001); 329 ebx = cpuid_ebx(0x00000001);
306 dprintk("ebx is %x\n", ebx); 330 dprintk("ebx is %x\n", ebx);
307 331
@@ -313,14 +337,15 @@ unsigned int speedstep_detect_processor (void)
313 /* So far all PIII-M processors support SpeedStep. See 337 /* So far all PIII-M processors support SpeedStep. See
314 * Intel's 24540640.pdf of June 2003 338 * Intel's 24540640.pdf of June 2003
315 */ 339 */
316 return SPEEDSTEP_PROCESSOR_PIII_T; 340 return SPEEDSTEP_CPU_PIII_T;
317 341
318 case 0x08: /* Intel PIII [Coppermine] */ 342 case 0x08: /* Intel PIII [Coppermine] */
319 343
320 /* all mobile PIII Coppermines have FSB 100 MHz 344 /* all mobile PIII Coppermines have FSB 100 MHz
321 * ==> sort out a few desktop PIIIs. */ 345 * ==> sort out a few desktop PIIIs. */
322 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); 346 rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi);
323 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); 347 dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n",
348 msr_lo, msr_hi);
324 msr_lo &= 0x00c0000; 349 msr_lo &= 0x00c0000;
325 if (msr_lo != 0x0080000) 350 if (msr_lo != 0x0080000)
326 return 0; 351 return 0;
@@ -332,13 +357,15 @@ unsigned int speedstep_detect_processor (void)
332 * bit 56 or 57 is set 357 * bit 56 or 57 is set
333 */ 358 */
334 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); 359 rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi);
335 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); 360 dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n",
336 if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { 361 msr_lo, msr_hi);
362 if ((msr_hi & (1<<18)) &&
363 (relaxed_check ? 1 : (msr_hi & (3<<24)))) {
337 if (c->x86_mask == 0x01) { 364 if (c->x86_mask == 0x01) {
338 dprintk("early PIII version\n"); 365 dprintk("early PIII version\n");
339 return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; 366 return SPEEDSTEP_CPU_PIII_C_EARLY;
340 } else 367 } else
341 return SPEEDSTEP_PROCESSOR_PIII_C; 368 return SPEEDSTEP_CPU_PIII_C;
342 } 369 }
343 370
344 default: 371 default:
@@ -369,7 +396,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
369 dprintk("trying to determine both speeds\n"); 396 dprintk("trying to determine both speeds\n");
370 397
371 /* get current speed */ 398 /* get current speed */
372 prev_speed = speedstep_get_processor_frequency(processor); 399 prev_speed = speedstep_get_frequency(processor);
373 if (!prev_speed) 400 if (!prev_speed)
374 return -EIO; 401 return -EIO;
375 402
@@ -379,7 +406,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
379 406
380 /* switch to low state */ 407 /* switch to low state */
381 set_state(SPEEDSTEP_LOW); 408 set_state(SPEEDSTEP_LOW);
382 *low_speed = speedstep_get_processor_frequency(processor); 409 *low_speed = speedstep_get_frequency(processor);
383 if (!*low_speed) { 410 if (!*low_speed) {
384 ret = -EIO; 411 ret = -EIO;
385 goto out; 412 goto out;
@@ -398,7 +425,7 @@ unsigned int speedstep_get_freqs(unsigned int processor,
398 if (transition_latency) 425 if (transition_latency)
399 do_gettimeofday(&tv2); 426 do_gettimeofday(&tv2);
400 427
401 *high_speed = speedstep_get_processor_frequency(processor); 428 *high_speed = speedstep_get_frequency(processor);
402 if (!*high_speed) { 429 if (!*high_speed) {
403 ret = -EIO; 430 ret = -EIO;
404 goto out; 431 goto out;
@@ -426,9 +453,12 @@ unsigned int speedstep_get_freqs(unsigned int processor,
426 /* check if the latency measurement is too high or too low 453 /* check if the latency measurement is too high or too low
427 * and set it to a safe value (500uSec) in that case 454 * and set it to a safe value (500uSec) in that case
428 */ 455 */
429 if (*transition_latency > 10000000 || *transition_latency < 50000) { 456 if (*transition_latency > 10000000 ||
430 printk (KERN_WARNING "speedstep: frequency transition measured seems out of " 457 *transition_latency < 50000) {
431 "range (%u nSec), falling back to a safe one of %u nSec.\n", 458 printk(KERN_WARNING PFX "frequency transition "
459 "measured seems out of range (%u "
460 "nSec), falling back to a safe one of"
461 "%u nSec.\n",
432 *transition_latency, 500000); 462 *transition_latency, 500000);
433 *transition_latency = 500000; 463 *transition_latency = 500000;
434 } 464 }
@@ -436,15 +466,16 @@ unsigned int speedstep_get_freqs(unsigned int processor,
436 466
437out: 467out:
438 local_irq_restore(flags); 468 local_irq_restore(flags);
439 return (ret); 469 return ret;
440} 470}
441EXPORT_SYMBOL_GPL(speedstep_get_freqs); 471EXPORT_SYMBOL_GPL(speedstep_get_freqs);
442 472
443#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK 473#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK
444module_param(relaxed_check, int, 0444); 474module_param(relaxed_check, int, 0444);
445MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); 475MODULE_PARM_DESC(relaxed_check,
476 "Don't do all checks for speedstep capability.");
446#endif 477#endif
447 478
448MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); 479MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
449MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); 480MODULE_DESCRIPTION("Library for Intel SpeedStep 1 or 2 cpufreq drivers.");
450MODULE_LICENSE ("GPL"); 481MODULE_LICENSE("GPL");
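
The restructured pentium4_get_frequency() reads much better with the model check hoisted: P4 models 0 and 1 lack the bus-ratio field in MSR 0x2c, so the code now returns the TSC-calibrated cpu_khz for them up front (hence the new <asm/tsc.h> include) and decodes the MSR only for later models. Condensed from the hunks:

	if (c->x86_model < 2)
		return cpu_khz;		/* no ratio field: use measured speed */

	rdmsr(0x2c, msr_lo, msr_hi);

	fsb_code = (msr_lo >> 16) & 0x7;
	switch (fsb_code) {
	case 0: fsb = 100 * 1000; break;
	case 1: fsb = 13333 * 10; break;	/* 133.33 MHz, in kHz */
	case 2: fsb = 200 * 1000; break;
	}

	mult = msr_lo >> 24;	/* bus-to-core multiplier */
	return fsb * mult;	/* core clock in kHz */
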
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
index b11bcc608cac..2b6c04e5a304 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h
@@ -12,17 +12,17 @@
12 12
13/* processors */ 13/* processors */
14 14
15#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ 15#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */
16#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ 16#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */
17#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ 17#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */
18#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ 18#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */
19 19
20/* the following processors are not speedstep-capable and are not auto-detected 20/* the following processors are not speedstep-capable and are not auto-detected
21 * in speedstep_detect_processor(). However, their speed can be detected using 21 * in speedstep_detect_processor(). However, their speed can be detected using
22 * the speedstep_get_processor_frequency() call. */ 22 * the speedstep_get_frequency() call. */
23#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ 23#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */
24#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ 24#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */
25#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ 25#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */
26 26
27/* speedstep states -- only two of them */ 27/* speedstep states -- only two of them */
28 28
@@ -34,7 +34,7 @@
34extern unsigned int speedstep_detect_processor (void); 34extern unsigned int speedstep_detect_processor (void);
35 35
36/* detect the current speed (in khz) of the processor */ 36/* detect the current speed (in khz) of the processor */
37extern unsigned int speedstep_get_processor_frequency(unsigned int processor); 37extern unsigned int speedstep_get_frequency(unsigned int processor);
38 38
39 39
40/* detect the low and high speeds of the processor. The callback 40/* detect the low and high speeds of the processor. The callback
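
The renamed SPEEDSTEP_CPU_* constants keep their old values, including the 0xFFFFFFxx sentinels: IDs in that range mark processors that are not SpeedStep-capable, so speedstep_detect_processor() never returns them, but speedstep_get_frequency() can still read their speed. The dispatch, trimmed to show that split:

	switch (processor) {
	/* auto-detected, SpeedStep-capable parts */
	case SPEEDSTEP_CPU_PIII_T:
	case SPEEDSTEP_CPU_PIII_C:
	case SPEEDSTEP_CPU_PIII_C_EARLY:
		return pentium3_get_frequency(processor);
	/* readout-only sentinels */
	case SPEEDSTEP_CPU_PM:
		return pentiumM_get_frequency();
	case SPEEDSTEP_CPU_PCORE:
		return pentium_core_get_frequency();
	default:
		return 0;
	}
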
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 8a85c93bd62a..befea088e4f5 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -19,8 +19,8 @@
19#include <linux/cpufreq.h> 19#include <linux/cpufreq.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/io.h>
22#include <asm/ist.h> 23#include <asm/ist.h>
23#include <asm/io.h>
24 24
25#include "speedstep-lib.h" 25#include "speedstep-lib.h"
26 26
@@ -30,12 +30,12 @@
30 * If user gives it, these are used. 30 * If user gives it, these are used.
31 * 31 *
32 */ 32 */
33static int smi_port = 0; 33static int smi_port;
34static int smi_cmd = 0; 34static int smi_cmd;
35static unsigned int smi_sig = 0; 35static unsigned int smi_sig;
36 36
37/* info about the processor */ 37/* info about the processor */
38static unsigned int speedstep_processor = 0; 38static unsigned int speedstep_processor;
39 39
40/* 40/*
41 * There are only two frequency states for each processor. Values 41 * There are only two frequency states for each processor. Values
@@ -56,12 +56,13 @@ static struct cpufreq_frequency_table speedstep_freqs[] = {
56 * of DMA activity going on? */ 56 * of DMA activity going on? */
57#define SMI_TRIES 5 57#define SMI_TRIES 5
58 58
59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) 59#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \
60 "speedstep-smi", msg)
60 61
61/** 62/**
62 * speedstep_smi_ownership 63 * speedstep_smi_ownership
63 */ 64 */
64static int speedstep_smi_ownership (void) 65static int speedstep_smi_ownership(void)
65{ 66{
66 u32 command, result, magic, dummy; 67 u32 command, result, magic, dummy;
67 u32 function = GET_SPEEDSTEP_OWNER; 68 u32 function = GET_SPEEDSTEP_OWNER;
@@ -70,16 +71,18 @@ static int speedstep_smi_ownership (void)
70 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 71 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
71 magic = virt_to_phys(magic_data); 72 magic = virt_to_phys(magic_data);
72 73
73 dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); 74 dprintk("trying to obtain ownership with command %x at port %x\n",
75 command, smi_port);
74 76
75 __asm__ __volatile__( 77 __asm__ __volatile__(
76 "push %%ebp\n" 78 "push %%ebp\n"
77 "out %%al, (%%dx)\n" 79 "out %%al, (%%dx)\n"
78 "pop %%ebp\n" 80 "pop %%ebp\n"
79 : "=D" (result), "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy), 81 : "=D" (result),
80 "=S" (dummy) 82 "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
83 "=S" (dummy)
81 : "a" (command), "b" (function), "c" (0), "d" (smi_port), 84 : "a" (command), "b" (function), "c" (0), "d" (smi_port),
82 "D" (0), "S" (magic) 85 "D" (0), "S" (magic)
83 : "memory" 86 : "memory"
84 ); 87 );
85 88
@@ -97,10 +100,10 @@ static int speedstep_smi_ownership (void)
97 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing 100 * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing
98 * shows that the latter occurs if !(ist_info.event & 0xFFFF). 101 * shows that the latter occurs if !(ist_info.event & 0xFFFF).
99 */ 102 */
100static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) 103static int speedstep_smi_get_freqs(unsigned int *low, unsigned int *high)
101{ 104{
102 u32 command, result = 0, edi, high_mhz, low_mhz, dummy; 105 u32 command, result = 0, edi, high_mhz, low_mhz, dummy;
103 u32 state=0; 106 u32 state = 0;
104 u32 function = GET_SPEEDSTEP_FREQS; 107 u32 function = GET_SPEEDSTEP_FREQS;
105 108
106 if (!(ist_info.event & 0xFFFF)) { 109 if (!(ist_info.event & 0xFFFF)) {
@@ -110,17 +113,25 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
110 113
111 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 114 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
112 115
113 dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); 116 dprintk("trying to determine frequencies with command %x at port %x\n",
117 command, smi_port);
114 118
115 __asm__ __volatile__( 119 __asm__ __volatile__(
116 "push %%ebp\n" 120 "push %%ebp\n"
117 "out %%al, (%%dx)\n" 121 "out %%al, (%%dx)\n"
118 "pop %%ebp" 122 "pop %%ebp"
119 : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi), "=S" (dummy) 123 : "=a" (result),
120 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) 124 "=b" (high_mhz),
125 "=c" (low_mhz),
126 "=d" (state), "=D" (edi), "=S" (dummy)
127 : "a" (command),
128 "b" (function),
129 "c" (state),
130 "d" (smi_port), "S" (0), "D" (0)
121 ); 131 );
122 132
123 dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); 133 dprintk("result %x, low_freq %u, high_freq %u\n",
134 result, low_mhz, high_mhz);
124 135
125 /* abort if results are obviously incorrect... */ 136 /* abort if results are obviously incorrect... */
126 if ((high_mhz + low_mhz) < 600) 137 if ((high_mhz + low_mhz) < 600)
@@ -137,26 +148,30 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high)
137 * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 148 * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
138 * 149 *
139 */ 150 */
140static int speedstep_get_state (void) 151static int speedstep_get_state(void)
141{ 152{
142 u32 function=GET_SPEEDSTEP_STATE; 153 u32 function = GET_SPEEDSTEP_STATE;
143 u32 result, state, edi, command, dummy; 154 u32 result, state, edi, command, dummy;
144 155
145 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 156 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
146 157
147 dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); 158 dprintk("trying to determine current setting with command %x "
159 "at port %x\n", command, smi_port);
148 160
149 __asm__ __volatile__( 161 __asm__ __volatile__(
150 "push %%ebp\n" 162 "push %%ebp\n"
151 "out %%al, (%%dx)\n" 163 "out %%al, (%%dx)\n"
152 "pop %%ebp\n" 164 "pop %%ebp\n"
153 : "=a" (result), "=b" (state), "=D" (edi), "=c" (dummy), "=d" (dummy), "=S" (dummy) 165 : "=a" (result),
154 : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0), "D" (0) 166 "=b" (state), "=D" (edi),
167 "=c" (dummy), "=d" (dummy), "=S" (dummy)
168 : "a" (command), "b" (function), "c" (0),
169 "d" (smi_port), "S" (0), "D" (0)
155 ); 170 );
156 171
157 dprintk("state is %x, result is %x\n", state, result); 172 dprintk("state is %x, result is %x\n", state, result);
158 173
159 return (state & 1); 174 return state & 1;
160} 175}
161 176
162 177
@@ -165,11 +180,11 @@ static int speedstep_get_state (void)
165 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) 180 * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH)
166 * 181 *
167 */ 182 */
168static void speedstep_set_state (unsigned int state) 183static void speedstep_set_state(unsigned int state)
169{ 184{
170 unsigned int result = 0, command, new_state, dummy; 185 unsigned int result = 0, command, new_state, dummy;
171 unsigned long flags; 186 unsigned long flags;
172 unsigned int function=SET_SPEEDSTEP_STATE; 187 unsigned int function = SET_SPEEDSTEP_STATE;
173 unsigned int retry = 0; 188 unsigned int retry = 0;
174 189
175 if (state > 0x1) 190 if (state > 0x1)
@@ -180,11 +195,14 @@ static void speedstep_set_state (unsigned int state)
180 195
181 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); 196 command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff);
182 197
183 dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); 198 dprintk("trying to set frequency to state %u "
199 "with command %x at port %x\n",
200 state, command, smi_port);
184 201
185 do { 202 do {
186 if (retry) { 203 if (retry) {
187 dprintk("retry %u, previous result %u, waiting...\n", retry, result); 204 dprintk("retry %u, previous result %u, waiting...\n",
205 retry, result);
188 mdelay(retry * 50); 206 mdelay(retry * 50);
189 } 207 }
190 retry++; 208 retry++;
@@ -192,20 +210,26 @@ static void speedstep_set_state (unsigned int state)
192 "push %%ebp\n" 210 "push %%ebp\n"
193 "out %%al, (%%dx)\n" 211 "out %%al, (%%dx)\n"
194 "pop %%ebp" 212 "pop %%ebp"
195 : "=b" (new_state), "=D" (result), "=c" (dummy), "=a" (dummy), 213 : "=b" (new_state), "=D" (result),
196 "=d" (dummy), "=S" (dummy) 214 "=c" (dummy), "=a" (dummy),
197 : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0), "D" (0) 215 "=d" (dummy), "=S" (dummy)
216 : "a" (command), "b" (function), "c" (state),
217 "d" (smi_port), "S" (0), "D" (0)
198 ); 218 );
199 } while ((new_state != state) && (retry <= SMI_TRIES)); 219 } while ((new_state != state) && (retry <= SMI_TRIES));
200 220
201 /* enable IRQs */ 221 /* enable IRQs */
202 local_irq_restore(flags); 222 local_irq_restore(flags);
203 223
204 if (new_state == state) { 224 if (new_state == state)
205 dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); 225 dprintk("change to %u MHz succeeded after %u tries "
206 } else { 226 "with result %u\n",
207 printk(KERN_ERR "cpufreq: change to state %u failed with new_state %u and result %u\n", state, new_state, result); 227 (speedstep_freqs[new_state].frequency / 1000),
208 } 228 retry, result);
229 else
230 printk(KERN_ERR "cpufreq: change to state %u "
231 "failed with new_state %u and result %u\n",
232 state, new_state, result);
209 233
210 return; 234 return;
211} 235}
@@ -219,13 +243,14 @@ static void speedstep_set_state (unsigned int state)
219 * 243 *
220 * Sets a new CPUFreq policy/freq. 244 * Sets a new CPUFreq policy/freq.
221 */ 245 */
222static int speedstep_target (struct cpufreq_policy *policy, 246static int speedstep_target(struct cpufreq_policy *policy,
223 unsigned int target_freq, unsigned int relation) 247 unsigned int target_freq, unsigned int relation)
224{ 248{
225 unsigned int newstate = 0; 249 unsigned int newstate = 0;
226 struct cpufreq_freqs freqs; 250 struct cpufreq_freqs freqs;
227 251
228 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) 252 if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
253 target_freq, relation, &newstate))
229 return -EINVAL; 254 return -EINVAL;
230 255
231 freqs.old = speedstep_freqs[speedstep_get_state()].frequency; 256 freqs.old = speedstep_freqs[speedstep_get_state()].frequency;
@@ -250,7 +275,7 @@ static int speedstep_target (struct cpufreq_policy *policy,
250 * Limit must be within speedstep_low_freq and speedstep_high_freq, with 275 * Limit must be within speedstep_low_freq and speedstep_high_freq, with
251 * at least one border included. 276 * at least one border included.
252 */ 277 */
253static int speedstep_verify (struct cpufreq_policy *policy) 278static int speedstep_verify(struct cpufreq_policy *policy)
254{ 279{
255 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); 280 return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]);
256} 281}
@@ -259,7 +284,8 @@ static int speedstep_verify (struct cpufreq_policy *policy)
259static int speedstep_cpu_init(struct cpufreq_policy *policy) 284static int speedstep_cpu_init(struct cpufreq_policy *policy)
260{ 285{
261 int result; 286 int result;
262 unsigned int speed,state; 287 unsigned int speed, state;
288 unsigned int *low, *high;
263 289
264 /* capability check */ 290 /* capability check */
265 if (policy->cpu != 0) 291 if (policy->cpu != 0)
@@ -272,19 +298,23 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
272 } 298 }
273 299
274 /* detect low and high frequency */ 300 /* detect low and high frequency */
275 result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, 301 low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
276 &speedstep_freqs[SPEEDSTEP_HIGH].frequency); 302 high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;
303
304 result = speedstep_smi_get_freqs(low, high);
277 if (result) { 305 if (result) {
 278 /* fall back to speedstep_lib.c detection mechanism: try both states out */ 306 /* fall back to speedstep_lib.c detection mechanism:
279 dprintk("could not detect low and high frequencies by SMI call.\n"); 307 * try both states out */
308 dprintk("could not detect low and high frequencies "
309 "by SMI call.\n");
280 result = speedstep_get_freqs(speedstep_processor, 310 result = speedstep_get_freqs(speedstep_processor,
281 &speedstep_freqs[SPEEDSTEP_LOW].frequency, 311 low, high,
282 &speedstep_freqs[SPEEDSTEP_HIGH].frequency,
283 NULL, 312 NULL,
284 &speedstep_set_state); 313 &speedstep_set_state);
285 314
286 if (result) { 315 if (result) {
287 dprintk("could not detect two different speeds -- aborting.\n"); 316 dprintk("could not detect two different speeds"
317 " -- aborting.\n");
288 return result; 318 return result;
289 } else 319 } else
290 dprintk("workaround worked.\n"); 320 dprintk("workaround worked.\n");
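
The low/high aliases introduced in this hunk exist purely to shorten the call sites; both point into the static frequency table, so the SMI query and the measurement fallback fill the same slots. In outline:

	unsigned int *low, *high;

	low = &speedstep_freqs[SPEEDSTEP_LOW].frequency;
	high = &speedstep_freqs[SPEEDSTEP_HIGH].frequency;

	result = speedstep_smi_get_freqs(low, high);
	if (result)
		/* SMI call failed: try both states and measure instead */
		result = speedstep_get_freqs(speedstep_processor, low, high,
					     NULL, &speedstep_set_state);
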
@@ -295,7 +325,8 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
295 speed = speedstep_freqs[state].frequency; 325 speed = speedstep_freqs[state].frequency;
296 326
297 dprintk("currently at %s speed setting - %i MHz\n", 327 dprintk("currently at %s speed setting - %i MHz\n",
298 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", 328 (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency)
329 ? "low" : "high",
299 (speed / 1000)); 330 (speed / 1000));
300 331
301 /* cpuinfo and default policy values */ 332 /* cpuinfo and default policy values */
@@ -304,7 +335,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
304 335
305 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); 336 result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs);
306 if (result) 337 if (result)
307 return (result); 338 return result;
308 339
309 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); 340 cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu);
310 341
@@ -321,7 +352,7 @@ static unsigned int speedstep_get(unsigned int cpu)
321{ 352{
322 if (cpu) 353 if (cpu)
323 return -ENODEV; 354 return -ENODEV;
324 return speedstep_get_processor_frequency(speedstep_processor); 355 return speedstep_get_frequency(speedstep_processor);
325} 356}
326 357
327 358
@@ -335,7 +366,7 @@ static int speedstep_resume(struct cpufreq_policy *policy)
335 return result; 366 return result;
336} 367}
337 368
338static struct freq_attr* speedstep_attr[] = { 369static struct freq_attr *speedstep_attr[] = {
339 &cpufreq_freq_attr_scaling_available_freqs, 370 &cpufreq_freq_attr_scaling_available_freqs,
340 NULL, 371 NULL,
341}; 372};
@@ -364,21 +395,23 @@ static int __init speedstep_init(void)
364 speedstep_processor = speedstep_detect_processor(); 395 speedstep_processor = speedstep_detect_processor();
365 396
366 switch (speedstep_processor) { 397 switch (speedstep_processor) {
367 case SPEEDSTEP_PROCESSOR_PIII_T: 398 case SPEEDSTEP_CPU_PIII_T:
368 case SPEEDSTEP_PROCESSOR_PIII_C: 399 case SPEEDSTEP_CPU_PIII_C:
369 case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: 400 case SPEEDSTEP_CPU_PIII_C_EARLY:
370 break; 401 break;
371 default: 402 default:
372 speedstep_processor = 0; 403 speedstep_processor = 0;
373 } 404 }
374 405
375 if (!speedstep_processor) { 406 if (!speedstep_processor) {
376 dprintk ("No supported Intel CPU detected.\n"); 407 dprintk("No supported Intel CPU detected.\n");
377 return -ENODEV; 408 return -ENODEV;
378 } 409 }
379 410
380 dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", 411 dprintk("signature:0x%.8lx, command:0x%.8lx, "
381 ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); 412 "event:0x%.8lx, perf_level:0x%.8lx.\n",
413 ist_info.signature, ist_info.command,
414 ist_info.event, ist_info.perf_level);
382 415
383 /* Error if no IST-SMI BIOS or no PARM 416 /* Error if no IST-SMI BIOS or no PARM
384 sig= 'ISGE' aka 'Intel Speedstep Gate E' */ 417 sig= 'ISGE' aka 'Intel Speedstep Gate E' */
@@ -416,17 +449,20 @@ static void __exit speedstep_exit(void)
416 cpufreq_unregister_driver(&speedstep_driver); 449 cpufreq_unregister_driver(&speedstep_driver);
417} 450}
418 451
419module_param(smi_port, int, 0444); 452module_param(smi_port, int, 0444);
420module_param(smi_cmd, int, 0444); 453module_param(smi_cmd, int, 0444);
421module_param(smi_sig, uint, 0444); 454module_param(smi_sig, uint, 0444);
422 455
423MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); 456MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value "
424MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); 457 "-- Intel's default setting is 0xb2");
425MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); 458MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value "
459 "-- Intel's default setting is 0x82");
460MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the "
461 "SMI interface.");
426 462
427MODULE_AUTHOR ("Hiroshi Miura"); 463MODULE_AUTHOR("Hiroshi Miura");
428MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); 464MODULE_DESCRIPTION("Speedstep driver for IST applet SMI interface.");
429MODULE_LICENSE ("GPL"); 465MODULE_LICENSE("GPL");
430 466
431module_init(speedstep_init); 467module_init(speedstep_init);
432module_exit(speedstep_exit); 468module_exit(speedstep_exit);
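
All four asm blocks reformatted in this file share one calling convention: the command word is loaded into eax and the function code into ebx, then an out to the BIOS-given SMI port traps into firmware, which passes results back in registers. The push/pop of ebp guards against SMI handlers that clobber it. Annotated, with the operands exactly as in the ownership hunk and the comments as interpretation:

	__asm__ __volatile__(
		"push %%ebp\n"		/* some BIOS SMI handlers trash ebp */
		"out %%al, (%%dx)\n"	/* the port write triggers the SMI */
		"pop %%ebp\n"
		: "=D" (result),	/* edi carries the return code */
		  "=a" (dummy), "=b" (dummy), "=c" (dummy), "=d" (dummy),
		  "=S" (dummy)		/* every input register may be clobbered */
		: "a" (command), "b" (function), "c" (0), "d" (smi_port),
		  "D" (0), "S" (magic)
		: "memory"
	);
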
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index ffd0f5ed071a..593171e967ef 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
61 */ 61 */
62static unsigned char Cx86_dir0_msb __cpuinitdata = 0; 62static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
63 63
64static char Cx86_model[][9] __cpuinitdata = { 64static const char __cpuinitconst Cx86_model[][9] = {
65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
66 "M II ", "Unknown" 66 "M II ", "Unknown"
67}; 67};
68static char Cx486_name[][5] __cpuinitdata = { 68static const char __cpuinitconst Cx486_name[][5] = {
69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
70 "SRx2", "DRx2" 70 "SRx2", "DRx2"
71}; 71};
72static char Cx486S_name[][4] __cpuinitdata = { 72static const char __cpuinitconst Cx486S_name[][4] = {
73 "S", "S2", "Se", "S2e" 73 "S", "S2", "Se", "S2e"
74}; 74};
75static char Cx486D_name[][4] __cpuinitdata = { 75static const char __cpuinitconst Cx486D_name[][4] = {
76 "DX", "DX2", "?", "?", "?", "DX4" 76 "DX", "DX2", "?", "?", "?", "DX4"
77}; 77};
78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; 78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
79static char cyrix_model_mult1[] __cpuinitdata = "12??43"; 79static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
80static char cyrix_model_mult2[] __cpuinitdata = "12233445"; 80static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
81 81
82/* 82/*
83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
435 } 435 }
436} 436}
437 437
438static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { 438static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
439 .c_vendor = "Cyrix", 439 .c_vendor = "Cyrix",
440 .c_ident = { "CyrixInstead" }, 440 .c_ident = { "CyrixInstead" },
441 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
@@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
446 446
447cpu_dev_register(cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
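
The cyrix.c hunks apply a single transformation: read-only __cpuinitdata string tables become const and are annotated __cpuinitconst, moving them from writable CPU-init data to the read-only CPU-init section. The shape of the change, sketched with a hypothetical table and assuming the 2009-era init annotations:

    #include <linux/init.h>

    /* Before (writable init data):
     *     static char example_names[][8] __cpuinitdata = { "a", "b" };
     *
     * After: 'const' qualifies the element type, while __cpuinitconst
     * selects the read-only init section.
     */
    static const char __cpuinitconst example_names[][8] = { "a", "b" };
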
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c1c04bf0df77..7437fa133c02 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -55,6 +55,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
55 c->x86_cache_alignment = 128; 55 c->x86_cache_alignment = 128;
56#endif 56#endif
57 57
58 /* CPUID workaround for 0F33/0F34 CPU */
59 if (c->x86 == 0xF && c->x86_model == 0x3
60 && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
61 c->x86_phys_bits = 36;
62
58 /* 63 /*
59 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 64 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
60 * with P/T states and does not stop in deep C-states. 65 * with P/T states and does not stop in deep C-states.
@@ -416,7 +421,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
416} 421}
417#endif 422#endif
418 423
419static struct cpu_dev intel_cpu_dev __cpuinitdata = { 424static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
420 .c_vendor = "Intel", 425 .c_vendor = "Intel",
421 .c_ident = { "GenuineIntel" }, 426 .c_ident = { "GenuineIntel" },
422#ifdef CONFIG_X86_32 427#ifdef CONFIG_X86_32
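
The new intel.c check matches CPUID signatures 0x0F33/0x0F34 (family 0xF, model 3, stepping 3 or 4) and pins x86_phys_bits to 36 rather than trusting the CPUID-reported address width on those steppings. As a reminder of how family/model/stepping unpack from EAX of CPUID leaf 1, a hedged userspace sketch:

    #include <stdio.h>

    /* Decode a CPUID signature; 0x00000F33 -> family 0xf, model 3, stepping 3. */
    static void decode(unsigned int eax)
    {
    	unsigned int stepping = eax & 0xf;
    	unsigned int model = (eax >> 4) & 0xf;
    	unsigned int family = (eax >> 8) & 0xf;

    	if (family == 0xf)
    		family += (eax >> 20) & 0xff;	/* extended family */
    	if (family == 0x6 || family >= 0xf)
    		model |= (eax >> 12) & 0xf0;	/* extended model  */

    	printf("%#010x: family %#x model %#x stepping %#x\n",
    	       eax, family, model, stepping);
    }

    int main(void)
    {
    	decode(0x00000F33);
    	decode(0x00000F34);
    	return 0;
    }
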
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7293508d8f5c..483eda96e102 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -32,7 +32,7 @@ struct _cache_table
32}; 32};
33 33
34/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 34/* all the cache descriptor types we care about (no TLB or trace cache entries) */
35static struct _cache_table cache_table[] __cpuinitdata = 35static const struct _cache_table __cpuinitconst cache_table[] =
36{ 36{
37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -159,7 +159,7 @@ struct _cpuid4_info_regs {
159 unsigned long can_disable; 159 unsigned long can_disable;
160}; 160};
161 161
162#ifdef CONFIG_PCI 162#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
163static struct pci_device_id k8_nb_id[] = { 163static struct pci_device_id k8_nb_id[] = {
164 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, 164 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
165 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, 165 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@ -206,15 +206,15 @@ union l3_cache {
206 unsigned val; 206 unsigned val;
207}; 207};
208 208
209static unsigned short assocs[] __cpuinitdata = { 209static const unsigned short __cpuinitconst assocs[] = {
210 [1] = 1, [2] = 2, [4] = 4, [6] = 8, 210 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
211 [8] = 16, [0xa] = 32, [0xb] = 48, 211 [8] = 16, [0xa] = 32, [0xb] = 48,
212 [0xc] = 64, 212 [0xc] = 64,
213 [0xf] = 0xffff // ?? 213 [0xf] = 0xffff // ??
214}; 214};
215 215
216static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 216static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
217static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 217static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
218 218
219static void __cpuinit 219static void __cpuinit
220amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 220amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
@@ -324,15 +324,6 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
324 return 0; 324 return 0;
325} 325}
326 326
327static int
328__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
329{
330 struct _cpuid4_info_regs *leaf_regs =
331 (struct _cpuid4_info_regs *)this_leaf;
332
333 return cpuid4_cache_lookup_regs(index, leaf_regs);
334}
335
336static int __cpuinit find_num_cache_leaves(void) 327static int __cpuinit find_num_cache_leaves(void)
337{ 328{
338 unsigned int eax, ebx, ecx, edx; 329 unsigned int eax, ebx, ecx, edx;
@@ -508,6 +499,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
508 return l2; 499 return l2;
509} 500}
510 501
502#ifdef CONFIG_SYSFS
503
511/* pointer to _cpuid4_info array (for each cache leaf) */ 504/* pointer to _cpuid4_info array (for each cache leaf) */
512static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 505static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
513#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 506#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
@@ -571,6 +564,15 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
571 per_cpu(cpuid4_info, cpu) = NULL; 564 per_cpu(cpuid4_info, cpu) = NULL;
572} 565}
573 566
567static int
568__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
569{
570 struct _cpuid4_info_regs *leaf_regs =
571 (struct _cpuid4_info_regs *)this_leaf;
572
573 return cpuid4_cache_lookup_regs(index, leaf_regs);
574}
575
574static void __cpuinit get_cpu_leaves(void *_retval) 576static void __cpuinit get_cpu_leaves(void *_retval)
575{ 577{
576 int j, *retval = _retval, cpu = smp_processor_id(); 578 int j, *retval = _retval, cpu = smp_processor_id();
@@ -612,8 +614,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
612 return retval; 614 return retval;
613} 615}
614 616
615#ifdef CONFIG_SYSFS
616
617#include <linux/kobject.h> 617#include <linux/kobject.h>
618#include <linux/sysfs.h> 618#include <linux/sysfs.h>
619 619
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index bfbd5323a635..863f89568b1a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -639,7 +639,7 @@ static void mce_init_timer(void)
639 if (!next_interval) 639 if (!next_interval)
640 return; 640 return;
641 setup_timer(t, mcheck_timer, smp_processor_id()); 641 setup_timer(t, mcheck_timer, smp_processor_id());
642 t->expires = round_jiffies_relative(jiffies + next_interval); 642 t->expires = round_jiffies(jiffies + next_interval);
643 add_timer(t); 643 add_timer(t);
644} 644}
645 645
@@ -990,7 +990,7 @@ static struct sysdev_attribute *mce_attributes[] = {
990 NULL 990 NULL
991}; 991};
992 992
993static cpumask_t mce_device_initialized = CPU_MASK_NONE; 993static cpumask_var_t mce_device_initialized;
994 994
995/* Per cpu sysdev init. All of the cpus still share the same ctl bank */ 995/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
996static __cpuinit int mce_create_device(unsigned int cpu) 996static __cpuinit int mce_create_device(unsigned int cpu)
@@ -1021,7 +1021,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1021 if (err) 1021 if (err)
1022 goto error2; 1022 goto error2;
1023 } 1023 }
1024 cpu_set(cpu, mce_device_initialized); 1024 cpumask_set_cpu(cpu, mce_device_initialized);
1025 1025
1026 return 0; 1026 return 0;
1027error2: 1027error2:
@@ -1043,7 +1043,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1043{ 1043{
1044 int i; 1044 int i;
1045 1045
1046 if (!cpu_isset(cpu, mce_device_initialized)) 1046 if (!cpumask_test_cpu(cpu, mce_device_initialized))
1047 return; 1047 return;
1048 1048
1049 for (i = 0; mce_attributes[i]; i++) 1049 for (i = 0; mce_attributes[i]; i++)
@@ -1053,7 +1053,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1053 sysdev_remove_file(&per_cpu(device_mce, cpu), 1053 sysdev_remove_file(&per_cpu(device_mce, cpu),
1054 &bank_attrs[i]); 1054 &bank_attrs[i]);
1055 sysdev_unregister(&per_cpu(device_mce,cpu)); 1055 sysdev_unregister(&per_cpu(device_mce,cpu));
1056 cpu_clear(cpu, mce_device_initialized); 1056 cpumask_clear_cpu(cpu, mce_device_initialized);
1057} 1057}
1058 1058
1059/* Make sure there are no machine checks on offlined CPUs. */ 1059/* Make sure there are no machine checks on offlined CPUs. */
@@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
1110 break; 1110 break;
1111 case CPU_DOWN_FAILED: 1111 case CPU_DOWN_FAILED:
1112 case CPU_DOWN_FAILED_FROZEN: 1112 case CPU_DOWN_FAILED_FROZEN:
1113 t->expires = round_jiffies_relative(jiffies + next_interval); 1113 t->expires = round_jiffies(jiffies + next_interval);
1114 add_timer_on(t, cpu); 1114 add_timer_on(t, cpu);
1115 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 1115 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1116 break; 1116 break;
@@ -1162,6 +1162,8 @@ static __init int mce_init_device(void)
1162 if (!mce_available(&boot_cpu_data)) 1162 if (!mce_available(&boot_cpu_data))
1163 return -EIO; 1163 return -EIO;
1164 1164
1165 alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
1166
1165 err = mce_init_banks(); 1167 err = mce_init_banks();
1166 if (err) 1168 if (err)
1167 return err; 1169 return err;
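
The mce_64.c hunks convert a static cpumask_t into cpumask_var_t, which is heap-allocated when CONFIG_CPUMASK_OFFSTACK=y; note that the hunk above discards the return value of alloc_cpumask_var(), which can fail in that configuration. The usual shape of the idiom, sketched with a hypothetical mask:

    #include <linux/cpumask.h>
    #include <linux/gfp.h>
    #include <linux/smp.h>

    static cpumask_var_t my_mask;	/* hypothetical, not from this patch */

    static int __init my_mask_init(void)
    {
    	/* Off-stack masks allocate memory, so check the result: */
    	if (!alloc_cpumask_var(&my_mask, GFP_KERNEL))
    		return -ENOMEM;

    	cpumask_set_cpu(smp_processor_id(), my_mask);
    	return 0;
    }

    static void my_mask_done(void)
    {
    	free_cpumask_var(my_mask);
    }
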
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index c5a32f92d07e..56dde9c4bc96 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -92,7 +92,8 @@ struct thresh_restart {
92}; 92};
93 93
94/* must be called with correct cpu affinity */ 94/* must be called with correct cpu affinity */
95static long threshold_restart_bank(void *_tr) 95/* Called via smp_call_function_single() */
96static void threshold_restart_bank(void *_tr)
96{ 97{
97 struct thresh_restart *tr = _tr; 98 struct thresh_restart *tr = _tr;
98 u32 mci_misc_hi, mci_misc_lo; 99 u32 mci_misc_hi, mci_misc_lo;
@@ -119,7 +120,6 @@ static long threshold_restart_bank(void *_tr)
119 120
120 mci_misc_hi |= MASK_COUNT_EN_HI; 121 mci_misc_hi |= MASK_COUNT_EN_HI;
121 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 122 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
122 return 0;
123} 123}
124 124
125/* cpu init entry point, called from mce.c with preempt off */ 125/* cpu init entry point, called from mce.c with preempt off */
@@ -279,7 +279,7 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
279 tr.b = b; 279 tr.b = b;
280 tr.reset = 0; 280 tr.reset = 0;
281 tr.old_limit = 0; 281 tr.old_limit = 0;
282 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 282 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
283 283
284 return end - buf; 284 return end - buf;
285} 285}
@@ -301,23 +301,32 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
301 tr.b = b; 301 tr.b = b;
302 tr.reset = 0; 302 tr.reset = 0;
303 303
304 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 304 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
305 305
306 return end - buf; 306 return end - buf;
307} 307}
308 308
309static long local_error_count(void *_b) 309struct threshold_block_cross_cpu {
310 struct threshold_block *tb;
311 long retval;
312};
313
314static void local_error_count_handler(void *_tbcc)
310{ 315{
311 struct threshold_block *b = _b; 316 struct threshold_block_cross_cpu *tbcc = _tbcc;
317 struct threshold_block *b = tbcc->tb;
312 u32 low, high; 318 u32 low, high;
313 319
314 rdmsr(b->address, low, high); 320 rdmsr(b->address, low, high);
315 return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); 321 tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
316} 322}
317 323
318static ssize_t show_error_count(struct threshold_block *b, char *buf) 324static ssize_t show_error_count(struct threshold_block *b, char *buf)
319{ 325{
320 return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); 326 struct threshold_block_cross_cpu tbcc = { .tb = b, };
327
328 smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
329 return sprintf(buf, "%lx\n", tbcc.retval);
321} 330}
322 331
323static ssize_t store_error_count(struct threshold_block *b, 332static ssize_t store_error_count(struct threshold_block *b,
@@ -325,7 +334,7 @@ static ssize_t store_error_count(struct threshold_block *b,
325{ 334{
326 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; 335 struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
327 336
328 work_on_cpu(b->cpu, threshold_restart_bank, &tr); 337 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
329 return 1; 338 return 1;
330} 339}
331 340
@@ -394,7 +403,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
394 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) 403 if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
395 return 0; 404 return 0;
396 405
397 if (rdmsr_safe(address, &low, &high)) 406 if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
398 return 0; 407 return 0;
399 408
400 if (!(high & MASK_VALID_HI)) { 409 if (!(high & MASK_VALID_HI)) {
@@ -458,12 +467,11 @@ out_free:
458 return err; 467 return err;
459} 468}
460 469
461static __cpuinit long local_allocate_threshold_blocks(void *_bank) 470static __cpuinit long
471local_allocate_threshold_blocks(int cpu, unsigned int bank)
462{ 472{
463 unsigned int *bank = _bank; 473 return allocate_threshold_blocks(cpu, bank, 0,
464 474 MSR_IA32_MC0_MISC + bank * 4);
465 return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
466 MSR_IA32_MC0_MISC + *bank * 4);
467} 475}
468 476
469/* symlinks sibling shared banks to first core. first core owns dir/files. */ 477/* symlinks sibling shared banks to first core. first core owns dir/files. */
@@ -477,7 +485,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
477 485
478#ifdef CONFIG_SMP 486#ifdef CONFIG_SMP
479 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 487 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
480 i = cpumask_first(&per_cpu(cpu_core_map, cpu)); 488 i = cpumask_first(cpu_core_mask(cpu));
481 489
482 /* first core not up yet */ 490 /* first core not up yet */
483 if (cpu_data(i).cpu_core_id) 491 if (cpu_data(i).cpu_core_id)
@@ -497,7 +505,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
497 if (err) 505 if (err)
498 goto out; 506 goto out;
499 507
500 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); 508 cpumask_copy(b->cpus, cpu_core_mask(cpu));
501 per_cpu(threshold_banks, cpu)[bank] = b; 509 per_cpu(threshold_banks, cpu)[bank] = b;
502 goto out; 510 goto out;
503 } 511 }
@@ -521,12 +529,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
521#ifndef CONFIG_SMP 529#ifndef CONFIG_SMP
522 cpumask_setall(b->cpus); 530 cpumask_setall(b->cpus);
523#else 531#else
524 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); 532 cpumask_copy(b->cpus, cpu_core_mask(cpu));
525#endif 533#endif
526 534
527 per_cpu(threshold_banks, cpu)[bank] = b; 535 per_cpu(threshold_banks, cpu)[bank] = b;
528 536
529 err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); 537 err = local_allocate_threshold_blocks(cpu, bank);
530 if (err) 538 if (err)
531 goto out_free; 539 goto out_free;
532 540
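
Replacing work_on_cpu() with smp_call_function_single() changes the callback type from long (*)(void *) to void (*)(void *) and runs it in IPI context, so return values must travel through the argument; that is exactly what threshold_block_cross_cpu does above. The general pattern, with hypothetical names:

    #include <linux/smp.h>

    struct remote_result {		/* hypothetical carrier struct */
    	int input;
    	long retval;
    };

    /* Runs on the target CPU with interrupts disabled; must not sleep. */
    static void remote_handler(void *_rr)
    {
    	struct remote_result *rr = _rr;

    	rr->retval = rr->input * 2;	/* stand-in for the real MSR access */
    }

    static long run_on_cpu(int cpu, int input)
    {
    	struct remote_result rr = { .input = input, };

    	/* wait=1 blocks until the handler has completed, so rr.retval
    	 * is stable when read back on this CPU. */
    	smp_call_function_single(cpu, remote_handler, &rr, 1);
    	return rr.retval;
    }

The same constraint explains the rdmsr_safe() to rdmsr_safe_on_cpu() switch above: the allocation path no longer executes on the target CPU, so the MSR read has to be sent there explicitly.
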
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index aaa7d9730938..d6b72df89d69 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -249,7 +249,7 @@ void cmci_rediscover(int dying)
249 for_each_online_cpu (cpu) { 249 for_each_online_cpu (cpu) {
250 if (cpu == dying) 250 if (cpu == dying)
251 continue; 251 continue;
252 if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu))) 252 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
253 continue; 253 continue;
254 /* Recheck banks in case CPUs don't all have the same */ 254 /* Recheck banks in case CPUs don't all have the same */
255 if (cmci_supported(&banks)) 255 if (cmci_supported(&banks))
@@ -270,7 +270,7 @@ void cmci_reenable(void)
270 cmci_discover(banks, 0); 270 cmci_discover(banks, 0);
271} 271}
272 272
273static __cpuinit void intel_init_cmci(void) 273static void intel_init_cmci(void)
274{ 274{
275 int banks; 275 int banks;
276 276
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index 191fc0533649..f4361b56f8e9 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
1obj-y := main.o if.o generic.o state.o 1obj-y := main.o if.o generic.o state.o cleanup.o
2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o 2obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
3 3
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
new file mode 100644
index 000000000000..ce0fe4b5c04f
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -0,0 +1,1101 @@
1/* MTRR (Memory Type Range Register) cleanup
2
3 Copyright (C) 2009 Yinghai Lu
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18*/
19
20#include <linux/module.h>
21#include <linux/init.h>
22#include <linux/pci.h>
23#include <linux/smp.h>
24#include <linux/cpu.h>
25#include <linux/mutex.h>
26#include <linux/sort.h>
27
28#include <asm/e820.h>
29#include <asm/mtrr.h>
30#include <asm/uaccess.h>
31#include <asm/processor.h>
32#include <asm/msr.h>
33#include <asm/kvm_para.h>
34#include "mtrr.h"
35
36/* should be related to the number of variable MTRR ranges (MTRR_VAR_RANGES) */
37#define RANGE_NUM 256
38
39struct res_range {
40 unsigned long start;
41 unsigned long end;
42};
43
44static int __init
45add_range(struct res_range *range, int nr_range, unsigned long start,
46 unsigned long end)
47{
48 /* out of slots */
49 if (nr_range >= RANGE_NUM)
50 return nr_range;
51
52 range[nr_range].start = start;
53 range[nr_range].end = end;
54
55 nr_range++;
56
57 return nr_range;
58}
59
60static int __init
61add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
62 unsigned long end)
63{
64 int i;
65
66 /* try to merge it with old one */
67 for (i = 0; i < nr_range; i++) {
68 unsigned long final_start, final_end;
69 unsigned long common_start, common_end;
70
71 if (!range[i].end)
72 continue;
73
74 common_start = max(range[i].start, start);
75 common_end = min(range[i].end, end);
76 if (common_start > common_end + 1)
77 continue;
78
79 final_start = min(range[i].start, start);
80 final_end = max(range[i].end, end);
81
82 range[i].start = final_start;
83 range[i].end = final_end;
84 return nr_range;
85 }
86
87 /* need to add that */
88 return add_range(range, nr_range, start, end);
89}
90
91static void __init
92subtract_range(struct res_range *range, unsigned long start, unsigned long end)
93{
94 int i, j;
95
96 for (j = 0; j < RANGE_NUM; j++) {
97 if (!range[j].end)
98 continue;
99
100 if (start <= range[j].start && end >= range[j].end) {
101 range[j].start = 0;
102 range[j].end = 0;
103 continue;
104 }
105
106 if (start <= range[j].start && end < range[j].end &&
107 range[j].start < end + 1) {
108 range[j].start = end + 1;
109 continue;
110 }
111
112
113 if (start > range[j].start && end >= range[j].end &&
114 range[j].end > start - 1) {
115 range[j].end = start - 1;
116 continue;
117 }
118
119 if (start > range[j].start && end < range[j].end) {
120 /* find the new spare */
121 for (i = 0; i < RANGE_NUM; i++) {
122 if (range[i].end == 0)
123 break;
124 }
125 if (i < RANGE_NUM) {
126 range[i].end = range[j].end;
127 range[i].start = end + 1;
128 } else {
129 				printk(KERN_ERR "out of slots in ranges\n");
130 }
131 range[j].end = start - 1;
132 continue;
133 }
134 }
135}
136
137static int __init cmp_range(const void *x1, const void *x2)
138{
139 const struct res_range *r1 = x1;
140 const struct res_range *r2 = x2;
141 long start1, start2;
142
143 start1 = r1->start;
144 start2 = r2->start;
145
146 return start1 - start2;
147}
148
149struct var_mtrr_range_state {
150 unsigned long base_pfn;
151 unsigned long size_pfn;
152 mtrr_type type;
153};
154
155static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
156static int __initdata debug_print;
157
158static int __init
159x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
160 unsigned long extra_remove_base,
161 unsigned long extra_remove_size)
162{
163 unsigned long base, size;
164 mtrr_type type;
165 int i;
166
167 for (i = 0; i < num_var_ranges; i++) {
168 type = range_state[i].type;
169 if (type != MTRR_TYPE_WRBACK)
170 continue;
171 base = range_state[i].base_pfn;
172 size = range_state[i].size_pfn;
173 nr_range = add_range_with_merge(range, nr_range, base,
174 base + size - 1);
175 }
176 if (debug_print) {
177 printk(KERN_DEBUG "After WB checking\n");
178 for (i = 0; i < nr_range; i++)
179 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
180 range[i].start, range[i].end + 1);
181 }
182
183 /* take out UC ranges */
184 for (i = 0; i < num_var_ranges; i++) {
185 type = range_state[i].type;
186 if (type != MTRR_TYPE_UNCACHABLE &&
187 type != MTRR_TYPE_WRPROT)
188 continue;
189 size = range_state[i].size_pfn;
190 if (!size)
191 continue;
192 base = range_state[i].base_pfn;
193 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
194 (mtrr_state.enabled & 1)) {
195 /* Var MTRR contains UC entry below 1M? Skip it: */
196 printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
197 "contains strange UC entry under 1M, check "
198 "with your system vendor!\n", i);
199 if (base + size <= (1<<(20-PAGE_SHIFT)))
200 continue;
201 size -= (1<<(20-PAGE_SHIFT)) - base;
202 base = 1<<(20-PAGE_SHIFT);
203 }
204 subtract_range(range, base, base + size - 1);
205 }
206 if (extra_remove_size)
207 subtract_range(range, extra_remove_base,
208 extra_remove_base + extra_remove_size - 1);
209
210 /* get new range num */
211 nr_range = 0;
212 for (i = 0; i < RANGE_NUM; i++) {
213 if (!range[i].end)
214 continue;
215 nr_range++;
216 }
217 if (debug_print) {
218 printk(KERN_DEBUG "After UC checking\n");
219 for (i = 0; i < nr_range; i++)
220 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
221 range[i].start, range[i].end + 1);
222 }
223
224 /* sort the ranges */
225 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
226 if (debug_print) {
227 printk(KERN_DEBUG "After sorting\n");
228 for (i = 0; i < nr_range; i++)
229 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
230 range[i].start, range[i].end + 1);
231 }
232
233 	/* clear the entries that are not used */
234 for (i = nr_range; i < RANGE_NUM; i++)
235 memset(&range[i], 0, sizeof(range[i]));
236
237 return nr_range;
238}
239
240static struct res_range __initdata range[RANGE_NUM];
241static int __initdata nr_range;
242
243#ifdef CONFIG_MTRR_SANITIZER
244
245static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
246{
247 unsigned long sum;
248 int i;
249
250 sum = 0;
251 for (i = 0; i < nr_range; i++)
252 sum += range[i].end + 1 - range[i].start;
253
254 return sum;
255}
256
257static int enable_mtrr_cleanup __initdata =
258 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
259
260static int __init disable_mtrr_cleanup_setup(char *str)
261{
262 enable_mtrr_cleanup = 0;
263 return 0;
264}
265early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
266
267static int __init enable_mtrr_cleanup_setup(char *str)
268{
269 enable_mtrr_cleanup = 1;
270 return 0;
271}
272early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
273
274static int __init mtrr_cleanup_debug_setup(char *str)
275{
276 debug_print = 1;
277 return 0;
278}
279early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
280
281struct var_mtrr_state {
282 unsigned long range_startk;
283 unsigned long range_sizek;
284 unsigned long chunk_sizek;
285 unsigned long gran_sizek;
286 unsigned int reg;
287};
288
289static void __init
290set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
291 unsigned char type, unsigned int address_bits)
292{
293 u32 base_lo, base_hi, mask_lo, mask_hi;
294 u64 base, mask;
295
296 if (!sizek) {
297 fill_mtrr_var_range(reg, 0, 0, 0, 0);
298 return;
299 }
300
301 mask = (1ULL << address_bits) - 1;
302 mask &= ~((((u64)sizek) << 10) - 1);
303
304 base = ((u64)basek) << 10;
305
306 base |= type;
307 mask |= 0x800;
308
309 base_lo = base & ((1ULL<<32) - 1);
310 base_hi = base >> 32;
311
312 mask_lo = mask & ((1ULL<<32) - 1);
313 mask_hi = mask >> 32;
314
315 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
316}
317
318static void __init
319save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
320 unsigned char type)
321{
322 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
323 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
324 range_state[reg].type = type;
325}
326
327static void __init
328set_var_mtrr_all(unsigned int address_bits)
329{
330 unsigned long basek, sizek;
331 unsigned char type;
332 unsigned int reg;
333
334 for (reg = 0; reg < num_var_ranges; reg++) {
335 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
336 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
337 type = range_state[reg].type;
338
339 set_var_mtrr(reg, basek, sizek, type, address_bits);
340 }
341}
342
343static unsigned long to_size_factor(unsigned long sizek, char *factorp)
344{
345 char factor;
346 unsigned long base = sizek;
347
348 if (base & ((1<<10) - 1)) {
349 /* not MB alignment */
350 factor = 'K';
351 } else if (base & ((1<<20) - 1)) {
352 factor = 'M';
353 base >>= 10;
354 } else {
355 factor = 'G';
356 base >>= 20;
357 }
358
359 *factorp = factor;
360
361 return base;
362}
363
364static unsigned int __init
365range_to_mtrr(unsigned int reg, unsigned long range_startk,
366 unsigned long range_sizek, unsigned char type)
367{
368 if (!range_sizek || (reg >= num_var_ranges))
369 return reg;
370
371 while (range_sizek) {
372 unsigned long max_align, align;
373 unsigned long sizek;
374
375 /* Compute the maximum size I can make a range */
376 if (range_startk)
377 max_align = ffs(range_startk) - 1;
378 else
379 max_align = 32;
380 align = fls(range_sizek) - 1;
381 if (align > max_align)
382 align = max_align;
383
384 sizek = 1 << align;
385 if (debug_print) {
386 char start_factor = 'K', size_factor = 'K';
387 unsigned long start_base, size_base;
388
389 start_base = to_size_factor(range_startk,
390 &start_factor),
391 size_base = to_size_factor(sizek, &size_factor),
392
393 printk(KERN_DEBUG "Setting variable MTRR %d, "
394 "base: %ld%cB, range: %ld%cB, type %s\n",
395 reg, start_base, start_factor,
396 size_base, size_factor,
397 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
398 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
399 );
400 }
401 save_var_mtrr(reg++, range_startk, sizek, type);
402 range_startk += sizek;
403 range_sizek -= sizek;
404 if (reg >= num_var_ranges)
405 break;
406 }
407 return reg;
408}
409
410static unsigned __init
411range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
412 unsigned long sizek)
413{
414 unsigned long hole_basek, hole_sizek;
415 unsigned long second_basek, second_sizek;
416 unsigned long range0_basek, range0_sizek;
417 unsigned long range_basek, range_sizek;
418 unsigned long chunk_sizek;
419 unsigned long gran_sizek;
420
421 hole_basek = 0;
422 hole_sizek = 0;
423 second_basek = 0;
424 second_sizek = 0;
425 chunk_sizek = state->chunk_sizek;
426 gran_sizek = state->gran_sizek;
427
428 	/* align to gran size; prevent small blocks from using up MTRRs */
429 range_basek = ALIGN(state->range_startk, gran_sizek);
430 if ((range_basek > basek) && basek)
431 return second_sizek;
432 state->range_sizek -= (range_basek - state->range_startk);
433 range_sizek = ALIGN(state->range_sizek, gran_sizek);
434
435 while (range_sizek > state->range_sizek) {
436 range_sizek -= gran_sizek;
437 if (!range_sizek)
438 return 0;
439 }
440 state->range_sizek = range_sizek;
441
442 /* try to append some small hole */
443 range0_basek = state->range_startk;
444 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
445
446 /* no increase */
447 if (range0_sizek == state->range_sizek) {
448 if (debug_print)
449 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
450 range0_basek<<10,
451 (range0_basek + state->range_sizek)<<10);
452 state->reg = range_to_mtrr(state->reg, range0_basek,
453 state->range_sizek, MTRR_TYPE_WRBACK);
454 return 0;
455 }
456
457 	/* only cut back when it is not the last range */
458 if (sizek) {
459 while (range0_basek + range0_sizek > (basek + sizek)) {
460 if (range0_sizek >= chunk_sizek)
461 range0_sizek -= chunk_sizek;
462 else
463 range0_sizek = 0;
464
465 if (!range0_sizek)
466 break;
467 }
468 }
469
470second_try:
471 range_basek = range0_basek + range0_sizek;
472
473 /* one hole in the middle */
474 if (range_basek > basek && range_basek <= (basek + sizek))
475 second_sizek = range_basek - basek;
476
477 if (range0_sizek > state->range_sizek) {
478
479 /* one hole in middle or at end */
480 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
481
482 /* hole size should be less than half of range0 size */
483 if (hole_sizek >= (range0_sizek >> 1) &&
484 range0_sizek >= chunk_sizek) {
485 range0_sizek -= chunk_sizek;
486 second_sizek = 0;
487 hole_sizek = 0;
488
489 goto second_try;
490 }
491 }
492
493 if (range0_sizek) {
494 if (debug_print)
495 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
496 range0_basek<<10,
497 (range0_basek + range0_sizek)<<10);
498 state->reg = range_to_mtrr(state->reg, range0_basek,
499 range0_sizek, MTRR_TYPE_WRBACK);
500 }
501
502 if (range0_sizek < state->range_sizek) {
503 /* need to handle left over */
504 range_sizek = state->range_sizek - range0_sizek;
505
506 if (debug_print)
507 printk(KERN_DEBUG "range: %016lx - %016lx\n",
508 range_basek<<10,
509 (range_basek + range_sizek)<<10);
510 state->reg = range_to_mtrr(state->reg, range_basek,
511 range_sizek, MTRR_TYPE_WRBACK);
512 }
513
514 if (hole_sizek) {
515 hole_basek = range_basek - hole_sizek - second_sizek;
516 if (debug_print)
517 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
518 hole_basek<<10,
519 (hole_basek + hole_sizek)<<10);
520 state->reg = range_to_mtrr(state->reg, hole_basek,
521 hole_sizek, MTRR_TYPE_UNCACHABLE);
522 }
523
524 return second_sizek;
525}
526
527static void __init
528set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
529 unsigned long size_pfn)
530{
531 unsigned long basek, sizek;
532 unsigned long second_sizek = 0;
533
534 if (state->reg >= num_var_ranges)
535 return;
536
537 basek = base_pfn << (PAGE_SHIFT - 10);
538 sizek = size_pfn << (PAGE_SHIFT - 10);
539
540 /* See if I can merge with the last range */
541 if ((basek <= 1024) ||
542 (state->range_startk + state->range_sizek == basek)) {
543 unsigned long endk = basek + sizek;
544 state->range_sizek = endk - state->range_startk;
545 return;
546 }
547 /* Write the range mtrrs */
548 if (state->range_sizek != 0)
549 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
550
551 /* Allocate an msr */
552 state->range_startk = basek + second_sizek;
553 state->range_sizek = sizek - second_sizek;
554}
555
556/* minimum size of an MTRR block that can take a hole */
557static u64 mtrr_chunk_size __initdata = (256ULL<<20);
558
559static int __init parse_mtrr_chunk_size_opt(char *p)
560{
561 if (!p)
562 return -EINVAL;
563 mtrr_chunk_size = memparse(p, &p);
564 return 0;
565}
566early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
567
568/* granularity of an MTRR block */
569static u64 mtrr_gran_size __initdata;
570
571static int __init parse_mtrr_gran_size_opt(char *p)
572{
573 if (!p)
574 return -EINVAL;
575 mtrr_gran_size = memparse(p, &p);
576 return 0;
577}
578early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
579
580static int nr_mtrr_spare_reg __initdata =
581 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
582
583static int __init parse_mtrr_spare_reg(char *arg)
584{
585 if (arg)
586 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
587 return 0;
588}
589
590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
591
592static int __init
593x86_setup_var_mtrrs(struct res_range *range, int nr_range,
594 u64 chunk_size, u64 gran_size)
595{
596 struct var_mtrr_state var_state;
597 int i;
598 int num_reg;
599
600 var_state.range_startk = 0;
601 var_state.range_sizek = 0;
602 var_state.reg = 0;
603 var_state.chunk_sizek = chunk_size >> 10;
604 var_state.gran_sizek = gran_size >> 10;
605
606 memset(range_state, 0, sizeof(range_state));
607
608 /* Write the range etc */
609 for (i = 0; i < nr_range; i++)
610 set_var_mtrr_range(&var_state, range[i].start,
611 range[i].end - range[i].start + 1);
612
613 /* Write the last range */
614 if (var_state.range_sizek != 0)
615 range_to_mtrr_with_hole(&var_state, 0, 0);
616
617 num_reg = var_state.reg;
618 	/* Clear out the extra MTRRs */
619 while (var_state.reg < num_var_ranges) {
620 save_var_mtrr(var_state.reg, 0, 0, 0);
621 var_state.reg++;
622 }
623
624 return num_reg;
625}
626
627struct mtrr_cleanup_result {
628 unsigned long gran_sizek;
629 unsigned long chunk_sizek;
630 unsigned long lose_cover_sizek;
631 unsigned int num_reg;
632 int bad;
633};
634
635/*
636 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
637 * chunk size: gran_size, ..., 2G
638 * so we need (1+16)*8
639 */
640#define NUM_RESULT 136
641#define PSHIFT (PAGE_SHIFT - 10)
642
643static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
644static unsigned long __initdata min_loss_pfn[RANGE_NUM];
645
646static void __init print_out_mtrr_range_state(void)
647{
648 int i;
649 char start_factor = 'K', size_factor = 'K';
650 unsigned long start_base, size_base;
651 mtrr_type type;
652
653 for (i = 0; i < num_var_ranges; i++) {
654
655 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
656 if (!size_base)
657 continue;
658
659 size_base = to_size_factor(size_base, &size_factor),
660 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
661 start_base = to_size_factor(start_base, &start_factor),
662 type = range_state[i].type;
663
664 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
665 i, start_base, start_factor,
666 size_base, size_factor,
667 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
668 ((type == MTRR_TYPE_WRPROT) ? "WP" :
669 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
670 );
671 }
672}
673
674static int __init mtrr_need_cleanup(void)
675{
676 int i;
677 mtrr_type type;
678 unsigned long size;
679 /* extra one for all 0 */
680 int num[MTRR_NUM_TYPES + 1];
681
682 	/* check the number of entries */
683 memset(num, 0, sizeof(num));
684 for (i = 0; i < num_var_ranges; i++) {
685 type = range_state[i].type;
686 size = range_state[i].size_pfn;
687 if (type >= MTRR_NUM_TYPES)
688 continue;
689 if (!size)
690 type = MTRR_NUM_TYPES;
691 if (type == MTRR_TYPE_WRPROT)
692 type = MTRR_TYPE_UNCACHABLE;
693 num[type]++;
694 }
695
696 /* check if we got UC entries */
697 if (!num[MTRR_TYPE_UNCACHABLE])
698 return 0;
699
700 /* check if we only had WB and UC */
701 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
702 num_var_ranges - num[MTRR_NUM_TYPES])
703 return 0;
704
705 return 1;
706}
707
708static unsigned long __initdata range_sums;
709static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
710 unsigned long extra_remove_base,
711 unsigned long extra_remove_size,
712 int i)
713{
714 int num_reg;
715 static struct res_range range_new[RANGE_NUM];
716 static int nr_range_new;
717 unsigned long range_sums_new;
718
719 /* convert ranges to var ranges state */
720 num_reg = x86_setup_var_mtrrs(range, nr_range,
721 chunk_size, gran_size);
722
723 /* we got new setting in range_state, check it */
724 memset(range_new, 0, sizeof(range_new));
725 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
726 extra_remove_base, extra_remove_size);
727 range_sums_new = sum_ranges(range_new, nr_range_new);
728
729 result[i].chunk_sizek = chunk_size >> 10;
730 result[i].gran_sizek = gran_size >> 10;
731 result[i].num_reg = num_reg;
732 if (range_sums < range_sums_new) {
733 result[i].lose_cover_sizek =
734 (range_sums_new - range_sums) << PSHIFT;
735 result[i].bad = 1;
736 } else
737 result[i].lose_cover_sizek =
738 (range_sums - range_sums_new) << PSHIFT;
739
740 /* double check it */
741 if (!result[i].bad && !result[i].lose_cover_sizek) {
742 if (nr_range_new != nr_range ||
743 memcmp(range, range_new, sizeof(range)))
744 result[i].bad = 1;
745 }
746
747 if (!result[i].bad && (range_sums - range_sums_new <
748 min_loss_pfn[num_reg])) {
749 min_loss_pfn[num_reg] =
750 range_sums - range_sums_new;
751 }
752}
753
754static void __init mtrr_print_out_one_result(int i)
755{
756 char gran_factor, chunk_factor, lose_factor;
757 unsigned long gran_base, chunk_base, lose_base;
758
759 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
760 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
761 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
762 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
763 result[i].bad ? "*BAD*" : " ",
764 gran_base, gran_factor, chunk_base, chunk_factor);
765 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
766 result[i].num_reg, result[i].bad ? "-" : "",
767 lose_base, lose_factor);
768}
769
770static int __init mtrr_search_optimal_index(void)
771{
772 int i;
773 int num_reg_good;
774 int index_good;
775
776 if (nr_mtrr_spare_reg >= num_var_ranges)
777 nr_mtrr_spare_reg = num_var_ranges - 1;
778 num_reg_good = -1;
779 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
780 if (!min_loss_pfn[i])
781 num_reg_good = i;
782 }
783
784 index_good = -1;
785 if (num_reg_good != -1) {
786 for (i = 0; i < NUM_RESULT; i++) {
787 if (!result[i].bad &&
788 result[i].num_reg == num_reg_good &&
789 !result[i].lose_cover_sizek) {
790 index_good = i;
791 break;
792 }
793 }
794 }
795
796 return index_good;
797}
798
799
800int __init mtrr_cleanup(unsigned address_bits)
801{
802 unsigned long extra_remove_base, extra_remove_size;
803 unsigned long base, size, def, dummy;
804 mtrr_type type;
805 u64 chunk_size, gran_size;
806 int index_good;
807 int i;
808
809 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
810 return 0;
811 rdmsr(MTRRdefType_MSR, def, dummy);
812 def &= 0xff;
813 if (def != MTRR_TYPE_UNCACHABLE)
814 return 0;
815
816 /* get it and store it aside */
817 memset(range_state, 0, sizeof(range_state));
818 for (i = 0; i < num_var_ranges; i++) {
819 mtrr_if->get(i, &base, &size, &type);
820 range_state[i].base_pfn = base;
821 range_state[i].size_pfn = size;
822 range_state[i].type = type;
823 }
824
825 	/* check whether we need to handle it and can handle it */
826 if (!mtrr_need_cleanup())
827 return 0;
828
829 	/* print the original var MTRRs first, for debugging: */
830 printk(KERN_DEBUG "original variable MTRRs\n");
831 print_out_mtrr_range_state();
832
833 memset(range, 0, sizeof(range));
834 extra_remove_size = 0;
835 extra_remove_base = 1 << (32 - PAGE_SHIFT);
836 if (mtrr_tom2)
837 extra_remove_size =
838 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
839 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
840 extra_remove_size);
841 /*
842 	 * [0, 1M) should always be covered by a WB var MTRR, and the
843 	 * fixed MTRRs take effect before the var MTRRs for that range
844 */
845 nr_range = add_range_with_merge(range, nr_range, 0,
846 (1ULL<<(20 - PAGE_SHIFT)) - 1);
847 /* sort the ranges */
848 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
849
850 range_sums = sum_ranges(range, nr_range);
851 	printk(KERN_INFO "total RAM covered: %ldM\n",
852 range_sums >> (20 - PAGE_SHIFT));
853
854 if (mtrr_chunk_size && mtrr_gran_size) {
855 i = 0;
856 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
857 extra_remove_base, extra_remove_size, i);
858
859 mtrr_print_out_one_result(i);
860
861 if (!result[i].bad) {
862 set_var_mtrr_all(address_bits);
863 printk(KERN_DEBUG "New variable MTRRs\n");
864 print_out_mtrr_range_state();
865 return 1;
866 }
867 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
868 "will find optimal one\n");
869 }
870
871 i = 0;
872 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
873 memset(result, 0, sizeof(result));
874 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
875
876 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
877 chunk_size <<= 1) {
878
879 if (i >= NUM_RESULT)
880 continue;
881
882 mtrr_calc_range_state(chunk_size, gran_size,
883 extra_remove_base, extra_remove_size, i);
884 if (debug_print) {
885 mtrr_print_out_one_result(i);
886 printk(KERN_INFO "\n");
887 }
888
889 i++;
890 }
891 }
892
893 /* try to find the optimal index */
894 index_good = mtrr_search_optimal_index();
895
896 if (index_good != -1) {
897 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
898 i = index_good;
899 mtrr_print_out_one_result(i);
900
901 /* convert ranges to var ranges state */
902 chunk_size = result[i].chunk_sizek;
903 chunk_size <<= 10;
904 gran_size = result[i].gran_sizek;
905 gran_size <<= 10;
906 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
907 set_var_mtrr_all(address_bits);
908 printk(KERN_DEBUG "New variable MTRRs\n");
909 print_out_mtrr_range_state();
910 return 1;
911 } else {
912 /* print out all */
913 for (i = 0; i < NUM_RESULT; i++)
914 mtrr_print_out_one_result(i);
915 }
916
917 	printk(KERN_INFO "mtrr_cleanup: cannot find an optimal value\n");
918 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
919
920 return 0;
921}
922#else
923int __init mtrr_cleanup(unsigned address_bits)
924{
925 return 0;
926}
927#endif
928
929static int disable_mtrr_trim;
930
931static int __init disable_mtrr_trim_setup(char *str)
932{
933 disable_mtrr_trim = 1;
934 return 0;
935}
936early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
937
938/*
939 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
940 * for memory >4GB. Check for that here.
941 * Note this won't check whether the MTRRs below 4GB (where the magic
942 * bit doesn't apply) are wrong, but no such case is known in the wild.
943 */
944#define Tom2Enabled (1U << 21)
945#define Tom2ForceMemTypeWB (1U << 22)
946
947int __init amd_special_default_mtrr(void)
948{
949 u32 l, h;
950
951 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
952 return 0;
953 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
954 return 0;
955 /* In case some hypervisor doesn't pass SYSCFG through */
956 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
957 return 0;
958 /*
959 * Memory between 4GB and top of mem is forced WB by this magic bit.
960 * Reserved before K8RevF, but should be zero there.
961 */
962 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
963 (Tom2Enabled | Tom2ForceMemTypeWB))
964 return 1;
965 return 0;
966}
967
968static u64 __init real_trim_memory(unsigned long start_pfn,
969 unsigned long limit_pfn)
970{
971 u64 trim_start, trim_size;
972 trim_start = start_pfn;
973 trim_start <<= PAGE_SHIFT;
974 trim_size = limit_pfn;
975 trim_size <<= PAGE_SHIFT;
976 trim_size -= trim_start;
977
978 return e820_update_range(trim_start, trim_size, E820_RAM,
979 E820_RESERVED);
980}
981/**
982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
983 * @end_pfn: ending page frame number
984 *
985 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
986 * memory configurations. This routine checks that the highest MTRR matches
987 * the end of memory, to make sure the MTRRs having a write back type cover
988 * all of the memory the kernel is intending to use. If not, it'll trim any
989 * memory off the end by adjusting end_pfn, removing it from the kernel's
990 * allocation pools, warning the user with an obnoxious message.
991 */
992int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
993{
994 unsigned long i, base, size, highest_pfn = 0, def, dummy;
995 mtrr_type type;
996 u64 total_trim_size;
997
998 /* extra one for all 0 */
999 int num[MTRR_NUM_TYPES + 1];
1000 /*
1001 * Make sure we only trim uncachable memory on machines that
1002 * support the Intel MTRR architecture:
1003 */
1004 if (!is_cpu(INTEL) || disable_mtrr_trim)
1005 return 0;
1006 rdmsr(MTRRdefType_MSR, def, dummy);
1007 def &= 0xff;
1008 if (def != MTRR_TYPE_UNCACHABLE)
1009 return 0;
1010
1011 /* get it and store it aside */
1012 memset(range_state, 0, sizeof(range_state));
1013 for (i = 0; i < num_var_ranges; i++) {
1014 mtrr_if->get(i, &base, &size, &type);
1015 range_state[i].base_pfn = base;
1016 range_state[i].size_pfn = size;
1017 range_state[i].type = type;
1018 }
1019
1020 /* Find highest cached pfn */
1021 for (i = 0; i < num_var_ranges; i++) {
1022 type = range_state[i].type;
1023 if (type != MTRR_TYPE_WRBACK)
1024 continue;
1025 base = range_state[i].base_pfn;
1026 size = range_state[i].size_pfn;
1027 if (highest_pfn < base + size)
1028 highest_pfn = base + size;
1029 }
1030
1031 	/* kvm/qemu don't set up MTRRs; don't trim all of memory away */
1032 if (!highest_pfn) {
1033 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1034 return 0;
1035 }
1036
1037 	/* check the number of entries */
1038 memset(num, 0, sizeof(num));
1039 for (i = 0; i < num_var_ranges; i++) {
1040 type = range_state[i].type;
1041 if (type >= MTRR_NUM_TYPES)
1042 continue;
1043 size = range_state[i].size_pfn;
1044 if (!size)
1045 type = MTRR_NUM_TYPES;
1046 num[type]++;
1047 }
1048
1049 /* no entry for WB? */
1050 if (!num[MTRR_TYPE_WRBACK])
1051 return 0;
1052
1053 /* check if we only had WB and UC */
1054 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1055 num_var_ranges - num[MTRR_NUM_TYPES])
1056 return 0;
1057
1058 memset(range, 0, sizeof(range));
1059 nr_range = 0;
1060 if (mtrr_tom2) {
1061 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1062 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1063 if (highest_pfn < range[nr_range].end + 1)
1064 highest_pfn = range[nr_range].end + 1;
1065 nr_range++;
1066 }
1067 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1068
1069 total_trim_size = 0;
1070 /* check the head */
1071 if (range[0].start)
1072 total_trim_size += real_trim_memory(0, range[0].start);
1073 /* check the holes */
1074 for (i = 0; i < nr_range - 1; i++) {
1075 if (range[i].end + 1 < range[i+1].start)
1076 total_trim_size += real_trim_memory(range[i].end + 1,
1077 range[i+1].start);
1078 }
1079 /* check the top */
1080 i = nr_range - 1;
1081 if (range[i].end + 1 < end_pfn)
1082 total_trim_size += real_trim_memory(range[i].end + 1,
1083 end_pfn);
1084
1085 if (total_trim_size) {
1086 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1087 " all of memory, losing %lluMB of RAM.\n",
1088 total_trim_size >> 20);
1089
1090 if (!changed_by_mtrr_cleanup)
1091 WARN_ON(1);
1092
1093 printk(KERN_INFO "update e820 for mtrr\n");
1094 update_e820();
1095
1096 return 1;
1097 }
1098
1099 return 0;
1100}
1101
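
cleanup.c's range helpers treat [start, end] as inclusive pfn intervals and use end == 0 to mark a free slot; subtract_range() can split one range into two without updating any count, which is why x86_get_mtrr_mem_range() re-counts live slots afterwards. A hedged userspace harness, assuming struct res_range, RANGE_NUM, add_range_with_merge() and subtract_range() were lifted out of the file verbatim (with __init dropped):

    int main(void)
    {
    	static struct res_range r[RANGE_NUM];
    	int n = 0;

    	n = add_range_with_merge(r, n, 0x000, 0x3ff);	/* [0x000, 0x3ff]       */
    	n = add_range_with_merge(r, n, 0x400, 0x7ff);	/* adjacent: merged to  */
    							/* one range [0, 0x7ff] */
    	subtract_range(r, 0x200, 0x2ff);		/* split into [0, 0x1ff]*/
    							/* and [0x300, 0x7ff]   */

    	/* n still reads 1 here; live slots must be re-counted by
    	 * scanning for end != 0, as the callers in cleanup.c do. */
    	return 0;
    }
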
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0c0a455fe95c..0b776c09aff3 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,13 +33,31 @@ u64 mtrr_tom2;
33struct mtrr_state_type mtrr_state = {}; 33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state); 34EXPORT_SYMBOL_GPL(mtrr_state);
35 35
36static int __initdata mtrr_show; 36/**
37static int __init mtrr_debug(char *opt) 37 * BIOS is expected to clear MtrrFixDramModEn bit, see for example
38 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
39 * Opteron Processors" (26094 Rev. 3.30 February 2006), section
40 * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
41 * to 1 during BIOS initalization of the fixed MTRRs, then cleared to
42 * 0 for operation."
43 */
44static inline void k8_check_syscfg_dram_mod_en(void)
38{ 45{
39 mtrr_show = 1; 46 u32 lo, hi;
40 return 0; 47
48 if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
49 (boot_cpu_data.x86 >= 0x0f)))
50 return;
51
52 rdmsr(MSR_K8_SYSCFG, lo, hi);
53 if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
54 printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
55 " not cleared by BIOS, clearing this bit\n",
56 smp_processor_id());
57 lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
58 mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
59 }
41} 60}
42early_param("mtrr.show", mtrr_debug);
43 61
44/* 62/*
45 * Returns the effective MTRR type for the region 63 * Returns the effective MTRR type for the region
@@ -174,6 +192,8 @@ get_fixed_ranges(mtrr_type * frs)
174 unsigned int *p = (unsigned int *) frs; 192 unsigned int *p = (unsigned int *) frs;
175 int i; 193 int i;
176 194
195 k8_check_syscfg_dram_mod_en();
196
177 rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); 197 rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
178 198
179 for (i = 0; i < 2; i++) 199 for (i = 0; i < 2; i++)
@@ -188,18 +208,94 @@ void mtrr_save_fixed_ranges(void *info)
188 get_fixed_ranges(mtrr_state.fixed_ranges); 208 get_fixed_ranges(mtrr_state.fixed_ranges);
189} 209}
190 210
191static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) 211static unsigned __initdata last_fixed_start;
212static unsigned __initdata last_fixed_end;
213static mtrr_type __initdata last_fixed_type;
214
215static void __init print_fixed_last(void)
216{
217 if (!last_fixed_end)
218 return;
219
220 printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start,
221 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
222
223 last_fixed_end = 0;
224}
225
226static void __init update_fixed_last(unsigned base, unsigned end,
227 mtrr_type type)
228{
229 last_fixed_start = base;
230 last_fixed_end = end;
231 last_fixed_type = type;
232}
233
234static void __init print_fixed(unsigned base, unsigned step,
235 const mtrr_type *types)
192{ 236{
193 unsigned i; 237 unsigned i;
194 238
195 for (i = 0; i < 8; ++i, ++types, base += step) 239 for (i = 0; i < 8; ++i, ++types, base += step) {
196 printk(KERN_INFO "MTRR %05X-%05X %s\n", 240 if (last_fixed_end == 0) {
197 base, base + step - 1, mtrr_attrib_to_str(*types)); 241 update_fixed_last(base, base + step, *types);
242 continue;
243 }
244 if (last_fixed_end == base && last_fixed_type == *types) {
245 last_fixed_end = base + step;
246 continue;
247 }
248 /* new segments: gap or different type */
249 print_fixed_last();
250 update_fixed_last(base, base + step, *types);
251 }
198} 252}
199 253
200static void prepare_set(void); 254static void prepare_set(void);
201static void post_set(void); 255static void post_set(void);
202 256
257static void __init print_mtrr_state(void)
258{
259 unsigned int i;
260 int high_width;
261
262 printk(KERN_DEBUG "MTRR default type: %s\n",
263 mtrr_attrib_to_str(mtrr_state.def_type));
264 if (mtrr_state.have_fixed) {
265 printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
266 mtrr_state.enabled & 1 ? "en" : "dis");
267 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
268 for (i = 0; i < 2; ++i)
269 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
270 for (i = 0; i < 8; ++i)
271 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
272
273 /* tail */
274 print_fixed_last();
275 }
276 printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
277 mtrr_state.enabled & 2 ? "en" : "dis");
278 high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
279 for (i = 0; i < num_var_ranges; ++i) {
280 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
281 printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
282 i,
283 high_width,
284 mtrr_state.var_ranges[i].base_hi,
285 mtrr_state.var_ranges[i].base_lo >> 12,
286 high_width,
287 mtrr_state.var_ranges[i].mask_hi,
288 mtrr_state.var_ranges[i].mask_lo >> 12,
289 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
290 else
291 printk(KERN_DEBUG " %u disabled\n", i);
292 }
293 if (mtrr_tom2) {
294 printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
295 mtrr_tom2, mtrr_tom2>>20);
296 }
297}
298
203/* Grab all of the MTRR state for this CPU into *state */ 299/* Grab all of the MTRR state for this CPU into *state */
204void __init get_mtrr_state(void) 300void __init get_mtrr_state(void)
205{ 301{
@@ -231,41 +327,9 @@ void __init get_mtrr_state(void)
231 mtrr_tom2 |= low; 327 mtrr_tom2 |= low;
232 mtrr_tom2 &= 0xffffff800000ULL; 328 mtrr_tom2 &= 0xffffff800000ULL;
233 } 329 }
234 if (mtrr_show) { 330
235 int high_width; 331 print_mtrr_state();
236 332
237 printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
238 if (mtrr_state.have_fixed) {
239 printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
240 mtrr_state.enabled & 1 ? "en" : "dis");
241 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
242 for (i = 0; i < 2; ++i)
243 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
244 for (i = 0; i < 8; ++i)
245 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
246 }
247 printk(KERN_INFO "MTRR variable ranges %sabled:\n",
248 mtrr_state.enabled & 2 ? "en" : "dis");
249 high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
250 for (i = 0; i < num_var_ranges; ++i) {
251 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
252 printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
253 i,
254 high_width,
255 mtrr_state.var_ranges[i].base_hi,
256 mtrr_state.var_ranges[i].base_lo >> 12,
257 high_width,
258 mtrr_state.var_ranges[i].mask_hi,
259 mtrr_state.var_ranges[i].mask_lo >> 12,
260 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
261 else
262 printk(KERN_INFO "MTRR %u disabled\n", i);
263 }
264 if (mtrr_tom2) {
265 printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
266 mtrr_tom2, mtrr_tom2>>20);
267 }
268 }
269 mtrr_state_set = 1; 333 mtrr_state_set = 1;
270 334
271 /* PAT setup for BP. We need to go through sync steps here */ 335 /* PAT setup for BP. We need to go through sync steps here */
@@ -308,27 +372,10 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
308} 372}
309 373
310/** 374/**
311 * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
312 * see AMD publication no. 24593, chapter 3.2.1 for more information
313 */
314static inline void k8_enable_fixed_iorrs(void)
315{
316 unsigned lo, hi;
317
318 rdmsr(MSR_K8_SYSCFG, lo, hi);
319 mtrr_wrmsr(MSR_K8_SYSCFG, lo
320 | K8_MTRRFIXRANGE_DRAM_ENABLE
321 | K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
322}
323
324/**
325 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have 375 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
326 * @msr: MSR address of the MTRR which should be checked and updated 376 * @msr: MSR address of the MTRR which should be checked and updated
327 * @changed: pointer which indicates whether the MTRR needed to be changed 377 * @changed: pointer which indicates whether the MTRR needed to be changed
328 * @msrwords: pointer to the MSR values which the MSR should have 378 * @msrwords: pointer to the MSR values which the MSR should have
329 *
330 * If K8 extentions are wanted, update the K8 SYSCFG MSR also.
331 * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
332 */ 379 */
333static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) 380static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
334{ 381{
@@ -337,10 +384,6 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
337 rdmsr(msr, lo, hi); 384 rdmsr(msr, lo, hi);
338 385
339 if (lo != msrwords[0] || hi != msrwords[1]) { 386 if (lo != msrwords[0] || hi != msrwords[1]) {
340 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
341 (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
342 ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
343 k8_enable_fixed_iorrs();
344 mtrr_wrmsr(msr, msrwords[0], msrwords[1]); 387 mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
345 *changed = true; 388 *changed = true;
346 } 389 }
@@ -376,22 +419,31 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
376{ 419{
377 unsigned int mask_lo, mask_hi, base_lo, base_hi; 420 unsigned int mask_lo, mask_hi, base_lo, base_hi;
378 unsigned int tmp, hi; 421 unsigned int tmp, hi;
422 int cpu;
423
424 /*
425 * get_mtrr() doesn't need to update mtrr_state; it can also be called
426 * from any CPU, so read the registers directly on the current one.
427 */
428 cpu = get_cpu();
379 429
380 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 430 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
431
381 if ((mask_lo & 0x800) == 0) { 432 if ((mask_lo & 0x800) == 0) {
382 /* Invalid (i.e. free) range */ 433 /* Invalid (i.e. free) range */
383 *base = 0; 434 *base = 0;
384 *size = 0; 435 *size = 0;
385 *type = 0; 436 *type = 0;
386 return; 437 goto out_put_cpu;
387 } 438 }
388 439
389 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); 440 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
390 441
391 /* Work out the shifted address mask. */ 442 /* Work out the shifted address mask: */
392 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; 443 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
393 mask_lo = size_or_mask | tmp; 444 mask_lo = size_or_mask | tmp;
394 /* Expand tmp with high bits to all 1s*/ 445
446 /* Expand tmp with high bits to all 1s: */
395 hi = fls(tmp); 447 hi = fls(tmp);
396 if (hi > 0) { 448 if (hi > 0) {
397 tmp |= ~((1<<(hi - 1)) - 1); 449 tmp |= ~((1<<(hi - 1)) - 1);
@@ -402,11 +454,16 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
402 } 454 }
403 } 455 }
404 456
405 /* This works correctly if size is a power of two, i.e. a 457 /*
406 contiguous range. */ 458 * This works correctly if size is a power of two, i.e. a
459 * contiguous range:
460 */
407 *size = -mask_lo; 461 *size = -mask_lo;
408 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; 462 *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
409 *type = base_lo & 0xff; 463 *type = base_lo & 0xff;
464
465out_put_cpu:
466 put_cpu();
410} 467}
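To see what the mask arithmetic above computes, here is a user-space rendering of the decode path with assumed values (36 physical address bits, PAGE_SHIFT of 12, a 256MB write-back range at 1GB); it skips the fls()-based fix-up of non-contiguous masks:

	#include <stdio.h>

	int main(void)
	{
		/* Assumed register values for a valid 256MB WB range at 1GB: */
		unsigned int base_lo = 0x40000006, base_hi = 0x0;	/* type 6 = WB */
		unsigned int mask_lo = 0xf0000800, mask_hi = 0xf;	/* bit 11 = valid */
		unsigned int size_or_mask = 0xff000000;	/* ~((1 << (36 - 12)) - 1) */
		unsigned int tmp, mask;
		unsigned long base, size;

		tmp = mask_hi << (32 - 12) | mask_lo >> 12;	/* mask in pfn units */
		mask = size_or_mask | tmp;
		size = -mask;		/* power-of-two size, in pages */
		base = (unsigned long)base_hi << (32 - 12) | base_lo >> 12;

		printf("base pfn %#lx, size %#lx pages, type %u\n",
		       base, size, base_lo & 0xff);	/* 0x40000, 0x10000, 6 */
		return 0;
	}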
411 468
412/** 469/**
@@ -419,6 +476,8 @@ static int set_fixed_ranges(mtrr_type * frs)
419 bool changed = false; 476 bool changed = false;
420 int block=-1, range; 477 int block=-1, range;
421 478
479 k8_check_syscfg_dram_mod_en();
480
422 while (fixed_range_blocks[++block].ranges) 481 while (fixed_range_blocks[++block].ranges)
423 for (range=0; range < fixed_range_blocks[block].ranges; range++) 482 for (range=0; range < fixed_range_blocks[block].ranges; range++)
424 set_fixed_range(fixed_range_blocks[block].base_msr + range, 483 set_fixed_range(fixed_range_blocks[block].base_msr + range,
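The k8_check_syscfg_dram_mod_en() call is new here and its body lies outside this hunk. Judging from the removed k8_enable_fixed_iorrs() and the SYSCFG bits it used, a plausible sketch of what it does (an assumption, not the patch's actual body) is:

	static void k8_check_syscfg_dram_mod_en(void)
	{
		u32 lo, hi;

		if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
		    boot_cpu_data.x86 < 0x0f)
			return;

		rdmsr(MSR_K8_SYSCFG, lo, hi);
		if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
			/* The BIOS should have cleared MtrrFixDramModEn already */
			lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
			mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
		}
	}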
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4c4214690dd1..fb73a52913a4 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -377,10 +377,6 @@ static const struct file_operations mtrr_fops = {
377 .release = mtrr_close, 377 .release = mtrr_close,
378}; 378};
379 379
380
381static struct proc_dir_entry *proc_root_mtrr;
382
383
384static int mtrr_seq_show(struct seq_file *seq, void *offset) 380static int mtrr_seq_show(struct seq_file *seq, void *offset)
385{ 381{
386 char factor; 382 char factor;
@@ -423,11 +419,7 @@ static int __init mtrr_if_init(void)
423 (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) 419 (!cpu_has(c, X86_FEATURE_CENTAUR_MCR)))
424 return -ENODEV; 420 return -ENODEV;
425 421
426 proc_root_mtrr = 422 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
427 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
428
429 if (proc_root_mtrr)
430 proc_root_mtrr->owner = THIS_MODULE;
431 return 0; 423 return 0;
432} 424}
433 425
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 236a401b8259..03cda01f57c7 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -574,7 +574,7 @@ struct mtrr_value {
574 unsigned long lsize; 574 unsigned long lsize;
575}; 575};
576 576
577static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; 577static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
578 578
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 579static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
580{ 580{
@@ -582,9 +582,9 @@ static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
582 582
583 for (i = 0; i < num_var_ranges; i++) { 583 for (i = 0; i < num_var_ranges; i++) {
584 mtrr_if->get(i, 584 mtrr_if->get(i,
585 &mtrr_state[i].lbase, 585 &mtrr_value[i].lbase,
586 &mtrr_state[i].lsize, 586 &mtrr_value[i].lsize,
587 &mtrr_state[i].ltype); 587 &mtrr_value[i].ltype);
588 } 588 }
589 return 0; 589 return 0;
590} 590}
@@ -594,11 +594,11 @@ static int mtrr_restore(struct sys_device * sysdev)
594 int i; 594 int i;
595 595
596 for (i = 0; i < num_var_ranges; i++) { 596 for (i = 0; i < num_var_ranges; i++) {
597 if (mtrr_state[i].lsize) 597 if (mtrr_value[i].lsize)
598 set_mtrr(i, 598 set_mtrr(i,
599 mtrr_state[i].lbase, 599 mtrr_value[i].lbase,
600 mtrr_state[i].lsize, 600 mtrr_value[i].lsize,
601 mtrr_state[i].ltype); 601 mtrr_value[i].ltype);
602 } 602 }
603 return 0; 603 return 0;
604} 604}
@@ -610,1058 +610,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
610 .resume = mtrr_restore, 610 .resume = mtrr_restore,
611}; 611};
612 612
613/* should be related to MTRR_VAR_RANGES nums */ 613int __initdata changed_by_mtrr_cleanup;
614#define RANGE_NUM 256
615
616struct res_range {
617 unsigned long start;
618 unsigned long end;
619};
620
621static int __init
622add_range(struct res_range *range, int nr_range, unsigned long start,
623 unsigned long end)
624{
625 /* out of slots */
626 if (nr_range >= RANGE_NUM)
627 return nr_range;
628
629 range[nr_range].start = start;
630 range[nr_range].end = end;
631
632 nr_range++;
633
634 return nr_range;
635}
636
637static int __init
638add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
639 unsigned long end)
640{
641 int i;
642
643 /* try to merge it with old one */
644 for (i = 0; i < nr_range; i++) {
645 unsigned long final_start, final_end;
646 unsigned long common_start, common_end;
647
648 if (!range[i].end)
649 continue;
650
651 common_start = max(range[i].start, start);
652 common_end = min(range[i].end, end);
653 if (common_start > common_end + 1)
654 continue;
655
656 final_start = min(range[i].start, start);
657 final_end = max(range[i].end, end);
658
659 range[i].start = final_start;
660 range[i].end = final_end;
661 return nr_range;
662 }
663
664 /* need to add that */
665 return add_range(range, nr_range, start, end);
666}
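Note that ranges are inclusive of their end, so the common_start > common_end + 1 test lets exactly-adjacent ranges coalesce. A usage sketch with hypothetical values:

	/* Illustration only: merge behaviour of add_range_with_merge(). */
	static void __init merge_demo(void)
	{
		struct res_range r[RANGE_NUM] = { { 0x100, 0x1ff } };
		int n = 1;

		n = add_range_with_merge(r, n, 0x200, 0x2ff);	/* adjacent: r[0] grows to 0x100-0x2ff, n stays 1 */
		n = add_range_with_merge(r, n, 0x500, 0x5ff);	/* disjoint: new slot, n becomes 2 */
	}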
667
668static void __init
669subtract_range(struct res_range *range, unsigned long start, unsigned long end)
670{
671 int i, j;
672
673 for (j = 0; j < RANGE_NUM; j++) {
674 if (!range[j].end)
675 continue;
676
677 if (start <= range[j].start && end >= range[j].end) {
678 range[j].start = 0;
679 range[j].end = 0;
680 continue;
681 }
682
683 if (start <= range[j].start && end < range[j].end &&
684 range[j].start < end + 1) {
685 range[j].start = end + 1;
686 continue;
687 }
688
689
690 if (start > range[j].start && end >= range[j].end &&
691 range[j].end > start - 1) {
692 range[j].end = start - 1;
693 continue;
694 }
695
696 if (start > range[j].start && end < range[j].end) {
697 /* find the new spare */
698 for (i = 0; i < RANGE_NUM; i++) {
699 if (range[i].end == 0)
700 break;
701 }
702 if (i < RANGE_NUM) {
703 range[i].end = range[j].end;
704 range[i].start = end + 1;
705 } else {
706 printk(KERN_ERR "ran out of slots in ranges\n");
707 }
708 range[j].end = start - 1;
709 continue;
710 }
711 }
712}
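The last case above is the interesting one: removing the middle of a range splits it, with the tail moved to a spare slot. For example (hypothetical values):

	/* Illustration only: punching a hole with subtract_range(). */
	static void __init hole_demo(void)
	{
		struct res_range r[RANGE_NUM] = { { 0x000, 0xfff } };

		subtract_range(r, 0x400, 0x7ff);
		/* r[0] shrinks to 0x000-0x3ff; a spare slot receives 0x800-0xfff */
	}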
713
714static int __init cmp_range(const void *x1, const void *x2)
715{
716 const struct res_range *r1 = x1;
717 const struct res_range *r2 = x2;
718 long start1, start2;
719
720 start1 = r1->start;
721 start2 = r2->start;
722
723 return start1 - start2;
724}
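A caveat worth noting (not something this patch changes): subtraction comparators truncate the long difference to int, which is harmless for pfn-sized values here but not in general. A comparison-based variant sidesteps the question entirely, e.g.:

	/* Sketch: an overflow-safe equivalent of cmp_range(). */
	static int cmp_range_safe(const void *x1, const void *x2)
	{
		const struct res_range *r1 = x1, *r2 = x2;

		if (r1->start < r2->start)
			return -1;
		return r1->start > r2->start;
	}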
725
726struct var_mtrr_range_state {
727 unsigned long base_pfn;
728 unsigned long size_pfn;
729 mtrr_type type;
730};
731
732static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
733static int __initdata debug_print;
734
735static int __init
736x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
737 unsigned long extra_remove_base,
738 unsigned long extra_remove_size)
739{
740 unsigned long i, base, size;
741 mtrr_type type;
742
743 for (i = 0; i < num_var_ranges; i++) {
744 type = range_state[i].type;
745 if (type != MTRR_TYPE_WRBACK)
746 continue;
747 base = range_state[i].base_pfn;
748 size = range_state[i].size_pfn;
749 nr_range = add_range_with_merge(range, nr_range, base,
750 base + size - 1);
751 }
752 if (debug_print) {
753 printk(KERN_DEBUG "After WB checking\n");
754 for (i = 0; i < nr_range; i++)
755 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
756 range[i].start, range[i].end + 1);
757 }
758
759 /* take out UC ranges */
760 for (i = 0; i < num_var_ranges; i++) {
761 type = range_state[i].type;
762 if (type != MTRR_TYPE_UNCACHABLE &&
763 type != MTRR_TYPE_WRPROT)
764 continue;
765 size = range_state[i].size_pfn;
766 if (!size)
767 continue;
768 base = range_state[i].base_pfn;
769 subtract_range(range, base, base + size - 1);
770 }
771 if (extra_remove_size)
772 subtract_range(range, extra_remove_base,
773 extra_remove_base + extra_remove_size - 1);
774
775 /* get new range num */
776 nr_range = 0;
777 for (i = 0; i < RANGE_NUM; i++) {
778 if (!range[i].end)
779 continue;
780 nr_range++;
781 }
782 if (debug_print) {
783 printk(KERN_DEBUG "After UC checking\n");
784 for (i = 0; i < nr_range; i++)
785 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
786 range[i].start, range[i].end + 1);
787 }
788
789 /* sort the ranges */
790 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
791 if (debug_print) {
792 printk(KERN_DEBUG "After sorting\n");
793 for (i = 0; i < nr_range; i++)
794 printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
795 range[i].start, range[i].end + 1);
796 }
797
798 /* clear those that are not used */
799 for (i = nr_range; i < RANGE_NUM; i++)
800 memset(&range[i], 0, sizeof(range[i]));
801
802 return nr_range;
803}
804
805static struct res_range __initdata range[RANGE_NUM];
806static int __initdata nr_range;
807
808#ifdef CONFIG_MTRR_SANITIZER
809
810static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
811{
812 unsigned long sum;
813 int i;
814
815 sum = 0;
816 for (i = 0; i < nr_range; i++)
817 sum += range[i].end + 1 - range[i].start;
818
819 return sum;
820}
821
822static int enable_mtrr_cleanup __initdata =
823 CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
824
825static int __init disable_mtrr_cleanup_setup(char *str)
826{
827 enable_mtrr_cleanup = 0;
828 return 0;
829}
830early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
831
832static int __init enable_mtrr_cleanup_setup(char *str)
833{
834 enable_mtrr_cleanup = 1;
835 return 0;
836}
837early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
838
839static int __init mtrr_cleanup_debug_setup(char *str)
840{
841 debug_print = 1;
842 return 0;
843}
844early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
845
846struct var_mtrr_state {
847 unsigned long range_startk;
848 unsigned long range_sizek;
849 unsigned long chunk_sizek;
850 unsigned long gran_sizek;
851 unsigned int reg;
852};
853
854static void __init
855set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
856 unsigned char type, unsigned int address_bits)
857{
858 u32 base_lo, base_hi, mask_lo, mask_hi;
859 u64 base, mask;
860
861 if (!sizek) {
862 fill_mtrr_var_range(reg, 0, 0, 0, 0);
863 return;
864 }
865
866 mask = (1ULL << address_bits) - 1;
867 mask &= ~((((u64)sizek) << 10) - 1);
868
869 base = ((u64)basek) << 10;
870
871 base |= type;
872 mask |= 0x800;
873
874 base_lo = base & ((1ULL<<32) - 1);
875 base_hi = base >> 32;
876
877 mask_lo = mask & ((1ULL<<32) - 1);
878 mask_hi = mask >> 32;
879
880 fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
881}
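The mask computed above is simply the address-space mask with the chunk's offset bits cleared. A runnable check with assumed inputs (1GB base, 256MB size, 36 address bits), matching the decode example earlier:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t basek = 1024 * 1024;		/* 1GB, in KB */
		uint64_t sizek = 256 * 1024;		/* 256MB, in KB */
		uint64_t mask = (1ULL << 36) - 1;	/* address_bits = 36 */
		uint64_t base;

		mask &= ~((sizek << 10) - 1);	/* clear offset bits inside the chunk */
		base = basek << 10;
		base |= 6;			/* MTRR_TYPE_WRBACK in the low byte */
		mask |= 0x800;			/* valid bit */

		printf("base %#llx mask %#llx\n",	/* 0x40000006, 0xff0000800 */
		       (unsigned long long)base, (unsigned long long)mask);
		return 0;
	}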
882
883static void __init
884save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
885 unsigned char type)
886{
887 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
888 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
889 range_state[reg].type = type;
890}
891
892static void __init
893set_var_mtrr_all(unsigned int address_bits)
894{
895 unsigned long basek, sizek;
896 unsigned char type;
897 unsigned int reg;
898
899 for (reg = 0; reg < num_var_ranges; reg++) {
900 basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
901 sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
902 type = range_state[reg].type;
903
904 set_var_mtrr(reg, basek, sizek, type, address_bits);
905 }
906}
907
908static unsigned long to_size_factor(unsigned long sizek, char *factorp)
909{
910 char factor;
911 unsigned long base = sizek;
912
913 if (base & ((1<<10) - 1)) {
914 /* not MB alignment */
915 factor = 'K';
916 } else if (base & ((1<<20) - 1)){
917 factor = 'M';
918 base >>= 10;
919 } else {
920 factor = 'G';
921 base >>= 20;
922 }
923
924 *factorp = factor;
925
926 return base;
927}
928
929static unsigned int __init
930range_to_mtrr(unsigned int reg, unsigned long range_startk,
931 unsigned long range_sizek, unsigned char type)
932{
933 if (!range_sizek || (reg >= num_var_ranges))
934 return reg;
935
936 while (range_sizek) {
937 unsigned long max_align, align;
938 unsigned long sizek;
939
940 /* Compute the maximum size I can make a range */
941 if (range_startk)
942 max_align = ffs(range_startk) - 1;
943 else
944 max_align = 32;
945 align = fls(range_sizek) - 1;
946 if (align > max_align)
947 align = max_align;
948
949 sizek = 1 << align;
950 if (debug_print) {
951 char start_factor = 'K', size_factor = 'K';
952 unsigned long start_base, size_base;
953
954 start_base = to_size_factor(range_startk, &start_factor),
955 size_base = to_size_factor(sizek, &size_factor),
956
957 printk(KERN_DEBUG "Setting variable MTRR %d, "
958 "base: %ld%cB, range: %ld%cB, type %s\n",
959 reg, start_base, start_factor,
960 size_base, size_factor,
961 (type == MTRR_TYPE_UNCACHABLE)?"UC":
962 ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
963 );
964 }
965 save_var_mtrr(reg++, range_startk, sizek, type);
966 range_startk += sizek;
967 range_sizek -= sizek;
968 if (reg >= num_var_ranges)
969 break;
970 }
971 return reg;
972}
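Each pass above emits the largest power-of-two block allowed by both the remaining size and the current base alignment. A user-space rendering of just that arithmetic, fed a hypothetical 1280MB range at 0:

	#include <stdio.h>

	static int fls_ul(unsigned long x) { int n = 0; while (x) { x >>= 1; n++; } return n; }
	static int ffs_ul(unsigned long x) { return x ? fls_ul(x & -x) : 0; }

	int main(void)
	{
		unsigned long startk = 0, sizek = 1280UL * 1024;	/* sizes in KB */

		while (sizek) {
			int max_align = startk ? ffs_ul(startk) - 1 : 32;
			int align = fls_ul(sizek) - 1;
			unsigned long chunk;

			if (align > max_align)
				align = max_align;
			chunk = 1UL << align;
			printf("emit base %luM size %luM\n", startk >> 10, chunk >> 10);
			startk += chunk;
			sizek -= chunk;
		}
		return 0;	/* prints 1024M at 0M, then 256M at 1024M */
	}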
973
974static unsigned __init
975range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
976 unsigned long sizek)
977{
978 unsigned long hole_basek, hole_sizek;
979 unsigned long second_basek, second_sizek;
980 unsigned long range0_basek, range0_sizek;
981 unsigned long range_basek, range_sizek;
982 unsigned long chunk_sizek;
983 unsigned long gran_sizek;
984
985 hole_basek = 0;
986 hole_sizek = 0;
987 second_basek = 0;
988 second_sizek = 0;
989 chunk_sizek = state->chunk_sizek;
990 gran_sizek = state->gran_sizek;
991
992 /* align to gran size, to prevent small blocks from using up MTRRs */
993 range_basek = ALIGN(state->range_startk, gran_sizek);
994 if ((range_basek > basek) && basek)
995 return second_sizek;
996 state->range_sizek -= (range_basek - state->range_startk);
997 range_sizek = ALIGN(state->range_sizek, gran_sizek);
998
999 while (range_sizek > state->range_sizek) {
1000 range_sizek -= gran_sizek;
1001 if (!range_sizek)
1002 return 0;
1003 }
1004 state->range_sizek = range_sizek;
1005
1006 /* try to append some small hole */
1007 range0_basek = state->range_startk;
1008 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
1009
1010 /* no increase */
1011 if (range0_sizek == state->range_sizek) {
1012 if (debug_print)
1013 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
1014 range0_basek<<10,
1015 (range0_basek + state->range_sizek)<<10);
1016 state->reg = range_to_mtrr(state->reg, range0_basek,
1017 state->range_sizek, MTRR_TYPE_WRBACK);
1018 return 0;
1019 }
1020
1021 /* only cut back when it is not the last range */
1022 if (sizek) {
1023 while (range0_basek + range0_sizek > (basek + sizek)) {
1024 if (range0_sizek >= chunk_sizek)
1025 range0_sizek -= chunk_sizek;
1026 else
1027 range0_sizek = 0;
1028
1029 if (!range0_sizek)
1030 break;
1031 }
1032 }
1033
1034second_try:
1035 range_basek = range0_basek + range0_sizek;
1036
1037 /* one hole in the middle */
1038 if (range_basek > basek && range_basek <= (basek + sizek))
1039 second_sizek = range_basek - basek;
1040
1041 if (range0_sizek > state->range_sizek) {
1042
1043 /* one hole in middle or at end */
1044 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
1045
1046 /* hole size should be less than half of range0 size */
1047 if (hole_sizek >= (range0_sizek >> 1) &&
1048 range0_sizek >= chunk_sizek) {
1049 range0_sizek -= chunk_sizek;
1050 second_sizek = 0;
1051 hole_sizek = 0;
1052
1053 goto second_try;
1054 }
1055 }
1056
1057 if (range0_sizek) {
1058 if (debug_print)
1059 printk(KERN_DEBUG "range0: %016lx - %016lx\n",
1060 range0_basek<<10,
1061 (range0_basek + range0_sizek)<<10);
1062 state->reg = range_to_mtrr(state->reg, range0_basek,
1063 range0_sizek, MTRR_TYPE_WRBACK);
1064 }
1065
1066 if (range0_sizek < state->range_sizek) {
1067 /* need to handle the leftover */
1068 range_sizek = state->range_sizek - range0_sizek;
1069
1070 if (debug_print)
1071 printk(KERN_DEBUG "range: %016lx - %016lx\n",
1072 range_basek<<10,
1073 (range_basek + range_sizek)<<10);
1074 state->reg = range_to_mtrr(state->reg, range_basek,
1075 range_sizek, MTRR_TYPE_WRBACK);
1076 }
1077
1078 if (hole_sizek) {
1079 hole_basek = range_basek - hole_sizek - second_sizek;
1080 if (debug_print)
1081 printk(KERN_DEBUG "hole: %016lx - %016lx\n",
1082 hole_basek<<10,
1083 (hole_basek + hole_sizek)<<10);
1084 state->reg = range_to_mtrr(state->reg, hole_basek,
1085 hole_sizek, MTRR_TYPE_UNCACHABLE);
1086 }
1087
1088 return second_sizek;
1089}
1090
1091static void __init
1092set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
1093 unsigned long size_pfn)
1094{
1095 unsigned long basek, sizek;
1096 unsigned long second_sizek = 0;
1097
1098 if (state->reg >= num_var_ranges)
1099 return;
1100
1101 basek = base_pfn << (PAGE_SHIFT - 10);
1102 sizek = size_pfn << (PAGE_SHIFT - 10);
1103
1104 /* See if I can merge with the last range */
1105 if ((basek <= 1024) ||
1106 (state->range_startk + state->range_sizek == basek)) {
1107 unsigned long endk = basek + sizek;
1108 state->range_sizek = endk - state->range_startk;
1109 return;
1110 }
1111 /* Write the range mtrrs */
1112 if (state->range_sizek != 0)
1113 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
1114
1115 /* Allocate an msr */
1116 state->range_startk = basek + second_sizek;
1117 state->range_sizek = sizek - second_sizek;
1118}
1119
1120/* minimum size of an mtrr block that can take a hole */
1121static u64 mtrr_chunk_size __initdata = (256ULL<<20);
1122
1123static int __init parse_mtrr_chunk_size_opt(char *p)
1124{
1125 if (!p)
1126 return -EINVAL;
1127 mtrr_chunk_size = memparse(p, &p);
1128 return 0;
1129}
1130early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
1131
1132/* granularity of an mtrr block */
1133static u64 mtrr_gran_size __initdata;
1134
1135static int __init parse_mtrr_gran_size_opt(char *p)
1136{
1137 if (!p)
1138 return -EINVAL;
1139 mtrr_gran_size = memparse(p, &p);
1140 return 0;
1141}
1142early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
1143
1144static int nr_mtrr_spare_reg __initdata =
1145 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
1146
1147static int __init parse_mtrr_spare_reg(char *arg)
1148{
1149 if (arg)
1150 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
1151 return 0;
1152}
1153
1154early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
1155
1156static int __init
1157x86_setup_var_mtrrs(struct res_range *range, int nr_range,
1158 u64 chunk_size, u64 gran_size)
1159{
1160 struct var_mtrr_state var_state;
1161 int i;
1162 int num_reg;
1163
1164 var_state.range_startk = 0;
1165 var_state.range_sizek = 0;
1166 var_state.reg = 0;
1167 var_state.chunk_sizek = chunk_size >> 10;
1168 var_state.gran_sizek = gran_size >> 10;
1169
1170 memset(range_state, 0, sizeof(range_state));
1171
1172 /* Write the range etc */
1173 for (i = 0; i < nr_range; i++)
1174 set_var_mtrr_range(&var_state, range[i].start,
1175 range[i].end - range[i].start + 1);
1176
1177 /* Write the last range */
1178 if (var_state.range_sizek != 0)
1179 range_to_mtrr_with_hole(&var_state, 0, 0);
1180
1181 num_reg = var_state.reg;
1182 /* Clear out the extra MTRRs */
1183 while (var_state.reg < num_var_ranges) {
1184 save_var_mtrr(var_state.reg, 0, 0, 0);
1185 var_state.reg++;
1186 }
1187
1188 return num_reg;
1189}
1190
1191struct mtrr_cleanup_result {
1192 unsigned long gran_sizek;
1193 unsigned long chunk_sizek;
1194 unsigned long lose_cover_sizek;
1195 unsigned int num_reg;
1196 int bad;
1197};
1198
1199/*
1200 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
1201 * chunk size: gran_size, ..., 2G
1202 * so we need 16 + 15 + ... + 1 = (1 + 16) * 16 / 2 = 136 result slots
1203 */
1204#define NUM_RESULT 136
1205#define PSHIFT (PAGE_SHIFT - 10)
1206
1207static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
1208static unsigned long __initdata min_loss_pfn[RANGE_NUM];
1209
1210static void __init print_out_mtrr_range_state(void)
1211{
1212 int i;
1213 char start_factor = 'K', size_factor = 'K';
1214 unsigned long start_base, size_base;
1215 mtrr_type type;
1216
1217 for (i = 0; i < num_var_ranges; i++) {
1218
1219 size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
1220 if (!size_base)
1221 continue;
1222
1223 size_base = to_size_factor(size_base, &size_factor),
1224 start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
1225 start_base = to_size_factor(start_base, &start_factor),
1226 type = range_state[i].type;
1227
1228 printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
1229 i, start_base, start_factor,
1230 size_base, size_factor,
1231 (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
1232 ((type == MTRR_TYPE_WRPROT) ? "WP" :
1233 ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
1234 );
1235 }
1236}
1237
1238static int __init mtrr_need_cleanup(void)
1239{
1240 int i;
1241 mtrr_type type;
1242 unsigned long size;
1243 /* one extra slot for counting zero-size entries */
1244 int num[MTRR_NUM_TYPES + 1];
1245
1246 /* count the entries of each type */
1247 memset(num, 0, sizeof(num));
1248 for (i = 0; i < num_var_ranges; i++) {
1249 type = range_state[i].type;
1250 size = range_state[i].size_pfn;
1251 if (type >= MTRR_NUM_TYPES)
1252 continue;
1253 if (!size)
1254 type = MTRR_NUM_TYPES;
1255 if (type == MTRR_TYPE_WRPROT)
1256 type = MTRR_TYPE_UNCACHABLE;
1257 num[type]++;
1258 }
1259
1260 /* check if we got UC entries */
1261 if (!num[MTRR_TYPE_UNCACHABLE])
1262 return 0;
1263
1264 /* check if we only had WB and UC */
1265 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1266 num_var_ranges - num[MTRR_NUM_TYPES])
1267 return 0;
1268
1269 return 1;
1270}
1271
1272static unsigned long __initdata range_sums;
1273static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
1274 unsigned long extra_remove_base,
1275 unsigned long extra_remove_size,
1276 int i)
1277{
1278 int num_reg;
1279 static struct res_range range_new[RANGE_NUM];
1280 static int nr_range_new;
1281 unsigned long range_sums_new;
1282
1283 /* convert ranges to var ranges state */
1284 num_reg = x86_setup_var_mtrrs(range, nr_range,
1285 chunk_size, gran_size);
1286
1287 /* we got new setting in range_state, check it */
1288 memset(range_new, 0, sizeof(range_new));
1289 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
1290 extra_remove_base, extra_remove_size);
1291 range_sums_new = sum_ranges(range_new, nr_range_new);
1292
1293 result[i].chunk_sizek = chunk_size >> 10;
1294 result[i].gran_sizek = gran_size >> 10;
1295 result[i].num_reg = num_reg;
1296 if (range_sums < range_sums_new) {
1297 result[i].lose_cover_sizek =
1298 (range_sums_new - range_sums) << PSHIFT;
1299 result[i].bad = 1;
1300 } else
1301 result[i].lose_cover_sizek =
1302 (range_sums - range_sums_new) << PSHIFT;
1303
1304 /* double check it */
1305 if (!result[i].bad && !result[i].lose_cover_sizek) {
1306 if (nr_range_new != nr_range ||
1307 memcmp(range, range_new, sizeof(range)))
1308 result[i].bad = 1;
1309 }
1310
1311 if (!result[i].bad && (range_sums - range_sums_new <
1312 min_loss_pfn[num_reg])) {
1313 min_loss_pfn[num_reg] =
1314 range_sums - range_sums_new;
1315 }
1316}
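The loss bookkeeping above is kept in page frames and converted to KB via PSHIFT. With assumed numbers:

	/* Illustration only: if the original layout covers range_sums = 0x80000
	 * pages (2GB at 4K pages) and a candidate layout covers 0x7fc00 pages:
	 *   lose_cover_sizek = (0x80000 - 0x7fc00) << PSHIFT
	 *                    = 0x400 << 2 = 4096KB of RAM left uncovered.
	 */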
1317
1318static void __init mtrr_print_out_one_result(int i)
1319{
1320 char gran_factor, chunk_factor, lose_factor;
1321 unsigned long gran_base, chunk_base, lose_base;
1322
1323 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
1324 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
1325 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
1326 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
1327 result[i].bad ? "*BAD*" : " ",
1328 gran_base, gran_factor, chunk_base, chunk_factor);
1329 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
1330 result[i].num_reg, result[i].bad ? "-" : "",
1331 lose_base, lose_factor);
1332}
1333
1334static int __init mtrr_search_optimal_index(void)
1335{
1336 int i;
1337 int num_reg_good;
1338 int index_good;
1339
1340 if (nr_mtrr_spare_reg >= num_var_ranges)
1341 nr_mtrr_spare_reg = num_var_ranges - 1;
1342 num_reg_good = -1;
1343 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
1344 if (!min_loss_pfn[i])
1345 num_reg_good = i;
1346 }
1347
1348 index_good = -1;
1349 if (num_reg_good != -1) {
1350 for (i = 0; i < NUM_RESULT; i++) {
1351 if (!result[i].bad &&
1352 result[i].num_reg == num_reg_good &&
1353 !result[i].lose_cover_sizek) {
1354 index_good = i;
1355 break;
1356 }
1357 }
1358 }
1359
1360 return index_good;
1361}
1362
1363
1364static int __init mtrr_cleanup(unsigned address_bits)
1365{
1366 unsigned long extra_remove_base, extra_remove_size;
1367 unsigned long base, size, def, dummy;
1368 mtrr_type type;
1369 u64 chunk_size, gran_size;
1370 int index_good;
1371 int i;
1372
1373 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
1374 return 0;
1375 rdmsr(MTRRdefType_MSR, def, dummy);
1376 def &= 0xff;
1377 if (def != MTRR_TYPE_UNCACHABLE)
1378 return 0;
1379
1380 /* get it and store it aside */
1381 memset(range_state, 0, sizeof(range_state));
1382 for (i = 0; i < num_var_ranges; i++) {
1383 mtrr_if->get(i, &base, &size, &type);
1384 range_state[i].base_pfn = base;
1385 range_state[i].size_pfn = size;
1386 range_state[i].type = type;
1387 }
1388
1389 /* check whether we need to handle it and are able to */
1390 if (!mtrr_need_cleanup())
1391 return 0;
1392
1393 /* print original var MTRRs at first, for debugging: */
1394 printk(KERN_DEBUG "original variable MTRRs\n");
1395 print_out_mtrr_range_state();
1396
1397 memset(range, 0, sizeof(range));
1398 extra_remove_size = 0;
1399 extra_remove_base = 1 << (32 - PAGE_SHIFT);
1400 if (mtrr_tom2)
1401 extra_remove_size =
1402 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
1403 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
1404 extra_remove_size);
1405 /*
1406 * [0, 1M) should always be covered by a WB var mtrr, and the
1407 * fixed mtrrs take effect before the var mtrrs for that area
1408 */
1409 nr_range = add_range_with_merge(range, nr_range, 0,
1410 (1ULL<<(20 - PAGE_SHIFT)) - 1);
1411 /* sort the ranges */
1412 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
1413
1414 range_sums = sum_ranges(range, nr_range);
1415 printk(KERN_INFO "total RAM coverred: %ldM\n",
1416 range_sums >> (20 - PAGE_SHIFT));
1417
1418 if (mtrr_chunk_size && mtrr_gran_size) {
1419 i = 0;
1420 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
1421 extra_remove_base, extra_remove_size, i);
1422
1423 mtrr_print_out_one_result(i);
1424
1425 if (!result[i].bad) {
1426 set_var_mtrr_all(address_bits);
1427 return 1;
1428 }
1429 printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
1430 "will find optimal one\n");
1431 }
1432
1433 i = 0;
1434 memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
1435 memset(result, 0, sizeof(result));
1436 for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
1437
1438 for (chunk_size = gran_size; chunk_size < (1ULL<<32);
1439 chunk_size <<= 1) {
1440
1441 if (i >= NUM_RESULT)
1442 continue;
1443
1444 mtrr_calc_range_state(chunk_size, gran_size,
1445 extra_remove_base, extra_remove_size, i);
1446 if (debug_print) {
1447 mtrr_print_out_one_result(i);
1448 printk(KERN_INFO "\n");
1449 }
1450
1451 i++;
1452 }
1453 }
1454
1455 /* try to find the optimal index */
1456 index_good = mtrr_search_optimal_index();
1457
1458 if (index_good != -1) {
1459 printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
1460 i = index_good;
1461 mtrr_print_out_one_result(i);
1462
1463 /* convert ranges to var ranges state */
1464 chunk_size = result[i].chunk_sizek;
1465 chunk_size <<= 10;
1466 gran_size = result[i].gran_sizek;
1467 gran_size <<= 10;
1468 x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
1469 set_var_mtrr_all(address_bits);
1470 printk(KERN_DEBUG "New variable MTRRs\n");
1471 print_out_mtrr_range_state();
1472 return 1;
1473 } else {
1474 /* print out all */
1475 for (i = 0; i < NUM_RESULT; i++)
1476 mtrr_print_out_one_result(i);
1477 }
1478
1479 printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
1480 printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
1481
1482 return 0;
1483}
1484#else
1485static int __init mtrr_cleanup(unsigned address_bits)
1486{
1487 return 0;
1488}
1489#endif
1490
1491static int __initdata changed_by_mtrr_cleanup;
1492
1493static int disable_mtrr_trim;
1494
1495static int __init disable_mtrr_trim_setup(char *str)
1496{
1497 disable_mtrr_trim = 1;
1498 return 0;
1499}
1500early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
1501
1502/*
1503 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
1504 * for memory >4GB. Check for that here.
1505 * Note this won't check whether the MTRRs below 4GB (where the magic
1506 * bit doesn't apply) are wrong, but so far we don't know of any such case in the wild.
1507 */
1508#define Tom2Enabled (1U << 21)
1509#define Tom2ForceMemTypeWB (1U << 22)
1510
1511int __init amd_special_default_mtrr(void)
1512{
1513 u32 l, h;
1514
1515 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
1516 return 0;
1517 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
1518 return 0;
1519 /* In case some hypervisor doesn't pass SYSCFG through */
1520 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
1521 return 0;
1522 /*
1523 * Memory between 4GB and top of mem is forced WB by this magic bit.
1524 * Reserved before K8RevF, but should be zero there.
1525 */
1526 if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
1527 (Tom2Enabled | Tom2ForceMemTypeWB))
1528 return 1;
1529 return 0;
1530}
1531
1532static u64 __init real_trim_memory(unsigned long start_pfn,
1533 unsigned long limit_pfn)
1534{
1535 u64 trim_start, trim_size;
1536 trim_start = start_pfn;
1537 trim_start <<= PAGE_SHIFT;
1538 trim_size = limit_pfn;
1539 trim_size <<= PAGE_SHIFT;
1540 trim_size -= trim_start;
1541
1542 return e820_update_range(trim_start, trim_size, E820_RAM,
1543 E820_RESERVED);
1544}
1545/**
1546 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
1547 * @end_pfn: ending page frame number
1548 *
1549 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
1550 * memory configurations. This routine checks that the highest MTRR matches
1551 * the end of memory, to make sure the MTRRs having a write back type cover
1552 * all of the memory the kernel is intending to use. If not, it'll trim any
1553 * memory off the end by adjusting end_pfn, removing it from the kernel's
1554 * allocation pools, and warning the user with an obnoxious message.
1555 */
1556int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1557{
1558 unsigned long i, base, size, highest_pfn = 0, def, dummy;
1559 mtrr_type type;
1560 u64 total_trim_size;
1561
1562 /* one extra slot for counting zero-size entries */
1563 int num[MTRR_NUM_TYPES + 1];
1564 /*
1565 * Make sure we only trim uncachable memory on machines that
1566 * support the Intel MTRR architecture:
1567 */
1568 if (!is_cpu(INTEL) || disable_mtrr_trim)
1569 return 0;
1570 rdmsr(MTRRdefType_MSR, def, dummy);
1571 def &= 0xff;
1572 if (def != MTRR_TYPE_UNCACHABLE)
1573 return 0;
1574
1575 /* get it and store it aside */
1576 memset(range_state, 0, sizeof(range_state));
1577 for (i = 0; i < num_var_ranges; i++) {
1578 mtrr_if->get(i, &base, &size, &type);
1579 range_state[i].base_pfn = base;
1580 range_state[i].size_pfn = size;
1581 range_state[i].type = type;
1582 }
1583
1584 /* Find highest cached pfn */
1585 for (i = 0; i < num_var_ranges; i++) {
1586 type = range_state[i].type;
1587 if (type != MTRR_TYPE_WRBACK)
1588 continue;
1589 base = range_state[i].base_pfn;
1590 size = range_state[i].size_pfn;
1591 if (highest_pfn < base + size)
1592 highest_pfn = base + size;
1593 }
1594
1595 /* kvm/qemu don't set the mtrrs up right; don't trim everything away */
1596 if (!highest_pfn) {
1597 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1598 return 0;
1599 }
1600
1601 /* count the entries of each type */
1602 memset(num, 0, sizeof(num));
1603 for (i = 0; i < num_var_ranges; i++) {
1604 type = range_state[i].type;
1605 if (type >= MTRR_NUM_TYPES)
1606 continue;
1607 size = range_state[i].size_pfn;
1608 if (!size)
1609 type = MTRR_NUM_TYPES;
1610 num[type]++;
1611 }
1612
1613 /* no entry for WB? */
1614 if (!num[MTRR_TYPE_WRBACK])
1615 return 0;
1616
1617 /* check if we only had WB and UC */
1618 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1619 num_var_ranges - num[MTRR_NUM_TYPES])
1620 return 0;
1621
1622 memset(range, 0, sizeof(range));
1623 nr_range = 0;
1624 if (mtrr_tom2) {
1625 range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
1626 range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
1627 if (highest_pfn < range[nr_range].end + 1)
1628 highest_pfn = range[nr_range].end + 1;
1629 nr_range++;
1630 }
1631 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1632
1633 total_trim_size = 0;
1634 /* check the head */
1635 if (range[0].start)
1636 total_trim_size += real_trim_memory(0, range[0].start);
1637 /* check the holes */
1638 for (i = 0; i < nr_range - 1; i++) {
1639 if (range[i].end + 1 < range[i+1].start)
1640 total_trim_size += real_trim_memory(range[i].end + 1,
1641 range[i+1].start);
1642 }
1643 /* check the top */
1644 i = nr_range - 1;
1645 if (range[i].end + 1 < end_pfn)
1646 total_trim_size += real_trim_memory(range[i].end + 1,
1647 end_pfn);
1648
1649 if (total_trim_size) {
1650 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
1651 " all of memory, losing %lluMB of RAM.\n",
1652 total_trim_size >> 20);
1653
1654 if (!changed_by_mtrr_cleanup)
1655 WARN_ON(1);
1656
1657 printk(KERN_INFO "update e820 for mtrr\n");
1658 update_e820();
1659
1660 return 1;
1661 }
1662
1663 return 0;
1664}
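To make the head/holes/top checks concrete (hypothetical numbers): with end_pfn = 0x88000 and a single WB range covering pfns [0, 0x80000), nothing is trimmed at the head or between ranges, and the top check reserves the last 128MB:

	/* Illustration only:
	 *   head:  range[0].start == 0, so no trim;
	 *   holes: only one range, the loop body never fires;
	 *   top:   real_trim_memory(0x80000, 0x88000) moves 0x8000 pages
	 *          (128MB at 4K pages) from E820_RAM to E820_RESERVED.
	 */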
1665 614
1666/** 615/**
1667 * mtrr_bp_init - initialize mtrrs on the boot CPU 616 * mtrr_bp_init - initialize mtrrs on the boot CPU
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index ffd60409cc6d..77f67f7b347a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if;
79 79
80extern unsigned int num_var_ranges; 80extern unsigned int num_var_ranges;
81extern u64 mtrr_tom2; 81extern u64 mtrr_tom2;
82extern struct mtrr_state_type mtrr_state;
82 83
83void mtrr_state_warn(void); 84void mtrr_state_warn(void);
84const char *mtrr_attrib_to_str(int x); 85const char *mtrr_attrib_to_str(int x);
@@ -88,3 +89,6 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
88int amd_init_mtrr(void); 89int amd_init_mtrr(void);
89int cyrix_init_mtrr(void); 90int cyrix_init_mtrr(void);
90int centaur_init_mtrr(void); 91int centaur_init_mtrr(void);
92
93extern int changed_by_mtrr_cleanup;
94extern int mtrr_cleanup(unsigned address_bits);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d67e0e48bc2d..f93047fed791 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
14 if (c->x86_max_cores * smp_num_siblings > 1) { 14 if (c->x86_max_cores * smp_num_siblings > 1) {
15 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); 15 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
16 seq_printf(m, "siblings\t: %d\n", 16 seq_printf(m, "siblings\t: %d\n",
17 cpus_weight(per_cpu(cpu_core_map, cpu))); 17 cpumask_weight(cpu_sibling_mask(cpu)));
18 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); 18 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
19 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); 19 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
20 seq_printf(m, "apicid\t\t: %d\n", c->apicid); 20 seq_printf(m, "apicid\t\t: %d\n", c->apicid);
@@ -143,9 +143,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
143static void *c_start(struct seq_file *m, loff_t *pos) 143static void *c_start(struct seq_file *m, loff_t *pos)
144{ 144{
145 if (*pos == 0) /* just in case, cpu 0 is not the first */ 145 if (*pos == 0) /* just in case, cpu 0 is not the first */
146 *pos = first_cpu(cpu_online_map); 146 *pos = cpumask_first(cpu_online_mask);
147 else 147 else
148 *pos = next_cpu_nr(*pos - 1, cpu_online_map); 148 *pos = cpumask_next(*pos - 1, cpu_online_mask);
149 if ((*pos) < nr_cpu_ids) 149 if ((*pos) < nr_cpu_ids)
150 return &cpu_data(*pos); 150 return &cpu_data(*pos);
151 return NULL; 151 return NULL;
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 52b3fefbd5af..bb62b3e5caad 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
98#endif 98#endif
99} 99}
100 100
101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
102 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta, 104 .c_early_init = early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index e777f79e0960..fd2c37bf7acb 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
8 * so no special init takes place. 8 * so no special init takes place.
9 */ 9 */
10 10
11static struct cpu_dev umc_cpu_dev __cpuinitdata = { 11static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
12 .c_vendor = "UMC", 12 .c_vendor = "UMC",
13 .c_ident = { "UMC UMC UMC" }, 13 .c_ident = { "UMC UMC UMC" },
14 .c_models = { 14 .c_models = {
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 508bec1cee27..ef2c3563357d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -110,19 +110,50 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
110/* 110/*
111 * Add a memory region to the kernel e820 map. 111 * Add a memory region to the kernel e820 map.
112 */ 112 */
113void __init e820_add_region(u64 start, u64 size, int type) 113static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
114 int type)
114{ 115{
115 int x = e820.nr_map; 116 int x = e820x->nr_map;
116 117
117 if (x == ARRAY_SIZE(e820.map)) { 118 if (x == ARRAY_SIZE(e820x->map)) {
118 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
119 return; 120 return;
120 } 121 }
121 122
122 e820.map[x].addr = start; 123 e820x->map[x].addr = start;
123 e820.map[x].size = size; 124 e820x->map[x].size = size;
124 e820.map[x].type = type; 125 e820x->map[x].type = type;
125 e820.nr_map++; 126 e820x->nr_map++;
127}
128
129void __init e820_add_region(u64 start, u64 size, int type)
130{
131 __e820_add_region(&e820, start, size, type);
132}
133
134static void __init e820_print_type(u32 type)
135{
136 switch (type) {
137 case E820_RAM:
138 case E820_RESERVED_KERN:
139 printk(KERN_CONT "(usable)");
140 break;
141 case E820_RESERVED:
142 printk(KERN_CONT "(reserved)");
143 break;
144 case E820_ACPI:
145 printk(KERN_CONT "(ACPI data)");
146 break;
147 case E820_NVS:
148 printk(KERN_CONT "(ACPI NVS)");
149 break;
150 case E820_UNUSABLE:
151 printk(KERN_CONT "(unusable)");
152 break;
153 default:
154 printk(KERN_CONT "type %u", type);
155 break;
156 }
126} 157}
127 158
128void __init e820_print_map(char *who) 159void __init e820_print_map(char *who)
@@ -134,27 +165,8 @@ void __init e820_print_map(char *who)
134 (unsigned long long) e820.map[i].addr, 165 (unsigned long long) e820.map[i].addr,
135 (unsigned long long) 166 (unsigned long long)
136 (e820.map[i].addr + e820.map[i].size)); 167 (e820.map[i].addr + e820.map[i].size));
137 switch (e820.map[i].type) { 168 e820_print_type(e820.map[i].type);
138 case E820_RAM: 169 printk(KERN_CONT "\n");
139 case E820_RESERVED_KERN:
140 printk(KERN_CONT "(usable)\n");
141 break;
142 case E820_RESERVED:
143 printk(KERN_CONT "(reserved)\n");
144 break;
145 case E820_ACPI:
146 printk(KERN_CONT "(ACPI data)\n");
147 break;
148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n");
150 break;
151 case E820_UNUSABLE:
152 printk("(unusable)\n");
153 break;
154 default:
155 printk(KERN_CONT "type %u\n", e820.map[i].type);
156 break;
157 }
158 } 170 }
159} 171}
160 172
@@ -221,7 +233,7 @@ void __init e820_print_map(char *who)
221 */ 233 */
222 234
223int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, 235int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
224 int *pnr_map) 236 u32 *pnr_map)
225{ 237{
226 struct change_member { 238 struct change_member {
227 struct e820entry *pbios; /* pointer to original bios entry */ 239 struct e820entry *pbios; /* pointer to original bios entry */
@@ -417,11 +429,12 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
417 return __append_e820_map(biosmap, nr_map); 429 return __append_e820_map(biosmap, nr_map);
418} 430}
419 431
420static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, 432static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
421 u64 size, unsigned old_type, 433 u64 size, unsigned old_type,
422 unsigned new_type) 434 unsigned new_type)
423{ 435{
424 int i; 436 u64 end;
437 unsigned int i;
425 u64 real_updated_size = 0; 438 u64 real_updated_size = 0;
426 439
427 BUG_ON(old_type == new_type); 440 BUG_ON(old_type == new_type);
@@ -429,27 +442,55 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
429 if (size > (ULLONG_MAX - start)) 442 if (size > (ULLONG_MAX - start))
430 size = ULLONG_MAX - start; 443 size = ULLONG_MAX - start;
431 444
432 for (i = 0; i < e820.nr_map; i++) { 445 end = start + size;
446 printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
447 (unsigned long long) start,
448 (unsigned long long) end);
449 e820_print_type(old_type);
450 printk(KERN_CONT " ==> ");
451 e820_print_type(new_type);
452 printk(KERN_CONT "\n");
453
454 for (i = 0; i < e820x->nr_map; i++) {
433 struct e820entry *ei = &e820x->map[i]; 455 struct e820entry *ei = &e820x->map[i];
434 u64 final_start, final_end; 456 u64 final_start, final_end;
457 u64 ei_end;
458
435 if (ei->type != old_type) 459 if (ei->type != old_type)
436 continue; 460 continue;
437 /* totally covered? */ 461
438 if (ei->addr >= start && 462 ei_end = ei->addr + ei->size;
439 (ei->addr + ei->size) <= (start + size)) { 463 /* totally covered by new range? */
464 if (ei->addr >= start && ei_end <= end) {
440 ei->type = new_type; 465 ei->type = new_type;
441 real_updated_size += ei->size; 466 real_updated_size += ei->size;
442 continue; 467 continue;
443 } 468 }
469
470 /* new range is totally covered? */
471 if (ei->addr < start && ei_end > end) {
472 __e820_add_region(e820x, start, size, new_type);
473 __e820_add_region(e820x, end, ei_end - end, ei->type);
474 ei->size = start - ei->addr;
475 real_updated_size += size;
476 continue;
477 }
478
444 /* partially covered */ 479 /* partially covered */
445 final_start = max(start, ei->addr); 480 final_start = max(start, ei->addr);
446 final_end = min(start + size, ei->addr + ei->size); 481 final_end = min(end, ei_end);
447 if (final_start >= final_end) 482 if (final_start >= final_end)
448 continue; 483 continue;
449 e820_add_region(final_start, final_end - final_start, 484
450 new_type); 485 __e820_add_region(e820x, final_start, final_end - final_start,
486 new_type);
487
451 real_updated_size += final_end - final_start; 488 real_updated_size += final_end - final_start;
452 489
490 /*
491 * left range could be head or tail, so need to update
492 * size at first.
493 */
453 ei->size -= final_end - final_start; 494 ei->size -= final_end - final_start;
454 if (ei->addr < final_start) 495 if (ei->addr < final_start)
455 continue; 496 continue;
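The new "new range is totally covered" branch is what lets a type change be punched into the middle of an existing entry. With assumed values:

	/* Illustration only: __e820_update_range() over a RAM entry [0, 16M),
	 * updating [4M, 8M) from E820_RAM to E820_RESERVED, now yields:
	 *   [0, 4M)   E820_RAM       (the original entry, size shrunk)
	 *   [4M, 8M)  E820_RESERVED  (added by __e820_add_region)
	 *   [8M, 16M) E820_RAM       (tail re-added with the old type)
	 */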
@@ -461,13 +502,13 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
461u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, 502u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
462 unsigned new_type) 503 unsigned new_type)
463{ 504{
464 return e820_update_range_map(&e820, start, size, old_type, new_type); 505 return __e820_update_range(&e820, start, size, old_type, new_type);
465} 506}
466 507
467static u64 __init e820_update_range_saved(u64 start, u64 size, 508static u64 __init e820_update_range_saved(u64 start, u64 size,
468 unsigned old_type, unsigned new_type) 509 unsigned old_type, unsigned new_type)
469{ 510{
470 return e820_update_range_map(&e820_saved, start, size, old_type, 511 return __e820_update_range(&e820_saved, start, size, old_type,
471 new_type); 512 new_type);
472} 513}
473 514
@@ -511,7 +552,7 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
511 552
512void __init update_e820(void) 553void __init update_e820(void)
513{ 554{
514 int nr_map; 555 u32 nr_map;
515 556
516 nr_map = e820.nr_map; 557 nr_map = e820.nr_map;
517 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) 558 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
@@ -522,7 +563,7 @@ void __init update_e820(void)
522} 563}
523static void __init update_e820_saved(void) 564static void __init update_e820_saved(void)
524{ 565{
525 int nr_map; 566 u32 nr_map;
526 567
527 nr_map = e820_saved.nr_map; 568 nr_map = e820_saved.nr_map;
528 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) 569 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
@@ -1020,8 +1061,8 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
1020 continue; 1061 continue;
1021 return addr; 1062 return addr;
1022 } 1063 }
1023 return -1UL;
1024 1064
1065 return -1ULL;
1025} 1066}
1026 1067
1027/* 1068/*
@@ -1034,13 +1075,22 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1034 u64 start; 1075 u64 start;
1035 1076
1036 start = startt; 1077 start = startt;
1037 while (size < sizet) 1078 while (size < sizet && (start + 1))
1038 start = find_e820_area_size(start, &size, align); 1079 start = find_e820_area_size(start, &size, align);
1039 1080
1040 if (size < sizet) 1081 if (size < sizet)
1041 return 0; 1082 return 0;
1042 1083
1084#ifdef CONFIG_X86_32
1085 if (start >= MAXMEM)
1086 return 0;
1087 if (start + size > MAXMEM)
1088 size = MAXMEM - start;
1089#endif
1090
1043 addr = round_down(start + size - sizet, align); 1091 addr = round_down(start + size - sizet, align);
1092 if (addr < start)
1093 return 0;
1044 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); 1094 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
1045 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); 1095 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
1046 printk(KERN_INFO "update e820 for early_reserve_e820\n"); 1096 printk(KERN_INFO "update e820 for early_reserve_e820\n");
@@ -1253,7 +1303,7 @@ early_param("memmap", parse_memmap_opt);
1253void __init finish_e820_parsing(void) 1303void __init finish_e820_parsing(void)
1254{ 1304{
1255 if (userdef) { 1305 if (userdef) {
1256 int nr = e820.nr_map; 1306 u32 nr = e820.nr_map;
1257 1307
1258 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) 1308 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
1259 early_panic("Invalid user supplied memory map"); 1309 early_panic("Invalid user supplied memory map");
@@ -1336,7 +1386,7 @@ void __init e820_reserve_resources_late(void)
1336char *__init default_machine_specific_memory_setup(void) 1386char *__init default_machine_specific_memory_setup(void)
1337{ 1387{
1338 char *who = "BIOS-e820"; 1388 char *who = "BIOS-e820";
1339 int new_nr; 1389 u32 new_nr;
1340 /* 1390 /*
1341 * Try to copy the BIOS-supplied E820-map. 1391 * Try to copy the BIOS-supplied E820-map.
1342 * 1392 *
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 639ad98238a2..335f049d110f 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void)
250 return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); 250 return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
251} 251}
252 252
253static void dbgp_mdelay(int ms) 253static void __init dbgp_mdelay(int ms)
254{ 254{
255 int i; 255 int i;
256 256
@@ -311,7 +311,7 @@ static void dbgp_set_data(const void *buf, int size)
311 writel(hi, &ehci_debug->data47); 311 writel(hi, &ehci_debug->data47);
312} 312}
313 313
314static void dbgp_get_data(void *buf, int size) 314static void __init dbgp_get_data(void *buf, int size)
315{ 315{
316 unsigned char *bytes = buf; 316 unsigned char *bytes = buf;
317 u32 lo, hi; 317 u32 lo, hi;
@@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
355 return ret; 355 return ret;
356} 356}
357 357
358static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, 358static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
359 int size) 359 int size)
360{ 360{
361 u32 pids, addr, ctrl; 361 u32 pids, addr, ctrl;
@@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
386 return ret; 386 return ret;
387} 387}
388 388
389static int dbgp_control_msg(unsigned devnum, int requesttype, int request, 389static int __init dbgp_control_msg(unsigned devnum, int requesttype,
390 int value, int index, void *data, int size) 390 int request, int value, int index, void *data, int size)
391{ 391{
392 u32 pids, addr, ctrl; 392 u32 pids, addr, ctrl;
393 struct usb_ctrlrequest req; 393 struct usb_ctrlrequest req;
@@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
489 return 0; 489 return 0;
490} 490}
491 491
492static int ehci_reset_port(int port) 492static int __init ehci_reset_port(int port)
493{ 493{
494 u32 portsc; 494 u32 portsc;
495 u32 delay_time, delay; 495 u32 delay_time, delay;
@@ -532,7 +532,7 @@ static int ehci_reset_port(int port)
532 return -EBUSY; 532 return -EBUSY;
533} 533}
534 534
535static int ehci_wait_for_port(int port) 535static int __init ehci_wait_for_port(int port)
536{ 536{
537 u32 status; 537 u32 status;
538 int ret, reps; 538 int ret, reps;
@@ -557,13 +557,13 @@ static inline void dbgp_printk(const char *fmt, ...) { }
557 557
558typedef void (*set_debug_port_t)(int port); 558typedef void (*set_debug_port_t)(int port);
559 559
560static void default_set_debug_port(int port) 560static void __init default_set_debug_port(int port)
561{ 561{
562} 562}
563 563
564static set_debug_port_t set_debug_port = default_set_debug_port; 564static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
565 565
566static void nvidia_set_debug_port(int port) 566static void __init nvidia_set_debug_port(int port)
567{ 567{
568 u32 dword; 568 u32 dword;
569 dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 569 dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 899e8938e79f..c929add475c9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -442,8 +442,7 @@ sysenter_past_esp:
442 442
443 GET_THREAD_INFO(%ebp) 443 GET_THREAD_INFO(%ebp)
444 444
445 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 445 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
446 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
447 jnz sysenter_audit 446 jnz sysenter_audit
448sysenter_do_call: 447sysenter_do_call:
449 cmpl $(nr_syscalls), %eax 448 cmpl $(nr_syscalls), %eax
@@ -454,7 +453,7 @@ sysenter_do_call:
454 DISABLE_INTERRUPTS(CLBR_ANY) 453 DISABLE_INTERRUPTS(CLBR_ANY)
455 TRACE_IRQS_OFF 454 TRACE_IRQS_OFF
456 movl TI_flags(%ebp), %ecx 455 movl TI_flags(%ebp), %ecx
457 testw $_TIF_ALLWORK_MASK, %cx 456 testl $_TIF_ALLWORK_MASK, %ecx
458 jne sysexit_audit 457 jne sysexit_audit
459sysenter_exit: 458sysenter_exit:
460/* if something modifies registers it must also disable sysexit */ 459/* if something modifies registers it must also disable sysexit */
@@ -468,7 +467,7 @@ sysenter_exit:
468 467
469#ifdef CONFIG_AUDITSYSCALL 468#ifdef CONFIG_AUDITSYSCALL
470sysenter_audit: 469sysenter_audit:
471 testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 470 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
472 jnz syscall_trace_entry 471 jnz syscall_trace_entry
473 addl $4,%esp 472 addl $4,%esp
474 CFI_ADJUST_CFA_OFFSET -4 473 CFI_ADJUST_CFA_OFFSET -4
@@ -485,7 +484,7 @@ sysenter_audit:
485 jmp sysenter_do_call 484 jmp sysenter_do_call
486 485
487sysexit_audit: 486sysexit_audit:
488 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx 487 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
489 jne syscall_exit_work 488 jne syscall_exit_work
490 TRACE_IRQS_ON 489 TRACE_IRQS_ON
491 ENABLE_INTERRUPTS(CLBR_ANY) 490 ENABLE_INTERRUPTS(CLBR_ANY)
@@ -498,7 +497,7 @@ sysexit_audit:
498 DISABLE_INTERRUPTS(CLBR_ANY) 497 DISABLE_INTERRUPTS(CLBR_ANY)
499 TRACE_IRQS_OFF 498 TRACE_IRQS_OFF
500 movl TI_flags(%ebp), %ecx 499 movl TI_flags(%ebp), %ecx
501 testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx 500 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
502 jne syscall_exit_work 501 jne syscall_exit_work
503 movl PT_EAX(%esp),%eax /* reload syscall return value */ 502 movl PT_EAX(%esp),%eax /* reload syscall return value */
504 jmp sysenter_exit 503 jmp sysenter_exit
@@ -523,8 +522,7 @@ ENTRY(system_call)
523 SAVE_ALL 522 SAVE_ALL
524 GET_THREAD_INFO(%ebp) 523 GET_THREAD_INFO(%ebp)
525 # system call tracing in operation / emulation 524 # system call tracing in operation / emulation
526 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ 525 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
527 testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
528 jnz syscall_trace_entry 526 jnz syscall_trace_entry
529 cmpl $(nr_syscalls), %eax 527 cmpl $(nr_syscalls), %eax
530 jae syscall_badsys 528 jae syscall_badsys
@@ -538,7 +536,7 @@ syscall_exit:
538 # between sampling and the iret 536 # between sampling and the iret
539 TRACE_IRQS_OFF 537 TRACE_IRQS_OFF
540 movl TI_flags(%ebp), %ecx 538 movl TI_flags(%ebp), %ecx
541 testw $_TIF_ALLWORK_MASK, %cx # current->work 539 testl $_TIF_ALLWORK_MASK, %ecx # current->work
542 jne syscall_exit_work 540 jne syscall_exit_work
543 541
544restore_all: 542restore_all:
@@ -673,7 +671,7 @@ END(syscall_trace_entry)
673 # perform syscall exit tracing 671 # perform syscall exit tracing
674 ALIGN 672 ALIGN
675syscall_exit_work: 673syscall_exit_work:
676 testb $_TIF_WORK_SYSCALL_EXIT, %cl 674 testl $_TIF_WORK_SYSCALL_EXIT, %ecx
677 jz work_pending 675 jz work_pending
678 TRACE_IRQS_ON 676 TRACE_IRQS_ON
679 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call 677 ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7ba4621c0dfa..a331ec38af9e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -368,6 +368,7 @@ ENTRY(save_rest)
 END(save_rest)
 
 /* save complete stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -396,6 +397,7 @@ ENTRY(save_paranoid)
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
+	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -416,7 +418,6 @@ ENTRY(ret_from_fork)
 
 	GET_THREAD_INFO(%rcx)
 
-	CFI_REMEMBER_STATE
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
@@ -428,7 +429,6 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
-	CFI_RESTORE_STATE
 	CFI_ENDPROC
 END(ret_from_fork)
 
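
The .pushsection/.popsection pair above moves save_paranoid into .kprobes.text, the section kprobes refuses to instrument. For reference, C code reaches the same section through the __kprobes attribute; a compilable sketch of that mechanism (the function itself is illustrative, and in userspace the section is merely an orphan code section):

/* mirrors the kernel's __kprobes annotation from include/linux/kprobes.h */
#define __kprobes __attribute__((__section__(".kprobes.text")))

static int __kprobes never_probe_me(int x)
{
	return x * 2;	/* kprobes would refuse to plant probes here */
}

int main(void)
{
	return never_probe_me(21) == 42 ? 0 : 1;
}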
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index a85da1764b1c..61df77532120 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -114,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
@@ -127,27 +141,39 @@ static void ftrace_mod_code(void)
 
 	/* if we fail, then kill any new writers */
 	if (mod_code_status)
-		mod_code_write = 0;
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&nmi_running);
-	/* Must have nmi_running seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
-	smp_wmb();
+	smp_mb();
 	atomic_dec(&nmi_running);
 }
 
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
 static void wait_for_nmi(void)
 {
 	if (!atomic_read(&nmi_running))
@@ -167,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -182,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;
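
A sketch of the protocol this hunk converges on, transplanted to C11 atomics so it can be compiled and reasoned about outside the kernel: the writer flag lives in the top bit of the same word that counts in-flight NMIs, so a single compare-and-swap both checks "no NMI running" and sets the flag. atomic_fetch_and stands in for the kernel's cmpxchg loop in clear_mod_flag(); the names mirror the patch but the code is illustrative, not the kernel implementation:

#include <stdatomic.h>

#define MOD_CODE_WRITE_FLAG (1u << 31)	/* top bit = "writer active" */

static atomic_uint nmi_running;		/* low bits: NMIs in flight */

/* Set the write flag only when no NMI is running (count == 0). */
static void wait_and_set_flag(void)
{
	unsigned int expected = 0;

	while (!atomic_compare_exchange_weak(&nmi_running, &expected,
					     MOD_CODE_WRITE_FLAG))
		expected = 0;	/* retry until the count drains to zero */
}

/* Clear only the flag bit, preserving the NMI count. */
static void clear_flag(void)
{
	atomic_fetch_and(&nmi_running, ~MOD_CODE_WRITE_FLAG);
}

int main(void)
{
	wait_and_set_flag();
	/* ... patch the code here; concurrent NMIs would see the flag ... */
	clear_flag();
	return 0;
}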
@@ -393,7 +410,6 @@ int ftrace_disable_ftrace_graph_caller(void)
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 {
 	unsigned long old;
-	unsigned long long calltime;
 	int faulted;
 	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
@@ -436,10 +452,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	calltime = trace_clock_local();
-
-	if (ftrace_push_return_trace(old, calltime,
-				self_addr, &trace.depth) == -EBUSY) {
+	if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
 		*parent = old;
 		return;
 	}
@@ -453,3 +466,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned long *sys_call_table;
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
+{
+	struct syscall_metadata *start;
+	struct syscall_metadata *stop;
+	char str[KSYM_SYMBOL_LEN];
+
+
+	start = (struct syscall_metadata *)__start_syscalls_metadata;
+	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+	kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
+
+	for ( ; start < stop; start++) {
+		if (start->name && !strcmp(start->name, str))
+			return start;
+	}
+	return NULL;
+}
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+	if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
+		return NULL;
+
+	return syscalls_metadata[nr];
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+	int i;
+	struct syscall_metadata *meta;
+	unsigned long **psys_syscall_table = &sys_call_table;
+	static atomic_t refs;
+
+	if (atomic_inc_return(&refs) != 1)
+		goto end;
+
+	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+					FTRACE_SYSCALL_MAX, GFP_KERNEL);
+	if (!syscalls_metadata) {
+		WARN_ON(1);
+		return;
+	}
+
+	for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
+		meta = find_syscall_meta(psys_syscall_table[i]);
+		syscalls_metadata[i] = meta;
+	}
+	return;
+
+	/* Paranoid: avoid overflow */
+end:
+	atomic_dec(&refs);
+}
+#endif
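
find_syscall_meta() above resolves a sys_call_table slot to its metadata by symbol name, via kallsyms. A toy model of the same name-based lookup, with a hand-rolled table in place of the linker-generated __start/__stop section bounds (the entries are made up for the example):

#include <stdio.h>
#include <string.h>

struct syscall_metadata {
	const char *name;
	int nb_args;
};

static struct syscall_metadata metadata[] = {
	{ "sys_read",  3 },
	{ "sys_write", 3 },
	{ "sys_open",  3 },
};

static struct syscall_metadata *find_meta(const char *sym)
{
	size_t i;

	for (i = 0; i < sizeof(metadata) / sizeof(metadata[0]); i++)
		if (!strcmp(metadata[i].name, sym))
			return &metadata[i];
	return NULL;	/* same fallback the kernel code uses */
}

int main(void)
{
	struct syscall_metadata *m = find_meta("sys_write");

	if (m)
		printf("%s takes %d args\n", m->name, m->nb_args);
	return 0;
}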
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index ac108d1fe182..3f8579f8d42c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
 {
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
@@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_early(init_pg_tables_start, init_pg_tables_end,
-			"INIT_PG_TABLE");
-
 	reserve_ebda_region();
 
 	/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f5b272247690..70eaa852c732 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
 
 	reserve_trampoline_memory();
 
-	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+	reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/* Reserve INITRD */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c32ca19d591a..30683883e0cd 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -38,42 +38,40 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
+ * This is how much memory in addition to the memory covered up to
+ * and including _end we need mapped initially.
  * We need:
- * - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
- * - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- * - a few pages for allocator use before the kernel pagetable has
- *   been set up
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
  *
  * Modulo rounding, each megabyte assigned here requires a kilobyte of
  * memory, which is currently unreclaimed.
  *
  * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
  */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#ifdef CONFIG_DEBUG_PAGEALLOC
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
 
 #if PTRS_PER_PMD > 1
-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
 #else
-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
-ALLOCATOR_SLOP = 4
 
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = \
+	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+
+/*
+ * Worst-case size of the kernel mapping we need to make:
+ * the worst-case size of the kernel itself, plus the extra we need
+ * to map for the linear map.
+ */
+KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
+
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
+RESERVE_BRK(pagetables, INIT_MAP_SIZE)
 
 /*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
@@ -166,10 +164,10 @@ num_subarch_entries = (. - subarch_entries) / 4
 
 /*
  * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
  * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end.
  *
  * Note that the stack is not yet set up!
 */
@@ -190,8 +188,7 @@ default_entry:
 
 	xorl %ebx,%ebx				/* %ebx is kept at zero */
 
-	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
+	movl $pa(__brk_base), %edi
 	movl $pa(swapper_pg_pmd), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -209,14 +206,14 @@ default_entry:
 	loop 11b
 
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables.
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 1:
-	movl %edi,pa(init_pg_tables_end)
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -227,8 +224,7 @@ default_entry:
 
 page_pde_offset = (__PAGE_OFFSET >> 20);
 
-	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
+	movl $pa(__brk_base), %edi
 	movl $pa(swapper_pg_dir), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -242,14 +238,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	addl $0x1000,%eax
 	loop 11b
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables; the +0x007 is
-	 * the attribute bits
+	 * End condition: we must map up to the end + MAPPING_BEYOND_END.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,pa(init_pg_tables_end)
+	addl $__PAGE_OFFSET, %edi
+	movl %edi, pa(_brk_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -636,6 +631,7 @@ swapper_pg_fixmap:
 	.fill 1024,4,0
ENTRY(empty_zero_page)
 	.fill 4096,1,0
+
 /*
  * This starts the data section.
 */
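
A worked example of the new sizing, under the usual 32-bit defaults (__PAGE_OFFSET = 0xC0000000, KERNEL_IMAGE_SIZE = 512 MB, 4 KB pages, PTRS_PER_PGD = 1024 or, for PAE, PTRS_PER_PMD = 512 with 4 PGD entries). The numbers are illustrative, not authoritative:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define __PAGE_OFFSET	0xC0000000UL
#define KERNEL_IMAGE_SIZE (512UL * 1024 * 1024)

static unsigned long page_table_size(unsigned long pages, int pae)
{
	if (pae)			/* PTRS_PER_PMD=512, PTRS_PER_PGD=4 */
		return pages / 512 + 4;
	return pages / 1024;		/* PTRS_PER_PGD=1024 */
}

int main(void)
{
	unsigned long low_pages = ((1ULL << 32) - __PAGE_OFFSET) >> PAGE_SHIFT;
	int pae;

	for (pae = 0; pae <= 1; pae++) {
		unsigned long beyond = page_table_size(low_pages, pae) << PAGE_SHIFT;
		unsigned long kpages = (KERNEL_IMAGE_SIZE + beyond) >> PAGE_SHIFT;
		unsigned long brk = page_table_size(kpages, pae) * PAGE_SIZE;

		printf("%s: MAPPING_BEYOND_END=%lu KB, brk reservation=%lu KB\n",
		       pae ? "PAE" : "non-PAE", beyond >> 10, brk >> 10);
	}
	return 0;
}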
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a00545fe5cdd..648b3a2a3a44 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(void)
  */
 static int boot_hpet_disable;
 int hpet_force_user;
+static int hpet_verbose;
 
 static int __init hpet_setup(char *str)
 {
@@ -88,6 +89,8 @@ static int __init hpet_setup(char *str)
 			boot_hpet_disable = 1;
 		if (!strncmp("force", str, 5))
 			hpet_force_user = 1;
+		if (!strncmp("verbose", str, 7))
+			hpet_verbose = 1;
 	}
 	return 1;
 }
@@ -119,6 +122,43 @@ int is_hpet_enabled(void)
 }
 EXPORT_SYMBOL_GPL(is_hpet_enabled);
 
+static void _hpet_print_config(const char *function, int line)
+{
+	u32 i, timers, l, h;
+	printk(KERN_INFO "hpet: %s(%d):\n", function, line);
+	l = hpet_readl(HPET_ID);
+	h = hpet_readl(HPET_PERIOD);
+	timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+	printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
+	l = hpet_readl(HPET_CFG);
+	h = hpet_readl(HPET_STATUS);
+	printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+	l = hpet_readl(HPET_COUNTER);
+	h = hpet_readl(HPET_COUNTER+4);
+	printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+
+	for (i = 0; i < timers; i++) {
+		l = hpet_readl(HPET_Tn_CFG(i));
+		h = hpet_readl(HPET_Tn_CFG(i)+4);
+		printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
+		       i, l, h);
+		l = hpet_readl(HPET_Tn_CMP(i));
+		h = hpet_readl(HPET_Tn_CMP(i)+4);
+		printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
+		       i, l, h);
+		l = hpet_readl(HPET_Tn_ROUTE(i));
+		h = hpet_readl(HPET_Tn_ROUTE(i)+4);
+		printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
+		       i, l, h);
+	}
+}
+
+#define hpet_print_config()					\
+do {								\
+	if (hpet_verbose)					\
+		_hpet_print_config(__FUNCTION__, __LINE__);	\
+} while (0)
+
 /*
  * When the hpet driver (/dev/hpet) is enabled, we need to reserve
  * timer 0 and timer 1 in case of RTC emulation.
@@ -191,27 +231,37 @@ static struct clock_event_device hpet_clockevent = {
 	.rating		= 50,
 };
 
-static void hpet_start_counter(void)
+static void hpet_stop_counter(void)
 {
 	unsigned long cfg = hpet_readl(HPET_CFG);
-
 	cfg &= ~HPET_CFG_ENABLE;
 	hpet_writel(cfg, HPET_CFG);
 	hpet_writel(0, HPET_COUNTER);
 	hpet_writel(0, HPET_COUNTER + 4);
+}
+
+static void hpet_start_counter(void)
+{
+	unsigned long cfg = hpet_readl(HPET_CFG);
 	cfg |= HPET_CFG_ENABLE;
 	hpet_writel(cfg, HPET_CFG);
 }
 
+static void hpet_restart_counter(void)
+{
+	hpet_stop_counter();
+	hpet_start_counter();
+}
+
 static void hpet_resume_device(void)
 {
 	force_hpet_resume();
 }
 
-static void hpet_restart_counter(void)
+static void hpet_resume_counter(void)
 {
 	hpet_resume_device();
-	hpet_start_counter();
+	hpet_restart_counter();
 }
 
 static void hpet_enable_legacy_int(void)
@@ -259,29 +309,23 @@ static int hpet_setup_msi_irq(unsigned int irq);
 static void hpet_set_mode(enum clock_event_mode mode,
 			  struct clock_event_device *evt, int timer)
 {
-	unsigned long cfg, cmp, now;
+	unsigned long cfg;
 	uint64_t delta;
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
+		hpet_stop_counter();
 		delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
 		delta >>= evt->shift;
-		now = hpet_readl(HPET_COUNTER);
-		cmp = now + (unsigned long) delta;
 		cfg = hpet_readl(HPET_Tn_CFG(timer));
 		/* Make sure we use edge triggered interrupts */
 		cfg &= ~HPET_TN_LEVEL;
 		cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
 		       HPET_TN_SETVAL | HPET_TN_32BIT;
 		hpet_writel(cfg, HPET_Tn_CFG(timer));
-		/*
-		 * The first write after writing TN_SETVAL to the
-		 * config register sets the counter value, the second
-		 * write sets the period.
-		 */
-		hpet_writel(cmp, HPET_Tn_CMP(timer));
-		udelay(1);
 		hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
+		hpet_start_counter();
+		hpet_print_config();
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
@@ -308,6 +352,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 			irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 			enable_irq(hdev->irq);
 		}
+		hpet_print_config();
 		break;
 	}
 }
@@ -526,6 +571,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
 
 	num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
 	num_timers++; /* Value read out starts from 0 */
+	hpet_print_config();
 
 	hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
 	if (!hpet_devs)
@@ -695,7 +741,7 @@ static struct clocksource clocksource_hpet = {
 	.mask		= HPET_MASK,
 	.shift		= HPET_SHIFT,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
-	.resume		= hpet_restart_counter,
+	.resume		= hpet_resume_counter,
 #ifdef CONFIG_X86_64
 	.vread		= vread_hpet,
 #endif
@@ -707,7 +753,7 @@ static int hpet_clocksource_register(void)
 	cycle_t t1;
 
 	/* Start the counter */
-	hpet_start_counter();
+	hpet_restart_counter();
 
 	/* Verify whether hpet counter works */
 	t1 = read_hpet();
@@ -793,6 +839,7 @@ int __init hpet_enable(void)
 	 * information and the number of channels
 	 */
 	id = hpet_readl(HPET_ID);
+	hpet_print_config();
 
 #ifdef CONFIG_HPET_EMULATE_RTC
 	/*
@@ -845,6 +892,7 @@ static __init int hpet_late_init(void)
 		return -ENODEV;
 
 	hpet_reserve_platform_timers(hpet_readl(HPET_ID));
+	hpet_print_config();
 
 	for_each_online_cpu(cpu) {
 		hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
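
The periodic-mode rework above replaces the two-write-plus-udelay dance with: stop the main counter, program the comparator while it is frozen, then restart. A stubbed-register sketch of that ordering (register offsets and the tick value are invented for the example; real drivers go through MMIO, not an array):

#include <stdio.h>

static unsigned long regs[64];
static unsigned long hpet_readl(int r) { return regs[r]; }
static void hpet_writel(unsigned long v, int r) { regs[r] = v; }

#define HPET_CFG	1
#define HPET_COUNTER	2
#define HPET_T0_CMP	3
#define HPET_CFG_ENABLE	0x001

static void hpet_stop_counter(void)
{
	hpet_writel(hpet_readl(HPET_CFG) & ~HPET_CFG_ENABLE, HPET_CFG);
	hpet_writel(0, HPET_COUNTER);
}

static void hpet_start_counter(void)
{
	hpet_writel(hpet_readl(HPET_CFG) | HPET_CFG_ENABLE, HPET_CFG);
}

static void hpet_set_periodic(unsigned long delta)
{
	hpet_stop_counter();		/* counter frozen at 0 */
	hpet_writel(delta, HPET_T0_CMP);/* comparator = first period */
	hpet_start_counter();		/* no window where counter > cmp */
}

int main(void)
{
	hpet_set_periodic(14318);	/* roughly 1 ms at 14.318 MHz */
	printf("CFG=%#lx CMP=%lu\n", hpet_readl(HPET_CFG),
	       hpet_readl(HPET_T0_CMP));
	return 0;
}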
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 10f92fb532f3..3475440baa54 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -3,17 +3,17 @@
  *
  */
 #include <linux/clockchips.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/spinlock.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
-#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
 
-#include <asm/smp.h>
-#include <asm/delay.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
 #include <asm/hpet.h>
+#include <asm/smp.h>
 
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
@@ -40,7 +40,7 @@ static void init_pit_timer(enum clock_event_mode mode,
 {
 	spin_lock(&i8253_lock);
 
-	switch(mode) {
+	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 		/* binary, mode 2, LSB/MSB, ch 0 */
 		outb_pit(0x34, PIT_MODE);
@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
 * !using_apic_timer decisions in do_timer_interrupt_hook()
 */
-static struct clock_event_device pit_clockevent = {
+static struct clock_event_device pit_ce = {
 	.name		= "pit",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= init_pit_timer,
@@ -114,15 +114,13 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
-	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
-			pit_clockevent.shift);
-	pit_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFF, &pit_clockevent);
-	pit_clockevent.min_delta_ns =
-		clockevent_delta2ns(0xF, &pit_clockevent);
-	clockevents_register_device(&pit_clockevent);
-	global_clock_event = &pit_clockevent;
+	pit_ce.cpumask = cpumask_of(smp_processor_id());
+	pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
+	pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
+	pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
+
+	clockevents_register_device(&pit_ce);
+	global_clock_event = &pit_ce;
 }
 
 #ifndef CONFIG_X86_64
@@ -133,11 +131,11 @@ void __init setup_pit_timer(void)
 */
 static cycle_t pit_read(void)
 {
+	static int old_count;
+	static u32 old_jifs;
 	unsigned long flags;
 	int count;
 	u32 jifs;
-	static int old_count;
-	static u32 old_jifs;
 
 	spin_lock_irqsave(&i8253_lock, flags);
 	/*
@@ -179,9 +177,9 @@ static cycle_t pit_read(void)
 	 * Previous attempts to handle these cases intelligently were
 	 * buggy, so we just do the simple thing now.
 	 */
-	if (count > old_count && jifs == old_jifs) {
+	if (count > old_count && jifs == old_jifs)
 		count = old_count;
-	}
+
 	old_count = count;
 	old_jifs = jifs;
 
@@ -192,13 +190,13 @@ static cycle_t pit_read(void)
 	return (cycle_t)(jifs * LATCH) + count;
 }
 
-static struct clocksource clocksource_pit = {
+static struct clocksource pit_cs = {
 	.name		= "pit",
 	.rating		= 110,
 	.read		= pit_read,
 	.mask		= CLOCKSOURCE_MASK(32),
 	.mult		= 0,
 	.shift		= 20,
 };
 
 static void pit_disable_clocksource(void)
@@ -206,9 +204,9 @@ static void pit_disable_clocksource(void)
 	/*
 	 * Use mult to check whether it is registered or not
 	 */
-	if (clocksource_pit.mult) {
-		clocksource_unregister(&clocksource_pit);
-		clocksource_pit.mult = 0;
+	if (pit_cs.mult) {
+		clocksource_unregister(&pit_cs);
+		pit_cs.mult = 0;
 	}
 }
 
@@ -222,13 +220,13 @@ static int __init init_pit_clocksource(void)
 	 * - when local APIC timer is active (PIT is switched off)
 	 */
 	if (num_possible_cpus() > 1 || is_hpet_enabled() ||
-	    pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
+	    pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
-			clocksource_pit.shift);
-	return clocksource_register(&clocksource_pit);
+	pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
+
+	return clocksource_register(&pit_cs);
 }
 arch_initcall(init_pit_clocksource);
 
-#endif
+#endif /* !CONFIG_X86_64 */
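
Both halves of this file lean on the same fixed-point conversion: ns = (cycles * mult) >> shift, with mult chosen as (10^9 << shift) / freq. A sketch of computing mult for the PIT's 1193182 Hz input at the file's shift of 20 (the kernel's clocksource_hz2mult() rounds slightly differently, so treat the exact value as illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t shift = 20;			/* pit_cs.shift */
	uint64_t hz = 1193182;			/* PIT input clock */
	uint64_t mult = ((uint64_t)1000000000 << shift) / hz;
	uint64_t cycles = 1193182;		/* one second's worth */

	printf("mult=%llu, 1s of PIT cycles -> %llu ns\n",
	       (unsigned long long)mult,
	       (unsigned long long)((cycles * mult) >> shift));
	return 0;
}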
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 720d2607aacb..a979b5bd2fc0 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -7,10 +7,10 @@
 */
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 #include <linux/dmi.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
 
@@ -47,8 +47,7 @@ EXPORT_SYMBOL(native_io_delay);
 static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
 {
 	if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
-		printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
-			id->ident);
+		pr_notice("%s: using 0xed I/O delay port\n", id->ident);
 		io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
 	}
 
@@ -64,40 +63,40 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = {
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "Compaq Presario V6000",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30B7")
 		}
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion dv9000z",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30B9")
 		}
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion dv6000",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30B8")
 		}
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "HP Pavilion tx1000",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30BF")
 		}
 	},
 	{
 		.callback	= dmi_io_delay_0xed_port,
 		.ident		= "Presario F700",
 		.matches	= {
 			DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
 			DMI_MATCH(DMI_BOARD_NAME, "30D3")
 		}
 	},
 	{ }
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b864341dcc45..3aaf7b9e3a8b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -45,19 +45,24 @@ void ack_bad_irq(unsigned int irq)
 /*
 * /proc/interrupts printing:
 */
-static int show_other_interrupts(struct seq_file *p)
+static int show_other_interrupts(struct seq_file *p, int prec)
 {
 	int j;
 
-	seq_printf(p, "NMI: ");
+	seq_printf(p, "%*s: ", prec, "NMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
 	seq_printf(p, "  Non-maskable interrupts\n");
 #ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "LOC: ");
+	seq_printf(p, "%*s: ", prec, "LOC");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
 	seq_printf(p, "  Local timer interrupts\n");
+
+	seq_printf(p, "%*s: ", prec, "SPU");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
 #endif
 	if (generic_interrupt_extension) {
 		seq_printf(p, "PLT: ");
@@ -66,40 +71,34 @@ static int show_other_interrupts(struct seq_file *p)
 		seq_printf(p, "  Platform interrupts\n");
 	}
 #ifdef CONFIG_SMP
-	seq_printf(p, "RES: ");
+	seq_printf(p, "%*s: ", prec, "RES");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
 	seq_printf(p, "  Rescheduling interrupts\n");
-	seq_printf(p, "CAL: ");
+	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
 	seq_printf(p, "  Function call interrupts\n");
-	seq_printf(p, "TLB: ");
+	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  TLB shootdowns\n");
 #endif
 #ifdef CONFIG_X86_MCE
-	seq_printf(p, "TRM: ");
+	seq_printf(p, "%*s: ", prec, "TRM");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
 # ifdef CONFIG_X86_64
-	seq_printf(p, "THR: ");
+	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
 # endif
 #endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	seq_printf(p, "SPU: ");
-	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
-	seq_printf(p, "  Spurious interrupts\n");
-#endif
-	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
-	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+	seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
 #endif
 	return 0;
 }
@@ -107,19 +106,22 @@ static int show_other_interrupts(struct seq_file *p)
 int show_interrupts(struct seq_file *p, void *v)
 {
 	unsigned long flags, any_count = 0;
-	int i = *(loff_t *) v, j;
+	int i = *(loff_t *) v, j, prec;
 	struct irqaction *action;
 	struct irq_desc *desc;
 
 	if (i > nr_irqs)
 		return 0;
 
+	for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
+		j *= 10;
+
 	if (i == nr_irqs)
-		return show_other_interrupts(p);
+		return show_other_interrupts(p, prec);
 
 	/* print header */
 	if (i == 0) {
-		seq_printf(p, "           ");
+		seq_printf(p, "%*s", prec + 8, "");
 		for_each_online_cpu(j)
 			seq_printf(p, "CPU%-8d", j);
 		seq_putc(p, '\n');
@@ -130,23 +132,15 @@ int show_interrupts(struct seq_file *p, void *v)
 		return 0;
 
 	spin_lock_irqsave(&desc->lock, flags);
-#ifndef CONFIG_SMP
-	any_count = kstat_irqs(i);
-#else
 	for_each_online_cpu(j)
 		any_count |= kstat_irqs_cpu(i, j);
-#endif
 	action = desc->action;
 	if (!action && !any_count)
 		goto out;
 
-	seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-	seq_printf(p, "%10u ", kstat_irqs(i));
-#else
+	seq_printf(p, "%*d: ", prec, i);
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
-#endif
 	seq_printf(p, " %8s", desc->chip->name);
 	seq_printf(p, "-%-8s", desc->name);
 
@@ -171,6 +165,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
+	sum += irq_stats(cpu)->irq_spurious_count;
 #endif
 	if (generic_interrupt_extension)
 		sum += irq_stats(cpu)->generic_irqs;
@@ -185,9 +180,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
 #endif
-#ifdef CONFIG_X86_LOCAL_APIC
-	sum += irq_stats(cpu)->irq_spurious_count;
-#endif
 	return sum;
 }
 
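
The new prec computation sizes the label column off nr_irqs and feeds it to printf's dynamic field width (%*s / %*d). A standalone sketch of that interaction:

#include <stdio.h>

int main(void)
{
	int nr_irqs_values[] = { 256, 4096, 100000 };
	size_t n;

	for (n = 0; n < 3; n++) {
		int nr_irqs = nr_irqs_values[n], prec, j;

		/* same loop as the patch: grow width once past 999, 9999, ... */
		for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
			j *= 10;

		printf("nr_irqs=%-6d -> width %d: '%*s: '\n",
		       nr_irqs, prec, prec, "NMI");
	}
	return 0;
}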
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index bc1326105448..368b0a8836f9 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -50,7 +50,6 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
 */
 static struct irqaction fpu_irq = {
 	.handler = math_error_irq,
-	.mask = CPU_MASK_NONE,
 	.name = "fpu",
 };
 
@@ -83,7 +82,6 @@ void __init init_ISA_irqs(void)
 */
 static struct irqaction irq2 = {
 	.handler = no_action,
-	.mask = CPU_MASK_NONE,
 	.name = "cascade",
 };
 
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index c7a49e0ffbfb..8cd10537fd46 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -45,7 +45,6 @@
 
 static struct irqaction irq2 = {
 	.handler = no_action,
-	.mask = CPU_MASK_NONE,
 	.name = "cascade",
 };
 DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index ff7d3b0124f1..e444357375ce 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -8,11 +8,11 @@
 */
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
-#include <linux/stat.h>
+#include <linux/module.h>
 #include <linux/init.h>
+#include <linux/stat.h>
 #include <linux/io.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 
 #include <asm/setup.h>
 
@@ -26,9 +26,8 @@ struct setup_data_node {
 	u32 len;
 };
 
-static ssize_t
-setup_data_read(struct file *file, char __user *user_buf, size_t count,
-		loff_t *ppos)
+static ssize_t setup_data_read(struct file *file, char __user *user_buf,
+			       size_t count, loff_t *ppos)
 {
 	struct setup_data_node *node = file->private_data;
 	unsigned long remain;
@@ -39,20 +38,21 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
 
 	if (pos < 0)
 		return -EINVAL;
+
 	if (pos >= node->len)
 		return 0;
 
 	if (count > node->len - pos)
 		count = node->len - pos;
+
 	pa = node->paddr + sizeof(struct setup_data) + pos;
 	pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
 	if (PageHighMem(pg)) {
 		p = ioremap_cache(pa, count);
 		if (!p)
 			return -ENXIO;
-	} else {
+	} else
 		p = __va(pa);
-	}
 
 	remain = copy_to_user(user_buf, p, count);
 
@@ -70,12 +70,13 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
 static int setup_data_open(struct inode *inode, struct file *file)
 {
 	file->private_data = inode->i_private;
+
 	return 0;
 }
 
 static const struct file_operations fops_setup_data = {
 	.read		= setup_data_read,
 	.open		= setup_data_open,
 };
 
 static int __init
@@ -84,57 +85,50 @@ create_setup_data_node(struct dentry *parent, int no,
 {
 	struct dentry *d, *type, *data;
 	char buf[16];
-	int error;
 
 	sprintf(buf, "%d", no);
 	d = debugfs_create_dir(buf, parent);
-	if (!d) {
-		error = -ENOMEM;
-		goto err_return;
-	}
+	if (!d)
+		return -ENOMEM;
+
 	type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
-	if (!type) {
-		error = -ENOMEM;
+	if (!type)
 		goto err_dir;
-	}
+
 	data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
-	if (!data) {
-		error = -ENOMEM;
+	if (!data)
 		goto err_type;
-	}
+
 	return 0;
 
err_type:
 	debugfs_remove(type);
err_dir:
 	debugfs_remove(d);
-err_return:
-	return error;
+	return -ENOMEM;
 }
 
 static int __init create_setup_data_nodes(struct dentry *parent)
 {
 	struct setup_data_node *node;
 	struct setup_data *data;
-	int error, no = 0;
+	int error = -ENOMEM;
 	struct dentry *d;
 	struct page *pg;
 	u64 pa_data;
+	int no = 0;
 
 	d = debugfs_create_dir("setup_data", parent);
-	if (!d) {
-		error = -ENOMEM;
-		goto err_return;
-	}
+	if (!d)
+		return -ENOMEM;
 
 	pa_data = boot_params.hdr.setup_data;
 
 	while (pa_data) {
 		node = kmalloc(sizeof(*node), GFP_KERNEL);
-		if (!node) {
-			error = -ENOMEM;
+		if (!node)
 			goto err_dir;
-		}
+
 		pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
 		if (PageHighMem(pg)) {
 			data = ioremap_cache(pa_data, sizeof(*data));
@@ -143,9 +137,8 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 				error = -ENXIO;
 				goto err_dir;
 			}
-		} else {
+		} else
 			data = __va(pa_data);
-		}
 
 		node->paddr = pa_data;
 		node->type = data->type;
@@ -159,11 +152,11 @@ static int __init create_setup_data_nodes(struct dentry *parent)
 			goto err_dir;
 		no++;
 	}
+
 	return 0;
 
err_dir:
 	debugfs_remove(d);
-err_return:
 	return error;
 }
 
@@ -175,28 +168,26 @@ static struct debugfs_blob_wrapper boot_params_blob = {
 static int __init boot_params_kdebugfs_init(void)
 {
 	struct dentry *dbp, *version, *data;
-	int error;
+	int error = -ENOMEM;
 
 	dbp = debugfs_create_dir("boot_params", NULL);
-	if (!dbp) {
-		error = -ENOMEM;
-		goto err_return;
-	}
+	if (!dbp)
+		return -ENOMEM;
+
 	version = debugfs_create_x16("version", S_IRUGO, dbp,
 				     &boot_params.hdr.version);
-	if (!version) {
-		error = -ENOMEM;
+	if (!version)
 		goto err_dir;
-	}
+
 	data = debugfs_create_blob("data", S_IRUGO, dbp,
 				   &boot_params_blob);
-	if (!data) {
-		error = -ENOMEM;
+	if (!data)
 		goto err_version;
-	}
+
 	error = create_setup_data_nodes(dbp);
 	if (error)
 		goto err_data;
+
 	return 0;
 
err_data:
@@ -205,10 +196,9 @@ err_version:
 	debugfs_remove(version);
err_dir:
 	debugfs_remove(dbp);
-err_return:
 	return error;
 }
-#endif
+#endif /* CONFIG_DEBUG_BOOT_PARAMS */
 
 static int __init arch_kdebugfs_init(void)
 {
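
The kdebugfs cleanup converges on the kernel's usual goto-unwind shape: return directly while nothing is held, otherwise jump to a label chain that releases resources in reverse order of acquisition. A minimal compilable sketch with hypothetical resources:

#include <stdio.h>
#include <stdlib.h>

static int create_things(void)
{
	char *a, *b;

	a = malloc(16);
	if (!a)
		return -1;	/* nothing to unwind yet: return directly */

	b = malloc(16);
	if (!b)
		goto err_a;	/* unwind in reverse order of acquisition */

	printf("both allocated\n");
	free(b);
	free(a);
	return 0;

err_a:
	free(a);
	return -1;
}

int main(void)
{
	return create_things() ? 1 : 0;
}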
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e948b28a5a9a..7b5169d2b000 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -193,6 +193,9 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
+	if (search_exception_tables((unsigned long)opcodes))
+		return 0;	/* Page fault may occur on this address. */
+
retry:
 	if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
 		return 0;
@@ -635,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 #else
 			"	pushf\n"
 			/*
-			 * Skip cs, ip, orig_ax.
+			 * Skip cs, ip, orig_ax and gs.
 			 * trampoline_handler() will plug in these values
 			 */
-			"	subl $12, %esp\n"
+			"	subl $16, %esp\n"
 			"	pushl %fs\n"
-			"	pushl %ds\n"
 			"	pushl %es\n"
+			"	pushl %ds\n"
 			"	pushl %eax\n"
 			"	pushl %ebp\n"
 			"	pushl %edi\n"
@@ -652,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 			"	movl %esp, %eax\n"
 			"	call trampoline_handler\n"
 			/* Move flags to cs */
-			"	movl 52(%esp), %edx\n"
-			"	movl %edx, 48(%esp)\n"
+			"	movl 56(%esp), %edx\n"
+			"	movl %edx, 52(%esp)\n"
 			/* Replace saved flags with true return address. */
-			"	movl %eax, 52(%esp)\n"
+			"	movl %eax, 56(%esp)\n"
 			"	popl %ebx\n"
 			"	popl %ecx\n"
 			"	popl %edx\n"
@@ -663,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
 			"	popl %edi\n"
 			"	popl %ebp\n"
 			"	popl %eax\n"
-			/* Skip ip, orig_ax, es, ds, fs */
-			"	addl $20, %esp\n"
+			/* Skip ds, es, fs, gs, orig_ax and ip */
+			"	addl $24, %esp\n"
 			"	popf\n"
 #endif
 			"	ret\n");
@@ -688,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	regs->cs = __KERNEL_CS;
 #else
 	regs->cs = __KERNEL_CS | get_kernel_rpl();
+	regs->gs = 0;
 #endif
 	regs->ip = trampoline_address;
 	regs->orig_ax = ~0UL;
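
The 12-to-16 and 52-to-56 edits in the trampoline are all the same 4-byte shift: the 32-bit pt_regs gained a gs slot, so the trampoline reserves one more skipped word and every offset relative to %esp moves up by one slot. The arithmetic, spelled out as a runnable check (slot counts taken from the asm above):

#include <stdio.h>

int main(void)
{
	int slot = 4;			/* bytes per 32-bit stack slot */
	int skipped_old = 3 * slot;	/* cs, ip, orig_ax */
	int skipped_new = 4 * slot;	/* cs, ip, orig_ax, gs */
	int pushed = 10 * slot;		/* fs..ebx pushed explicitly */

	printf("subl: $%d -> $%d\n", skipped_old, skipped_new);
	printf("flags offset: %d -> %d\n",
	       skipped_old + pushed, skipped_new + pushed);
	return 0;
}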
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 478bca986eca..33019ddb56b4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -138,12 +138,6 @@ static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
 	kvm_mmu_write(ptep, pte_val(pte));
 }
 
-static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte)
-{
-	kvm_mmu_write(ptep, pte_val(pte));
-}
-
 static void kvm_pte_clear(struct mm_struct *mm,
 			  unsigned long addr, pte_t *ptep)
 {
@@ -220,7 +214,6 @@ static void paravirt_ops_setup(void)
 #if PAGETABLE_LEVELS >= 3
 #ifdef CONFIG_X86_PAE
 	pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
-	pv_mmu_ops.set_pte_present = kvm_set_pte_present;
 	pv_mmu_ops.pte_clear = kvm_pte_clear;
 	pv_mmu_ops.pmd_clear = kvm_pmd_clear;
 #endif
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a62..e7368c1da01d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
 #include <linux/ftrace.h>
 #include <linux/suspend.h>
 #include <linux/gfp.h>
+#include <linux/io.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/io.h>
 #include <asm/apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
 		"\tmovl %%eax,%%fs\n"
 		"\tmovl %%eax,%%gs\n"
 		"\tmovl %%eax,%%ss\n"
-		::: "eax", "memory");
+		: : : "eax", "memory");
 #undef STR
 #undef __STR
 }
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
 
 	if (image->preserve_context) {
 #ifdef CONFIG_X86_IO_APIC
-		/* We need to put APICs in legacy mode so that we can
+		/*
+		 * We need to put APICs in legacy mode so that we can
 		 * get timer interrupts in second kernel. kexec/kdump
 		 * paths already have calls to disable_IO_APIC() in
 		 * one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
 	page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
 						<< PAGE_SHIFT);
 
-	/* The segment registers are funny things, they have both a
+	/*
+	 * The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * set to a specific selector, the invisible part is loaded
 	 * with from a table in memory.  At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
 	 * segments, before I zap the gdt with an invalid value.
 	 */
 	load_segments();
-	/* The gdt & idt are now invalid.
+	/*
+	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0),0);
-	set_idt(phys_to_virt(0),0);
+	set_gdt(phys_to_virt(0), 0);
+	set_idt(phys_to_virt(0), 0);
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd8..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/io.h>
16#include <linux/suspend.h>
15 17
16#include <asm/pgtable.h> 18#include <asm/pgtable.h>
17#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
18#include <asm/mmu_context.h> 20#include <asm/mmu_context.h>
19#include <asm/io.h> 21
22static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
23 unsigned long addr)
24{
25 pud_t *pud;
26 pmd_t *pmd;
27 struct page *page;
28 int result = -ENOMEM;
29
30 addr &= PMD_MASK;
31 pgd += pgd_index(addr);
32 if (!pgd_present(*pgd)) {
33 page = kimage_alloc_control_pages(image, 0);
34 if (!page)
35 goto out;
36 pud = (pud_t *)page_address(page);
37 memset(pud, 0, PAGE_SIZE);
38 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
39 }
40 pud = pud_offset(pgd, addr);
41 if (!pud_present(*pud)) {
42 page = kimage_alloc_control_pages(image, 0);
43 if (!page)
44 goto out;
45 pmd = (pmd_t *)page_address(page);
46 memset(pmd, 0, PAGE_SIZE);
47 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
48 }
49 pmd = pmd_offset(pud, addr);
50 if (!pmd_present(*pmd))
51 set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
52 result = 0;
53out:
54 return result;
55}
20 56
21static void init_level2_page(pmd_t *level2p, unsigned long addr) 57static void init_level2_page(pmd_t *level2p, unsigned long addr)
22{ 58{
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
83 } 119 }
84 level3p = (pud_t *)page_address(page); 120 level3p = (pud_t *)page_address(page);
85 result = init_level3_page(image, level3p, addr, last_addr); 121 result = init_level3_page(image, level3p, addr, last_addr);
86 if (result) { 122 if (result)
87 goto out; 123 goto out;
88 }
89 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); 124 set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
90 addr += PGDIR_SIZE; 125 addr += PGDIR_SIZE;
91 } 126 }
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
156 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 191 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157 if (result) 192 if (result)
158 return result; 193 return result;
194 /*
195 * image->start may be outside 0 ~ max_pfn, for example when
196 * jump back to original kernel from kexeced kernel
197 */
198 result = init_one_level2_page(image, level4p, image->start);
199 if (result)
200 return result;
159 return init_transition_pgtable(image, level4p); 201 return init_transition_pgtable(image, level4p);
160} 202}
161 203
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
229{ 271{
230 unsigned long page_list[PAGES_NR]; 272 unsigned long page_list[PAGES_NR];
231 void *control_page; 273 void *control_page;
274 int save_ftrace_enabled;
232 275
233 tracer_disable(); 276#ifdef CONFIG_KEXEC_JUMP
277 if (kexec_image->preserve_context)
278 save_processor_state();
279#endif
280
281 save_ftrace_enabled = __ftrace_enabled_save();
234 282
235 /* Interrupts aren't acceptable while we reboot */ 283 /* Interrupts aren't acceptable while we reboot */
236 local_irq_disable(); 284 local_irq_disable();
237 285
286 if (image->preserve_context) {
287#ifdef CONFIG_X86_IO_APIC
288 /*
289 * We need to put APICs in legacy mode so that we can
290 * get timer interrupts in second kernel. kexec/kdump
291 * paths already have calls to disable_IO_APIC() in
292 * one form or other. kexec jump path also need
293 * one.
294 */
295 disable_IO_APIC();
296#endif
297 }
298
238 control_page = page_address(image->control_code_page) + PAGE_SIZE; 299 control_page = page_address(image->control_code_page) + PAGE_SIZE;
239 memcpy(control_page, relocate_kernel, PAGE_SIZE); 300 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
240 301
241 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 302 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
303 page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
242 page_list[PA_TABLE_PAGE] = 304 page_list[PA_TABLE_PAGE] =
243 (unsigned long)__pa(page_address(image->control_code_page)); 305 (unsigned long)__pa(page_address(image->control_code_page));
244 306
245 /* The segment registers are funny things, they have both a 307 if (image->type == KEXEC_TYPE_DEFAULT)
308 page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
309 << PAGE_SHIFT);
310
311 /*
312 * The segment registers are funny things, they have both a
246 * visible and an invisible part. Whenever the visible part is 313 * visible and an invisible part. Whenever the visible part is
247 * set to a specific selector, the invisible part is loaded 314 * set to a specific selector, the invisible part is loaded
248 * with from a table in memory. At no other time is the 315 * with from a table in memory. At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
252 * segments, before I zap the gdt with an invalid value. 319 * segments, before I zap the gdt with an invalid value.
253 */ 320 */
254 load_segments(); 321 load_segments();
255 /* The gdt & idt are now invalid. 322 /*
323 * The gdt & idt are now invalid.
256 * If you want to load them you must set up your own idt & gdt. 324 * If you want to load them you must set up your own idt & gdt.
257 */ 325 */
258 set_gdt(phys_to_virt(0),0); 326 set_gdt(phys_to_virt(0), 0);
259 set_idt(phys_to_virt(0),0); 327 set_idt(phys_to_virt(0), 0);
260 328
261 /* now call it */ 329 /* now call it */
262 relocate_kernel((unsigned long)image->head, (unsigned long)page_list, 330 image->start = relocate_kernel((unsigned long)image->head,
263 image->start); 331 (unsigned long)page_list,
332 image->start,
333 image->preserve_context);
334
335#ifdef CONFIG_KEXEC_JUMP
336 if (kexec_image->preserve_context)
337 restore_processor_state();
338#endif
339
340 __ftrace_enabled_restore(save_ftrace_enabled);
264} 341}
265 342
266void arch_crash_save_vmcoreinfo(void) 343void arch_crash_save_vmcoreinfo(void)
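
init_one_level2_page() added above is the usual allocate-if-absent walk down pgd -> pud -> pmd, ending in a single 2MB executable mapping so image->start stays reachable even when it lies outside 0..max_pfn (the kexec-jump return path). The alignment step is worth seeing with numbers; this standalone demo (plain C, illustrative constants) shows what addr &= PMD_MASK does:

    #include <stdio.h>

    int main(void)
    {
            unsigned long pmd_size = 1UL << 21;      /* 2MB large page */
            unsigned long pmd_mask = ~(pmd_size - 1);
            unsigned long addr     = 0x1234567UL;

            /* prints 0x1200000: the 2MB frame the PMD entry will cover */
            printf("%#lx\n", addr & pmd_mask);
            return 0;
    }
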
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 8815f3c7fec7..846510b78a09 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -348,7 +348,6 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id)
348static struct irqaction mfgptirq = { 348static struct irqaction mfgptirq = {
349 .handler = mfgpt_tick, 349 .handler = mfgpt_tick,
350 .flags = IRQF_DISABLED | IRQF_NOBALANCING, 350 .flags = IRQF_DISABLED | IRQF_NOBALANCING,
351 .mask = CPU_MASK_NONE,
352 .name = "mfgpt-timer" 351 .name = "mfgpt-timer"
353}; 352};
354 353
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c25fdb382292..453b5795a5c6 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -12,31 +12,30 @@
12 * 12 *
13 * Licensed under the terms of the GNU General Public 13 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 14 * License version 2. See file COPYING for details.
15*/ 15 */
16 16#include <linux/platform_device.h>
17#include <linux/capability.h> 17#include <linux/capability.h>
18#include <linux/kernel.h>
19#include <linux/init.h>
20#include <linux/sched.h>
21#include <linux/cpumask.h>
22#include <linux/module.h>
23#include <linux/slab.h>
24#include <linux/vmalloc.h>
25#include <linux/miscdevice.h> 18#include <linux/miscdevice.h>
19#include <linux/firmware.h>
26#include <linux/spinlock.h> 20#include <linux/spinlock.h>
27#include <linux/mm.h> 21#include <linux/cpumask.h>
28#include <linux/fs.h> 22#include <linux/pci_ids.h>
23#include <linux/uaccess.h>
24#include <linux/vmalloc.h>
25#include <linux/kernel.h>
26#include <linux/module.h>
29#include <linux/mutex.h> 27#include <linux/mutex.h>
28#include <linux/sched.h>
29#include <linux/init.h>
30#include <linux/slab.h>
30#include <linux/cpu.h> 31#include <linux/cpu.h>
31#include <linux/firmware.h>
32#include <linux/platform_device.h>
33#include <linux/pci.h> 32#include <linux/pci.h>
34#include <linux/pci_ids.h> 33#include <linux/fs.h>
35#include <linux/uaccess.h> 34#include <linux/mm.h>
36 35
37#include <asm/msr.h>
38#include <asm/processor.h>
39#include <asm/microcode.h> 36#include <asm/microcode.h>
37#include <asm/processor.h>
38#include <asm/msr.h>
40 39
41MODULE_DESCRIPTION("AMD Microcode Update Driver"); 40MODULE_DESCRIPTION("AMD Microcode Update Driver");
42MODULE_AUTHOR("Peter Oruba"); 41MODULE_AUTHOR("Peter Oruba");
@@ -72,8 +71,8 @@ struct microcode_header_amd {
72} __attribute__((packed)); 71} __attribute__((packed));
73 72
74struct microcode_amd { 73struct microcode_amd {
75 struct microcode_header_amd hdr; 74 struct microcode_header_amd hdr;
76 unsigned int mpb[0]; 75 unsigned int mpb[0];
77}; 76};
78 77
79#define UCODE_MAX_SIZE 2048 78#define UCODE_MAX_SIZE 2048
@@ -184,8 +183,8 @@ static int get_ucode_data(void *to, const u8 *from, size_t n)
184 return 0; 183 return 0;
185} 184}
186 185
187static void *get_next_ucode(const u8 *buf, unsigned int size, 186static void *
188 unsigned int *mc_size) 187get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
189{ 188{
190 unsigned int total_size; 189 unsigned int total_size;
191 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 190 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
@@ -223,7 +222,6 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
223 return mc; 222 return mc;
224} 223}
225 224
226
227static int install_equiv_cpu_table(const u8 *buf) 225static int install_equiv_cpu_table(const u8 *buf)
228{ 226{
229 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; 227 u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
@@ -372,4 +370,3 @@ struct microcode_ops * __init init_amd_microcode(void)
372{ 370{
373 return &microcode_amd_ops; 371 return &microcode_amd_ops;
374} 372}
375
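
struct microcode_amd above keeps the variable-length patch body in a zero-length trailing array (mpb[0]), the pre-C99 kernel spelling of a flexible array member. A hedged sketch of the idiom, with illustrative names rather than the driver's own:

    #include <linux/slab.h>

    struct blob {
            unsigned int    hdr_field;
            unsigned int    data[0];    /* payload starts right after the header */
    };

    static struct blob *blob_alloc(size_t payload_bytes)
    {
            /* one allocation covers the header and the payload */
            return kmalloc(sizeof(struct blob) + payload_bytes, GFP_KERNEL);
    }
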
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9b721ba968c..a0f3851ef310 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,67 +70,78 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73#include <linux/platform_device.h>
73#include <linux/capability.h> 74#include <linux/capability.h>
74#include <linux/kernel.h> 75#include <linux/miscdevice.h>
75#include <linux/init.h> 76#include <linux/firmware.h>
76#include <linux/sched.h>
77#include <linux/smp_lock.h> 77#include <linux/smp_lock.h>
78#include <linux/spinlock.h>
78#include <linux/cpumask.h> 79#include <linux/cpumask.h>
79#include <linux/module.h> 80#include <linux/uaccess.h>
80#include <linux/slab.h>
81#include <linux/vmalloc.h> 81#include <linux/vmalloc.h>
82#include <linux/miscdevice.h> 82#include <linux/kernel.h>
83#include <linux/spinlock.h> 83#include <linux/module.h>
84#include <linux/mm.h>
85#include <linux/fs.h>
86#include <linux/mutex.h> 84#include <linux/mutex.h>
85#include <linux/sched.h>
86#include <linux/init.h>
87#include <linux/slab.h>
87#include <linux/cpu.h> 88#include <linux/cpu.h>
88#include <linux/firmware.h> 89#include <linux/fs.h>
89#include <linux/platform_device.h> 90#include <linux/mm.h>
90 91
91#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h>
94#include <asm/microcode.h> 92#include <asm/microcode.h>
93#include <asm/processor.h>
94#include <asm/msr.h>
95 95
96MODULE_DESCRIPTION("Microcode Update Driver"); 96MODULE_DESCRIPTION("Microcode Update Driver");
97MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 97MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
98MODULE_LICENSE("GPL"); 98MODULE_LICENSE("GPL");
99 99
100#define MICROCODE_VERSION "2.00" 100#define MICROCODE_VERSION "2.00"
101 101
102static struct microcode_ops *microcode_ops; 102static struct microcode_ops *microcode_ops;
103 103
104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ 104/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
105static DEFINE_MUTEX(microcode_mutex); 105static DEFINE_MUTEX(microcode_mutex);
106 106
107struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; 107struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
108EXPORT_SYMBOL_GPL(ucode_cpu_info); 108EXPORT_SYMBOL_GPL(ucode_cpu_info);
109 109
110#ifdef CONFIG_MICROCODE_OLD_INTERFACE 110#ifdef CONFIG_MICROCODE_OLD_INTERFACE
111struct update_for_cpu {
112 const void __user *buf;
113 size_t size;
114};
115
116static long update_for_cpu(void *_ufc)
117{
118 struct update_for_cpu *ufc = _ufc;
119 int error;
120
121 error = microcode_ops->request_microcode_user(smp_processor_id(),
122 ufc->buf, ufc->size);
123 if (error < 0)
124 return error;
125 if (!error)
126 microcode_ops->apply_microcode(smp_processor_id());
127 return error;
128}
129
111static int do_microcode_update(const void __user *buf, size_t size) 130static int do_microcode_update(const void __user *buf, size_t size)
112{ 131{
113 cpumask_t old;
114 int error = 0; 132 int error = 0;
115 int cpu; 133 int cpu;
116 134 struct update_for_cpu ufc = { .buf = buf, .size = size };
117 old = current->cpus_allowed;
118 135
119 for_each_online_cpu(cpu) { 136 for_each_online_cpu(cpu) {
120 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 137 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
121 138
122 if (!uci->valid) 139 if (!uci->valid)
123 continue; 140 continue;
124 141 error = work_on_cpu(cpu, update_for_cpu, &ufc);
125 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
126 error = microcode_ops->request_microcode_user(cpu, buf, size);
127 if (error < 0) 142 if (error < 0)
128 goto out; 143 break;
129 if (!error)
130 microcode_ops->apply_microcode(cpu);
131 } 144 }
132out:
133 set_cpus_allowed_ptr(current, &old);
134 return error; 145 return error;
135} 146}
136 147
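
The hunk above replaces the old migrate-the-caller dance (save cpus_allowed, set_cpus_allowed_ptr() to one CPU, restore) with work_on_cpu(), which queues a function on the target CPU's workqueue, sleeps until it completes, and hands back its return value. A minimal usage sketch, assuming the 2.6.29-era signature long work_on_cpu(cpu, fn, arg):

    #include <linux/workqueue.h>
    #include <linux/smp.h>

    /* runs on the target CPU; arg is work_on_cpu()'s third argument */
    static long pinned_fn(void *arg)
    {
            return smp_processor_id();  /* == the cpu passed below */
    }

    static long run_pinned(int cpu)
    {
            /* may sleep; returns pinned_fn()'s result (or -errno) */
            return work_on_cpu(cpu, pinned_fn, NULL);
    }
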
@@ -198,18 +209,33 @@ static void microcode_dev_exit(void)
198 209
199MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); 210MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
200#else 211#else
201#define microcode_dev_init() 0 212#define microcode_dev_init() 0
202#define microcode_dev_exit() do { } while (0) 213#define microcode_dev_exit() do { } while (0)
203#endif 214#endif
204 215
205/* fake device for request_firmware */ 216/* fake device for request_firmware */
206static struct platform_device *microcode_pdev; 217static struct platform_device *microcode_pdev;
218
219static long reload_for_cpu(void *unused)
220{
221 struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
222 int err = 0;
223
224 mutex_lock(&microcode_mutex);
225 if (uci->valid) {
226 err = microcode_ops->request_microcode_fw(smp_processor_id(),
227 &microcode_pdev->dev);
228 if (!err)
229 microcode_ops->apply_microcode(smp_processor_id());
230 }
231 mutex_unlock(&microcode_mutex);
232 return err;
233}
207 234
208static ssize_t reload_store(struct sys_device *dev, 235static ssize_t reload_store(struct sys_device *dev,
209 struct sysdev_attribute *attr, 236 struct sysdev_attribute *attr,
210 const char *buf, size_t sz) 237 const char *buf, size_t sz)
211{ 238{
212 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
213 char *end; 239 char *end;
214 unsigned long val = simple_strtoul(buf, &end, 0); 240 unsigned long val = simple_strtoul(buf, &end, 0);
215 int err = 0; 241 int err = 0;
@@ -218,21 +244,9 @@ static ssize_t reload_store(struct sys_device *dev,
218 if (end == buf) 244 if (end == buf)
219 return -EINVAL; 245 return -EINVAL;
220 if (val == 1) { 246 if (val == 1) {
221 cpumask_t old = current->cpus_allowed;
222
223 get_online_cpus(); 247 get_online_cpus();
224 if (cpu_online(cpu)) { 248 if (cpu_online(cpu))
225 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 249 err = work_on_cpu(cpu, reload_for_cpu, NULL);
226 mutex_lock(&microcode_mutex);
227 if (uci->valid) {
228 err = microcode_ops->request_microcode_fw(cpu,
229 &microcode_pdev->dev);
230 if (!err)
231 microcode_ops->apply_microcode(cpu);
232 }
233 mutex_unlock(&microcode_mutex);
234 set_cpus_allowed_ptr(current, &old);
235 }
236 put_online_cpus(); 250 put_online_cpus();
237 } 251 }
238 if (err) 252 if (err)
@@ -268,8 +282,8 @@ static struct attribute *mc_default_attrs[] = {
268}; 282};
269 283
270static struct attribute_group mc_attr_group = { 284static struct attribute_group mc_attr_group = {
271 .attrs = mc_default_attrs, 285 .attrs = mc_default_attrs,
272 .name = "microcode", 286 .name = "microcode",
273}; 287};
274 288
275static void __microcode_fini_cpu(int cpu) 289static void __microcode_fini_cpu(int cpu)
@@ -328,9 +342,9 @@ static int microcode_resume_cpu(int cpu)
328 return 0; 342 return 0;
329} 343}
330 344
331static void microcode_update_cpu(int cpu) 345static long microcode_update_cpu(void *unused)
332{ 346{
333 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 347 struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
334 int err = 0; 348 int err = 0;
335 349
336 /* 350 /*
@@ -338,30 +352,27 @@ static void microcode_update_cpu(int cpu)
338 * otherwise just request a firmware: 352 * otherwise just request a firmware:
339 */ 353 */
340 if (uci->valid) { 354 if (uci->valid) {
341 err = microcode_resume_cpu(cpu); 355 err = microcode_resume_cpu(smp_processor_id());
342 } else { 356 } else {
343 collect_cpu_info(cpu); 357 collect_cpu_info(smp_processor_id());
344 if (uci->valid && system_state == SYSTEM_RUNNING) 358 if (uci->valid && system_state == SYSTEM_RUNNING)
345 err = microcode_ops->request_microcode_fw(cpu, 359 err = microcode_ops->request_microcode_fw(
360 smp_processor_id(),
346 &microcode_pdev->dev); 361 &microcode_pdev->dev);
347 } 362 }
348 if (!err) 363 if (!err)
349 microcode_ops->apply_microcode(cpu); 364 microcode_ops->apply_microcode(smp_processor_id());
365 return err;
350} 366}
351 367
352static void microcode_init_cpu(int cpu) 368static int microcode_init_cpu(int cpu)
353{ 369{
354 cpumask_t old = current->cpus_allowed; 370 int err;
355
356 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
357 /* We should bind the task to the CPU */
358 BUG_ON(raw_smp_processor_id() != cpu);
359
360 mutex_lock(&microcode_mutex); 371 mutex_lock(&microcode_mutex);
361 microcode_update_cpu(cpu); 372 err = work_on_cpu(cpu, microcode_update_cpu, NULL);
362 mutex_unlock(&microcode_mutex); 373 mutex_unlock(&microcode_mutex);
363 374
364 set_cpus_allowed_ptr(current, &old); 375 return err;
365} 376}
366 377
367static int mc_sysdev_add(struct sys_device *sys_dev) 378static int mc_sysdev_add(struct sys_device *sys_dev)
@@ -379,8 +390,11 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
379 if (err) 390 if (err)
380 return err; 391 return err;
381 392
382 microcode_init_cpu(cpu); 393 err = microcode_init_cpu(cpu);
383 return 0; 394 if (err)
395 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
396
397 return err;
384} 398}
385 399
386static int mc_sysdev_remove(struct sys_device *sys_dev) 400static int mc_sysdev_remove(struct sys_device *sys_dev)
@@ -404,14 +418,14 @@ static int mc_sysdev_resume(struct sys_device *dev)
404 return 0; 418 return 0;
405 419
406 /* only CPU 0 will apply ucode here */ 420 /* only CPU 0 will apply ucode here */
407 microcode_update_cpu(0); 421 microcode_update_cpu(NULL);
408 return 0; 422 return 0;
409} 423}
410 424
411static struct sysdev_driver mc_sysdev_driver = { 425static struct sysdev_driver mc_sysdev_driver = {
412 .add = mc_sysdev_add, 426 .add = mc_sysdev_add,
413 .remove = mc_sysdev_remove, 427 .remove = mc_sysdev_remove,
414 .resume = mc_sysdev_resume, 428 .resume = mc_sysdev_resume,
415}; 429};
416 430
417static __cpuinit int 431static __cpuinit int
@@ -424,7 +438,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
424 switch (action) { 438 switch (action) {
425 case CPU_ONLINE: 439 case CPU_ONLINE:
426 case CPU_ONLINE_FROZEN: 440 case CPU_ONLINE_FROZEN:
427 microcode_init_cpu(cpu); 441 if (microcode_init_cpu(cpu))
442 printk(KERN_ERR "microcode: failed to init CPU%d\n",
443 cpu);
428 case CPU_DOWN_FAILED: 444 case CPU_DOWN_FAILED:
429 case CPU_DOWN_FAILED_FROZEN: 445 case CPU_DOWN_FAILED_FROZEN:
430 pr_debug("microcode: CPU%d added\n", cpu); 446 pr_debug("microcode: CPU%d added\n", cpu);
@@ -448,7 +464,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
448} 464}
449 465
450static struct notifier_block __refdata mc_cpu_notifier = { 466static struct notifier_block __refdata mc_cpu_notifier = {
451 .notifier_call = mc_cpu_callback, 467 .notifier_call = mc_cpu_callback,
452}; 468};
453 469
454static int __init microcode_init(void) 470static int __init microcode_init(void)
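
mc_cpu_callback() above follows the standard CPU-hotplug notifier shape of this era: one callback multiplexing on the action code, registered once at init. A hedged skeleton of the pattern (names are illustrative):

    #include <linux/cpu.h>
    #include <linux/notifier.h>

    static int __cpuinit example_cpu_callback(struct notifier_block *nb,
                                              unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            switch (action) {
            case CPU_ONLINE:
            case CPU_ONLINE_FROZEN:
                    pr_debug("CPU%u is up\n", cpu);   /* (re)init per-CPU state */
                    break;
            case CPU_DEAD:
            case CPU_DEAD_FROZEN:
                    pr_debug("CPU%u is down\n", cpu); /* tear it down */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block __refdata example_cpu_notifier = {
            .notifier_call = example_cpu_callback,
    };

    /* from the driver's init path: register_hotcpu_notifier(&example_cpu_notifier); */
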
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 5e9f4fc51385..149b9ec7c1ab 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,28 +70,28 @@
70 * Fix sigmatch() macro to handle old CPUs with pf == 0. 70 * Fix sigmatch() macro to handle old CPUs with pf == 0.
71 * Thanks to Stuart Swales for pointing out this bug. 71 * Thanks to Stuart Swales for pointing out this bug.
72 */ 72 */
73#include <linux/platform_device.h>
73#include <linux/capability.h> 74#include <linux/capability.h>
74#include <linux/kernel.h> 75#include <linux/miscdevice.h>
75#include <linux/init.h> 76#include <linux/firmware.h>
76#include <linux/sched.h>
77#include <linux/smp_lock.h> 77#include <linux/smp_lock.h>
78#include <linux/spinlock.h>
78#include <linux/cpumask.h> 79#include <linux/cpumask.h>
79#include <linux/module.h> 80#include <linux/uaccess.h>
80#include <linux/slab.h>
81#include <linux/vmalloc.h> 81#include <linux/vmalloc.h>
82#include <linux/miscdevice.h> 82#include <linux/kernel.h>
83#include <linux/spinlock.h> 83#include <linux/module.h>
84#include <linux/mm.h>
85#include <linux/fs.h>
86#include <linux/mutex.h> 84#include <linux/mutex.h>
85#include <linux/sched.h>
86#include <linux/init.h>
87#include <linux/slab.h>
87#include <linux/cpu.h> 88#include <linux/cpu.h>
88#include <linux/firmware.h> 89#include <linux/fs.h>
89#include <linux/platform_device.h> 90#include <linux/mm.h>
90#include <linux/uaccess.h>
91 91
92#include <asm/msr.h>
93#include <asm/processor.h>
94#include <asm/microcode.h> 92#include <asm/microcode.h>
93#include <asm/processor.h>
94#include <asm/msr.h>
95 95
96MODULE_DESCRIPTION("Microcode Update Driver"); 96MODULE_DESCRIPTION("Microcode Update Driver");
97MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 97MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -129,12 +129,13 @@ struct extended_sigtable {
129 struct extended_signature sigs[0]; 129 struct extended_signature sigs[0];
130}; 130};
131 131
132#define DEFAULT_UCODE_DATASIZE (2000) 132#define DEFAULT_UCODE_DATASIZE (2000)
133#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) 133#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
134#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) 134#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
135#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) 135#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
136#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) 136#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
137#define DWSIZE (sizeof(u32)) 137#define DWSIZE (sizeof(u32))
138
138#define get_totalsize(mc) \ 139#define get_totalsize(mc) \
139 (((struct microcode_intel *)mc)->hdr.totalsize ? \ 140 (((struct microcode_intel *)mc)->hdr.totalsize ? \
140 ((struct microcode_intel *)mc)->hdr.totalsize : \ 141 ((struct microcode_intel *)mc)->hdr.totalsize : \
@@ -197,30 +198,31 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
197} 198}
198 199
199static inline int 200static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 201update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 202{
202 return (mc_header->rev <= rev) ? 0 : 1; 203 return (mc_header->rev <= rev) ? 0 : 1;
203} 204}
204 205
205static int microcode_sanity_check(void *mc) 206static int microcode_sanity_check(void *mc)
206{ 207{
208 unsigned long total_size, data_size, ext_table_size;
207 struct microcode_header_intel *mc_header = mc; 209 struct microcode_header_intel *mc_header = mc;
208 struct extended_sigtable *ext_header = NULL; 210 struct extended_sigtable *ext_header = NULL;
209 struct extended_signature *ext_sig;
210 unsigned long total_size, data_size, ext_table_size;
211 int sum, orig_sum, ext_sigcount = 0, i; 211 int sum, orig_sum, ext_sigcount = 0, i;
212 struct extended_signature *ext_sig;
212 213
213 total_size = get_totalsize(mc_header); 214 total_size = get_totalsize(mc_header);
214 data_size = get_datasize(mc_header); 215 data_size = get_datasize(mc_header);
216
215 if (data_size + MC_HEADER_SIZE > total_size) { 217 if (data_size + MC_HEADER_SIZE > total_size) {
216 printk(KERN_ERR "microcode: error! " 218 printk(KERN_ERR "microcode: error! "
217 "Bad data size in microcode data file\n"); 219 "Bad data size in microcode data file\n");
218 return -EINVAL; 220 return -EINVAL;
219 } 221 }
220 222
221 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { 223 if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
222 printk(KERN_ERR "microcode: error! " 224 printk(KERN_ERR "microcode: error! "
223 "Unknown microcode update format\n"); 225 "Unknown microcode update format\n");
224 return -EINVAL; 226 return -EINVAL;
225 } 227 }
226 ext_table_size = total_size - (MC_HEADER_SIZE + data_size); 228 ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
@@ -318,11 +320,15 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
318 320
319static void apply_microcode(int cpu) 321static void apply_microcode(int cpu)
320{ 322{
323 struct microcode_intel *mc_intel;
324 struct ucode_cpu_info *uci;
321 unsigned long flags; 325 unsigned long flags;
322 unsigned int val[2]; 326 unsigned int val[2];
323 int cpu_num = raw_smp_processor_id(); 327 int cpu_num;
324 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 328
325 struct microcode_intel *mc_intel = uci->mc; 329 cpu_num = raw_smp_processor_id();
330 uci = ucode_cpu_info + cpu;
331 mc_intel = uci->mc;
326 332
327 /* We should bind the task to the CPU */ 333 /* We should bind the task to the CPU */
328 BUG_ON(cpu_num != cpu); 334 BUG_ON(cpu_num != cpu);
@@ -348,15 +354,17 @@ static void apply_microcode(int cpu)
348 spin_unlock_irqrestore(&microcode_update_lock, flags); 354 spin_unlock_irqrestore(&microcode_update_lock, flags);
349 if (val[1] != mc_intel->hdr.rev) { 355 if (val[1] != mc_intel->hdr.rev) {
350 printk(KERN_ERR "microcode: CPU%d update from revision " 356 printk(KERN_ERR "microcode: CPU%d update from revision "
351 "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); 357 "0x%x to 0x%x failed\n",
358 cpu_num, uci->cpu_sig.rev, val[1]);
352 return; 359 return;
353 } 360 }
354 printk(KERN_INFO "microcode: CPU%d updated from revision " 361 printk(KERN_INFO "microcode: CPU%d updated from revision "
355 "0x%x to 0x%x, date = %04x-%02x-%02x \n", 362 "0x%x to 0x%x, date = %04x-%02x-%02x \n",
356 cpu_num, uci->cpu_sig.rev, val[1], 363 cpu_num, uci->cpu_sig.rev, val[1],
357 mc_intel->hdr.date & 0xffff, 364 mc_intel->hdr.date & 0xffff,
358 mc_intel->hdr.date >> 24, 365 mc_intel->hdr.date >> 24,
359 (mc_intel->hdr.date >> 16) & 0xff); 366 (mc_intel->hdr.date >> 16) & 0xff);
367
360 uci->cpu_sig.rev = val[1]; 368 uci->cpu_sig.rev = val[1];
361} 369}
362 370
@@ -404,18 +412,23 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
404 leftover -= mc_size; 412 leftover -= mc_size;
405 } 413 }
406 414
407 if (new_mc) { 415 if (!new_mc)
408 if (!leftover) { 416 goto out;
409 if (uci->mc) 417
410 vfree(uci->mc); 418 if (leftover) {
411 uci->mc = (struct microcode_intel *)new_mc; 419 vfree(new_mc);
412 pr_debug("microcode: CPU%d found a matching microcode update with" 420 goto out;
413 " version 0x%x (current=0x%x)\n",
414 cpu, new_rev, uci->cpu_sig.rev);
415 } else
416 vfree(new_mc);
417 } 421 }
418 422
423 if (uci->mc)
424 vfree(uci->mc);
425 uci->mc = (struct microcode_intel *)new_mc;
426
427 pr_debug("microcode: CPU%d found a matching microcode update with"
428 " version 0x%x (current=0x%x)\n",
429 cpu, new_rev, uci->cpu_sig.rev);
430
431 out:
419 return (int)leftover; 432 return (int)leftover;
420} 433}
421 434
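
apply_microcode() is only restructured above; the underlying load/verify sequence is unchanged. A hedged sketch of that sequence (assuming the MSR names from <asm/msr-index.h>; locking and the driver's real bookkeeping omitted):

    #include <linux/errno.h>
    #include <asm/msr.h>
    #include <asm/processor.h>

    static int example_load_ucode(void *bits, u32 expected_rev)
    {
            u32 lo, hi;

            /* point the CPU at the update body (linear address) */
            wrmsr(MSR_IA32_UCODE_WRITE, (unsigned long)bits,
                  (u64)(unsigned long)bits >> 32);

            /* force a fresh revision read-back */
            wrmsr(MSR_IA32_UCODE_REV, 0, 0);
            sync_core();
            rdmsr(MSR_IA32_UCODE_REV, lo, hi);

            return hi == expected_rev ? 0 : -EIO;   /* new revision is in EDX */
    }
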
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index 666e43df51f9..712d15fdc416 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -226,7 +226,7 @@ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
226 return 0; 226 return 0;
227} 227}
228 228
229static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { 229static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
230 { 230 {
231 .callback = set_check_enable_amd_mmconf, 231 .callback = set_check_enable_amd_mmconf,
232 .ident = "Sun Microsystems Machine", 232 .ident = "Sun Microsystems Machine",
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index e8192401da47..dce99dca6cf8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -109,9 +109,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
109 } else 109 } else
110 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 110 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
111} 111}
112#endif
113
114#ifdef CONFIG_X86_IO_APIC
115 112
116static int bad_ioapic(unsigned long address) 113static int bad_ioapic(unsigned long address)
117{ 114{
@@ -224,8 +221,12 @@ static void __init MP_intsrc_info(struct mpc_intsrc *m)
224 if (++mp_irq_entries == MAX_IRQ_SOURCES) 221 if (++mp_irq_entries == MAX_IRQ_SOURCES)
225 panic("Max # of irq sources exceeded!!\n"); 222 panic("Max # of irq sources exceeded!!\n");
226} 223}
224#else /* CONFIG_X86_IO_APIC */
225static inline void __init MP_bus_info(struct mpc_bus *m) {}
226static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
227static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
228#endif /* CONFIG_X86_IO_APIC */
227 229
228#endif
229 230
230static void __init MP_lintsrc_info(struct mpc_lintsrc *m) 231static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
231{ 232{
@@ -275,6 +276,20 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
275 return 1; 276 return 1;
276} 277}
277 278
279static void skip_entry(unsigned char **ptr, int *count, int size)
280{
281 *ptr += size;
282 *count += size;
283}
284
285static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
286{
287 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
288 "type %x\n", *mpt);
289 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
290 1, mpc, mpc->length, 1);
291}
292
278static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) 293static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
279{ 294{
280 char str[16]; 295 char str[16];
@@ -310,61 +325,30 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
310 while (count < mpc->length) { 325 while (count < mpc->length) {
311 switch (*mpt) { 326 switch (*mpt) {
312 case MP_PROCESSOR: 327 case MP_PROCESSOR:
313 { 328 /* ACPI may have already provided this data */
314 struct mpc_cpu *m = (struct mpc_cpu *)mpt; 329 if (!acpi_lapic)
315 /* ACPI may have already provided this data */ 330 MP_processor_info((struct mpc_cpu *)mpt);
316 if (!acpi_lapic) 331 skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
317 MP_processor_info(m); 332 break;
318 mpt += sizeof(*m);
319 count += sizeof(*m);
320 break;
321 }
322 case MP_BUS: 333 case MP_BUS:
323 { 334 MP_bus_info((struct mpc_bus *)mpt);
324 struct mpc_bus *m = (struct mpc_bus *)mpt; 335 skip_entry(&mpt, &count, sizeof(struct mpc_bus));
325#ifdef CONFIG_X86_IO_APIC 336 break;
326 MP_bus_info(m);
327#endif
328 mpt += sizeof(*m);
329 count += sizeof(*m);
330 break;
331 }
332 case MP_IOAPIC: 337 case MP_IOAPIC:
333 { 338 MP_ioapic_info((struct mpc_ioapic *)mpt);
334#ifdef CONFIG_X86_IO_APIC 339 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
335 struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; 340 break;
336 MP_ioapic_info(m);
337#endif
338 mpt += sizeof(struct mpc_ioapic);
339 count += sizeof(struct mpc_ioapic);
340 break;
341 }
342 case MP_INTSRC: 341 case MP_INTSRC:
343 { 342 MP_intsrc_info((struct mpc_intsrc *)mpt);
344#ifdef CONFIG_X86_IO_APIC 343 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
345 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 344 break;
346
347 MP_intsrc_info(m);
348#endif
349 mpt += sizeof(struct mpc_intsrc);
350 count += sizeof(struct mpc_intsrc);
351 break;
352 }
353 case MP_LINTSRC: 345 case MP_LINTSRC:
354 { 346 MP_lintsrc_info((struct mpc_lintsrc *)mpt);
355 struct mpc_lintsrc *m = 347 skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
356 (struct mpc_lintsrc *)mpt; 348 break;
357 MP_lintsrc_info(m);
358 mpt += sizeof(*m);
359 count += sizeof(*m);
360 break;
361 }
362 default: 349 default:
363 /* wrong mptable */ 350 /* wrong mptable */
364 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); 351 smp_dump_mptable(mpc, mpt);
365 printk(KERN_ERR "type %x\n", *mpt);
366 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
367 1, mpc, mpc->length, 1);
368 count = mpc->length; 352 count = mpc->length;
369 break; 353 break;
370 } 354 }
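
The rewrite above folds five near-identical case blocks into skip_entry(), which advances the cursor and the running byte count in lock-step; that is the whole trick behind the flattened switch. A stripped-down sketch of the tagged-record walk (record types and sizes are illustrative):

    static void skip_entry(unsigned char **ptr, int *count, int size)
    {
            *ptr   += size;
            *count += size;
    }

    static void walk_table(unsigned char *table, int length)
    {
            unsigned char *p = table;
            int count = 0;

            while (count < length) {
                    switch (*p) {
                    case 0:                             /* a known record type */
                            skip_entry(&p, &count, 20); /* its fixed size */
                            break;
                    default:                            /* corrupt table: stop */
                            count = length;
                            break;
                    }
            }
    }
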
@@ -571,6 +555,55 @@ static unsigned long __init get_mpc_size(unsigned long physptr)
571 return size; 555 return size;
572} 556}
573 557
558static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
559{
560 struct mpc_table *mpc;
561 unsigned long size;
562
563 size = get_mpc_size(mpf->physptr);
564 mpc = early_ioremap(mpf->physptr, size);
565 /*
566 * Read the physical hardware table. Anything here will
567 * override the defaults.
568 */
569 if (!smp_read_mpc(mpc, early)) {
570#ifdef CONFIG_X86_LOCAL_APIC
571 smp_found_config = 0;
572#endif
573 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
574 "... disabling SMP support. (tell your hw vendor)\n");
575 early_iounmap(mpc, size);
576 return -1;
577 }
578 early_iounmap(mpc, size);
579
580 if (early)
581 return -1;
582
583#ifdef CONFIG_X86_IO_APIC
584 /*
585 * If there are no explicit MP IRQ entries, then we are
586 * broken. We set up most of the low 16 IO-APIC pins to
587 * ISA defaults and hope it will work.
588 */
589 if (!mp_irq_entries) {
590 struct mpc_bus bus;
591
592 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
593 "using default mptable. (tell your hw vendor)\n");
594
595 bus.type = MP_BUS;
596 bus.busid = 0;
597 memcpy(bus.bustype, "ISA ", 6);
598 MP_bus_info(&bus);
599
600 construct_default_ioirq_mptable(0);
601 }
602#endif
603
604 return 0;
605}
606
574/* 607/*
575 * Scan the memory blocks for an SMP configuration block. 608 * Scan the memory blocks for an SMP configuration block.
576 */ 609 */
@@ -624,51 +657,8 @@ static void __init __get_smp_config(unsigned int early)
624 construct_default_ISA_mptable(mpf->feature1); 657 construct_default_ISA_mptable(mpf->feature1);
625 658
626 } else if (mpf->physptr) { 659 } else if (mpf->physptr) {
627 struct mpc_table *mpc; 660 if (check_physptr(mpf, early))
628 unsigned long size;
629
630 size = get_mpc_size(mpf->physptr);
631 mpc = early_ioremap(mpf->physptr, size);
632 /*
633 * Read the physical hardware table. Anything here will
634 * override the defaults.
635 */
636 if (!smp_read_mpc(mpc, early)) {
637#ifdef CONFIG_X86_LOCAL_APIC
638 smp_found_config = 0;
639#endif
640 printk(KERN_ERR
641 "BIOS bug, MP table errors detected!...\n");
642 printk(KERN_ERR "... disabling SMP support. "
643 "(tell your hw vendor)\n");
644 early_iounmap(mpc, size);
645 return;
646 }
647 early_iounmap(mpc, size);
648
649 if (early)
650 return; 661 return;
651#ifdef CONFIG_X86_IO_APIC
652 /*
653 * If there are no explicit MP IRQ entries, then we are
654 * broken. We set up most of the low 16 IO-APIC pins to
655 * ISA defaults and hope it will work.
656 */
657 if (!mp_irq_entries) {
658 struct mpc_bus bus;
659
660 printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
661 "using default mptable. "
662 "(tell your hw vendor)\n");
663
664 bus.type = MP_BUS;
665 bus.busid = 0;
666 memcpy(bus.bustype, "ISA ", 6);
667 MP_bus_info(&bus);
668
669 construct_default_ioirq_mptable(0);
670 }
671#endif
672 } else 662 } else
673 BUG(); 663 BUG();
674 664
@@ -689,6 +679,31 @@ void __init get_smp_config(void)
689 __get_smp_config(0); 679 __get_smp_config(0);
690} 680}
691 681
682static void smp_reserve_bootmem(struct mpf_intel *mpf)
683{
684 unsigned long size = get_mpc_size(mpf->physptr);
685#ifdef CONFIG_X86_32
686 /*
687 * We cannot access the MPC table to compute its size yet,
688 * as only a few megabytes from the bottom are mapped now.
689 * The PC-9800 places its MPC table at the very end of physical
690 * memory, so simply reserving PAGE_SIZE from mpf->physptr
691 * yields BUG() in reserve_bootmem.
692 * We also need to make sure physptr is below max_low_pfn;
693 * there is no need to reserve the area above max_low_pfn.
694 */
695 unsigned long end = max_low_pfn * PAGE_SIZE;
696
697 if (mpf->physptr < end) {
698 if (mpf->physptr + size > end)
699 size = end - mpf->physptr;
700 reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
701 }
702#else
703 reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
704#endif
705}
706
692static int __init smp_scan_config(unsigned long base, unsigned long length, 707static int __init smp_scan_config(unsigned long base, unsigned long length,
693 unsigned reserve) 708 unsigned reserve)
694{ 709{
@@ -717,35 +732,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
717 if (!reserve) 732 if (!reserve)
718 return 1; 733 return 1;
719 reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), 734 reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
720 BOOTMEM_DEFAULT);
721 if (mpf->physptr) {
722 unsigned long size = get_mpc_size(mpf->physptr);
723#ifdef CONFIG_X86_32
724 /*
725 * We cannot access to MPC table to compute
726 * table size yet, as only few megabytes from
727 * the bottom is mapped now.
728 * PC-9800's MPC table places on the very last
729 * of physical memory; so that simply reserving
730 * PAGE_SIZE from mpf->physptr yields BUG()
731 * in reserve_bootmem.
732 * also need to make sure physptr is below than
733 * max_low_pfn
734 * we don't need reserve the area above max_low_pfn
735 */
736 unsigned long end = max_low_pfn * PAGE_SIZE;
737
738 if (mpf->physptr < end) {
739 if (mpf->physptr + size > end)
740 size = end - mpf->physptr;
741 reserve_bootmem_generic(mpf->physptr, size,
742 BOOTMEM_DEFAULT);
743 }
744#else
745 reserve_bootmem_generic(mpf->physptr, size,
746 BOOTMEM_DEFAULT); 735 BOOTMEM_DEFAULT);
747#endif 736 if (mpf->physptr)
748 } 737 smp_reserve_bootmem(mpf);
749 738
750 return 1; 739 return 1;
751 } 740 }
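
smp_reserve_bootmem(), factored out above, clamps the reservation so it never extends past mapped low memory. A standalone demo of the clamp arithmetic with illustrative numbers (896 MB of lowmem, a table just under the limit):

    #include <stdio.h>

    int main(void)
    {
            unsigned long page_size   = 4096;
            unsigned long max_low_pfn = 0x38000;             /* 896 MB */
            unsigned long end     = max_low_pfn * page_size; /* 0x38000000 */
            unsigned long physptr = 0x37fff000UL;
            unsigned long size    = 0x2000;                  /* would overhang */

            if (physptr < end && physptr + size > end)
                    size = end - physptr;                    /* clamped to 0x1000 */

            printf("reserve %#lx bytes at %#lx\n", size, physptr);
            return 0;
    }
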
@@ -848,7 +837,57 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
848#define SPARE_SLOT_NUM 20 837#define SPARE_SLOT_NUM 20
849 838
850static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; 839static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
851#endif 840
841static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
842{
843 int i;
844
845 apic_printk(APIC_VERBOSE, "OLD ");
846 print_MP_intsrc_info(m);
847
848 i = get_MP_intsrc_index(m);
849 if (i > 0) {
850 assign_to_mpc_intsrc(&mp_irqs[i], m);
851 apic_printk(APIC_VERBOSE, "NEW ");
852 print_mp_irq_info(&mp_irqs[i]);
853 return;
854 }
855 if (!i) {
856 /* legacy, do nothing */
857 return;
858 }
859 if (*nr_m_spare < SPARE_SLOT_NUM) {
860 /*
861 * Entries that are not found (-1) or duplicated (-2) are
862 * invalid; stash the slot so it can be reused later.
863 */
864 m_spare[*nr_m_spare] = m;
865 *nr_m_spare += 1;
866 }
867}
868#else /* CONFIG_X86_IO_APIC */
869static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
870#endif /* CONFIG_X86_IO_APIC */
871
872static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
873 int count)
874{
875 if (!mpc_new_phys) {
876 pr_info("No spare slots, try to append...take your risk, "
877 "new mpc_length %x\n", count);
878 } else {
879 if (count <= mpc_new_length)
880 pr_info("No spare slots, try to append..., "
881 "new mpc_length %x\n", count);
882 else {
883 pr_err("mpc_new_length %lx is too small\n",
884 mpc_new_length);
885 return -1;
886 }
887 }
888
889 return 0;
890}
852 891
853static int __init replace_intsrc_all(struct mpc_table *mpc, 892static int __init replace_intsrc_all(struct mpc_table *mpc,
854 unsigned long mpc_new_phys, 893 unsigned long mpc_new_phys,
@@ -856,77 +895,33 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
856{ 895{
857#ifdef CONFIG_X86_IO_APIC 896#ifdef CONFIG_X86_IO_APIC
858 int i; 897 int i;
859 int nr_m_spare = 0;
860#endif 898#endif
861
862 int count = sizeof(*mpc); 899 int count = sizeof(*mpc);
900 int nr_m_spare = 0;
863 unsigned char *mpt = ((unsigned char *)mpc) + count; 901 unsigned char *mpt = ((unsigned char *)mpc) + count;
864 902
865 printk(KERN_INFO "mpc_length %x\n", mpc->length); 903 printk(KERN_INFO "mpc_length %x\n", mpc->length);
866 while (count < mpc->length) { 904 while (count < mpc->length) {
867 switch (*mpt) { 905 switch (*mpt) {
868 case MP_PROCESSOR: 906 case MP_PROCESSOR:
869 { 907 skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
870 struct mpc_cpu *m = (struct mpc_cpu *)mpt; 908 break;
871 mpt += sizeof(*m);
872 count += sizeof(*m);
873 break;
874 }
875 case MP_BUS: 909 case MP_BUS:
876 { 910 skip_entry(&mpt, &count, sizeof(struct mpc_bus));
877 struct mpc_bus *m = (struct mpc_bus *)mpt; 911 break;
878 mpt += sizeof(*m);
879 count += sizeof(*m);
880 break;
881 }
882 case MP_IOAPIC: 912 case MP_IOAPIC:
883 { 913 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
884 mpt += sizeof(struct mpc_ioapic); 914 break;
885 count += sizeof(struct mpc_ioapic);
886 break;
887 }
888 case MP_INTSRC: 915 case MP_INTSRC:
889 { 916 check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare);
890#ifdef CONFIG_X86_IO_APIC 917 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
891 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 918 break;
892
893 printk(KERN_INFO "OLD ");
894 print_MP_intsrc_info(m);
895 i = get_MP_intsrc_index(m);
896 if (i > 0) {
897 assign_to_mpc_intsrc(&mp_irqs[i], m);
898 printk(KERN_INFO "NEW ");
899 print_mp_irq_info(&mp_irqs[i]);
900 } else if (!i) {
901 /* legacy, do nothing */
902 } else if (nr_m_spare < SPARE_SLOT_NUM) {
903 /*
904 * not found (-1), or duplicated (-2)
905 * are invalid entries,
906 * we need to use the slot later
907 */
908 m_spare[nr_m_spare] = m;
909 nr_m_spare++;
910 }
911#endif
912 mpt += sizeof(struct mpc_intsrc);
913 count += sizeof(struct mpc_intsrc);
914 break;
915 }
916 case MP_LINTSRC: 919 case MP_LINTSRC:
917 { 920 skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
918 struct mpc_lintsrc *m = 921 break;
919 (struct mpc_lintsrc *)mpt;
920 mpt += sizeof(*m);
921 count += sizeof(*m);
922 break;
923 }
924 default: 922 default:
925 /* wrong mptable */ 923 /* wrong mptable */
926 printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); 924 smp_dump_mptable(mpc, mpt);
927 printk(KERN_ERR "type %x\n", *mpt);
928 print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
929 1, mpc, mpc->length, 1);
930 goto out; 925 goto out;
931 } 926 }
932 } 927 }
@@ -943,23 +938,15 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
943 continue; 938 continue;
944 939
945 if (nr_m_spare > 0) { 940 if (nr_m_spare > 0) {
946 printk(KERN_INFO "*NEW* found "); 941 apic_printk(APIC_VERBOSE, "*NEW* found\n");
947 nr_m_spare--; 942 nr_m_spare--;
948 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); 943 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
949 m_spare[nr_m_spare] = NULL; 944 m_spare[nr_m_spare] = NULL;
950 } else { 945 } else {
951 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 946 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
952 count += sizeof(struct mpc_intsrc); 947 count += sizeof(struct mpc_intsrc);
953 if (!mpc_new_phys) { 948 if (!check_slot(mpc_new_phys, mpc_new_length, count))
954 printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); 949 goto out;
955 } else {
956 if (count <= mpc_new_length)
957 printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
958 else {
959 printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
960 goto out;
961 }
962 }
963 assign_to_mpc_intsrc(&mp_irqs[i], m); 950 assign_to_mpc_intsrc(&mp_irqs[i], m);
964 mpc->length = count; 951 mpc->length = count;
965 mpt += sizeof(struct mpc_intsrc); 952 mpt += sizeof(struct mpc_intsrc);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 63dd358d8ee1..8e45f4464880 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -470,7 +470,6 @@ struct pv_mmu_ops pv_mmu_ops = {
470#if PAGETABLE_LEVELS >= 3 470#if PAGETABLE_LEVELS >= 3
471#ifdef CONFIG_X86_PAE 471#ifdef CONFIG_X86_PAE
472 .set_pte_atomic = native_set_pte_atomic, 472 .set_pte_atomic = native_set_pte_atomic,
473 .set_pte_present = native_set_pte_present,
474 .pte_clear = native_pte_clear, 473 .pte_clear = native_pte_clear,
475 .pmd_clear = native_pmd_clear, 474 .pmd_clear = native_pmd_clear,
476#endif 475#endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d28bbdc35e4e..755c21e906f3 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -380,8 +380,9 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
380 return tbl; 380 return tbl;
381} 381}
382 382
383static void calgary_unmap_sg(struct device *dev, 383static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
384 struct scatterlist *sglist, int nelems, int direction) 384 int nelems, enum dma_data_direction dir,
385 struct dma_attrs *attrs)
385{ 386{
386 struct iommu_table *tbl = find_iommu_table(dev); 387 struct iommu_table *tbl = find_iommu_table(dev);
387 struct scatterlist *s; 388 struct scatterlist *s;
@@ -404,7 +405,8 @@ static void calgary_unmap_sg(struct device *dev,
404} 405}
405 406
406static int calgary_map_sg(struct device *dev, struct scatterlist *sg, 407static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
407 int nelems, int direction) 408 int nelems, enum dma_data_direction dir,
409 struct dma_attrs *attrs)
408{ 410{
409 struct iommu_table *tbl = find_iommu_table(dev); 411 struct iommu_table *tbl = find_iommu_table(dev);
410 struct scatterlist *s; 412 struct scatterlist *s;
@@ -429,15 +431,14 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
429 s->dma_address = (entry << PAGE_SHIFT) | s->offset; 431 s->dma_address = (entry << PAGE_SHIFT) | s->offset;
430 432
431 /* insert into HW table */ 433 /* insert into HW table */
432 tce_build(tbl, entry, npages, vaddr & PAGE_MASK, 434 tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir);
433 direction);
434 435
435 s->dma_length = s->length; 436 s->dma_length = s->length;
436 } 437 }
437 438
438 return nelems; 439 return nelems;
439error: 440error:
440 calgary_unmap_sg(dev, sg, nelems, direction); 441 calgary_unmap_sg(dev, sg, nelems, dir, NULL);
441 for_each_sg(sg, s, nelems, i) { 442 for_each_sg(sg, s, nelems, i) {
442 sg->dma_address = bad_dma_address; 443 sg->dma_address = bad_dma_address;
443 sg->dma_length = 0; 444 sg->dma_length = 0;
@@ -445,10 +446,12 @@ error:
445 return 0; 446 return 0;
446} 447}
447 448
448static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, 449static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
449 size_t size, int direction) 450 unsigned long offset, size_t size,
451 enum dma_data_direction dir,
452 struct dma_attrs *attrs)
450{ 453{
451 void *vaddr = phys_to_virt(paddr); 454 void *vaddr = page_address(page) + offset;
452 unsigned long uaddr; 455 unsigned long uaddr;
453 unsigned int npages; 456 unsigned int npages;
454 struct iommu_table *tbl = find_iommu_table(dev); 457 struct iommu_table *tbl = find_iommu_table(dev);
@@ -456,17 +459,18 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
456 uaddr = (unsigned long)vaddr; 459 uaddr = (unsigned long)vaddr;
457 npages = iommu_num_pages(uaddr, size, PAGE_SIZE); 460 npages = iommu_num_pages(uaddr, size, PAGE_SIZE);
458 461
459 return iommu_alloc(dev, tbl, vaddr, npages, direction); 462 return iommu_alloc(dev, tbl, vaddr, npages, dir);
460} 463}
461 464
462static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, 465static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
463 size_t size, int direction) 466 size_t size, enum dma_data_direction dir,
467 struct dma_attrs *attrs)
464{ 468{
465 struct iommu_table *tbl = find_iommu_table(dev); 469 struct iommu_table *tbl = find_iommu_table(dev);
466 unsigned int npages; 470 unsigned int npages;
467 471
468 npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); 472 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
469 iommu_free(tbl, dma_handle, npages); 473 iommu_free(tbl, dma_addr, npages);
470} 474}
471 475
472static void* calgary_alloc_coherent(struct device *dev, size_t size, 476static void* calgary_alloc_coherent(struct device *dev, size_t size,
@@ -515,13 +519,13 @@ static void calgary_free_coherent(struct device *dev, size_t size,
515 free_pages((unsigned long)vaddr, get_order(size)); 519 free_pages((unsigned long)vaddr, get_order(size));
516} 520}
517 521
518static struct dma_mapping_ops calgary_dma_ops = { 522static struct dma_map_ops calgary_dma_ops = {
519 .alloc_coherent = calgary_alloc_coherent, 523 .alloc_coherent = calgary_alloc_coherent,
520 .free_coherent = calgary_free_coherent, 524 .free_coherent = calgary_free_coherent,
521 .map_single = calgary_map_single,
522 .unmap_single = calgary_unmap_single,
523 .map_sg = calgary_map_sg, 525 .map_sg = calgary_map_sg,
524 .unmap_sg = calgary_unmap_sg, 526 .unmap_sg = calgary_unmap_sg,
527 .map_page = calgary_map_page,
528 .unmap_page = calgary_unmap_page,
525}; 529};
526 530
527static inline void __iomem * busno_to_bbar(unsigned char num) 531static inline void __iomem * busno_to_bbar(unsigned char num)
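
With Calgary converted to struct dma_map_ops, the driver-level map_single/unmap_single hooks disappear: once every IOMMU provides ->map_page(), the generic layer can express single mappings through it. Roughly, and hedged (a simplification of the 2.6.30-era x86 inline wrapper; dma-debug hooks and error paths omitted):

    static inline dma_addr_t dma_map_single(struct device *dev, void *ptr,
                                            size_t size,
                                            enum dma_data_direction dir)
    {
            struct dma_map_ops *ops = get_dma_ops(dev);

            return ops->map_page(dev, virt_to_page(ptr),
                                 (unsigned long)ptr & ~PAGE_MASK, /* offset in page */
                                 size, dir, NULL);
    }
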
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b25428533141..90f5b9ef5def 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,4 +1,5 @@
1#include <linux/dma-mapping.h> 1#include <linux/dma-mapping.h>
2#include <linux/dma-debug.h>
2#include <linux/dmar.h> 3#include <linux/dmar.h>
3#include <linux/bootmem.h> 4#include <linux/bootmem.h>
4#include <linux/pci.h> 5#include <linux/pci.h>
@@ -12,7 +13,7 @@
12 13
13static int forbid_dac __read_mostly; 14static int forbid_dac __read_mostly;
14 15
15struct dma_mapping_ops *dma_ops; 16struct dma_map_ops *dma_ops;
16EXPORT_SYMBOL(dma_ops); 17EXPORT_SYMBOL(dma_ops);
17 18
18static int iommu_sac_force __read_mostly; 19static int iommu_sac_force __read_mostly;
@@ -44,6 +45,9 @@ struct device x86_dma_fallback_dev = {
44}; 45};
45EXPORT_SYMBOL(x86_dma_fallback_dev); 46EXPORT_SYMBOL(x86_dma_fallback_dev);
46 47
48/* Number of entries preallocated for DMA-API debugging */
49#define PREALLOC_DMA_DEBUG_ENTRIES 32768
50
47int dma_set_mask(struct device *dev, u64 mask) 51int dma_set_mask(struct device *dev, u64 mask)
48{ 52{
49 if (!dev->dma_mask || !dma_supported(dev, mask)) 53 if (!dev->dma_mask || !dma_supported(dev, mask))
@@ -224,7 +228,7 @@ early_param("iommu", iommu_setup);
224 228
225int dma_supported(struct device *dev, u64 mask) 229int dma_supported(struct device *dev, u64 mask)
226{ 230{
227 struct dma_mapping_ops *ops = get_dma_ops(dev); 231 struct dma_map_ops *ops = get_dma_ops(dev);
228 232
229#ifdef CONFIG_PCI 233#ifdef CONFIG_PCI
230 if (mask > 0xffffffff && forbid_dac > 0) { 234 if (mask > 0xffffffff && forbid_dac > 0) {
@@ -265,6 +269,12 @@ EXPORT_SYMBOL(dma_supported);
265 269
266static int __init pci_iommu_init(void) 270static int __init pci_iommu_init(void)
267{ 271{
272 dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
273
274#ifdef CONFIG_PCI
275 dma_debug_add_bus(&pci_bus_type);
276#endif
277
268 calgary_iommu_init(); 278 calgary_iommu_init();
269 279
270 intel_iommu_init(); 280 intel_iommu_init();
@@ -290,8 +300,7 @@ fs_initcall(pci_iommu_init);
290static __devinit void via_no_dac(struct pci_dev *dev) 300static __devinit void via_no_dac(struct pci_dev *dev)
291{ 301{
292 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { 302 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
293 printk(KERN_INFO 303 dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
294 "PCI: VIA PCI bridge detected. Disabling DAC.\n");
295 forbid_dac = 1; 304 forbid_dac = 1;
296 } 305 }
297} 306}
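
pci_iommu_init() now seeds CONFIG_DMA_API_DEBUG with 32768 preallocated tracking entries and attaches the checker to the PCI bus. What dma-debug then enforces is the strict map/unmap pairing every driver owes the API; a minimal hedged example of the contract it checks (buf/len are illustrative):

    static int example_dma_roundtrip(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t handle;

            handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
            if (dma_mapping_error(dev, handle))
                    return -ENOMEM;

            /* ... device performs its DMA here ... */

            /* size and direction must match the map, or dma-debug warns */
            dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
            return 0;
    }
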
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d5768b1af080..b284b58c035c 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -255,10 +255,13 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
255} 255}
256 256
257/* Map a single area into the IOMMU */ 257/* Map a single area into the IOMMU */
258static dma_addr_t 258static dma_addr_t gart_map_page(struct device *dev, struct page *page,
259gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) 259 unsigned long offset, size_t size,
260 enum dma_data_direction dir,
261 struct dma_attrs *attrs)
260{ 262{
261 unsigned long bus; 263 unsigned long bus;
264 phys_addr_t paddr = page_to_phys(page) + offset;
262 265
263 if (!dev) 266 if (!dev)
264 dev = &x86_dma_fallback_dev; 267 dev = &x86_dma_fallback_dev;
@@ -275,8 +278,9 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
275/* 278/*
276 * Free a DMA mapping. 279 * Free a DMA mapping.
277 */ 280 */
278static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, 281static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
279 size_t size, int direction) 282 size_t size, enum dma_data_direction dir,
283 struct dma_attrs *attrs)
280{ 284{
281 unsigned long iommu_page; 285 unsigned long iommu_page;
282 int npages; 286 int npages;
@@ -298,8 +302,8 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
298/* 302/*
299 * Wrapper for pci_unmap_single working with scatterlists. 303 * Wrapper for pci_unmap_single working with scatterlists.
300 */ 304 */
301static void 305static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
302gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 306 enum dma_data_direction dir, struct dma_attrs *attrs)
303{ 307{
304 struct scatterlist *s; 308 struct scatterlist *s;
305 int i; 309 int i;
@@ -307,7 +311,7 @@ gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
307 for_each_sg(sg, s, nents, i) { 311 for_each_sg(sg, s, nents, i) {
308 if (!s->dma_length || !s->length) 312 if (!s->dma_length || !s->length)
309 break; 313 break;
310 gart_unmap_single(dev, s->dma_address, s->dma_length, dir); 314 gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL);
311 } 315 }
312} 316}
313 317
@@ -329,7 +333,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
329 addr = dma_map_area(dev, addr, s->length, dir, 0); 333 addr = dma_map_area(dev, addr, s->length, dir, 0);
330 if (addr == bad_dma_address) { 334 if (addr == bad_dma_address) {
331 if (i > 0) 335 if (i > 0)
332 gart_unmap_sg(dev, sg, i, dir); 336 gart_unmap_sg(dev, sg, i, dir, NULL);
333 nents = 0; 337 nents = 0;
334 sg[0].dma_length = 0; 338 sg[0].dma_length = 0;
335 break; 339 break;
@@ -400,8 +404,8 @@ dma_map_cont(struct device *dev, struct scatterlist *start, int nelems,
400 * DMA map all entries in a scatterlist. 404 * DMA map all entries in a scatterlist.
401 * Merge chunks that have page aligned sizes into a continuous mapping. 405 * Merge chunks that have page aligned sizes into a continuous mapping.
402 */ 406 */
403static int 407static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
404gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) 408 enum dma_data_direction dir, struct dma_attrs *attrs)
405{ 409{
406 struct scatterlist *s, *ps, *start_sg, *sgmap; 410 struct scatterlist *s, *ps, *start_sg, *sgmap;
407 int need = 0, nextneed, i, out, start; 411 int need = 0, nextneed, i, out, start;
@@ -468,7 +472,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
468 472
469error: 473error:
470 flush_gart(); 474 flush_gart();
471 gart_unmap_sg(dev, sg, out, dir); 475 gart_unmap_sg(dev, sg, out, dir, NULL);
472 476
473 /* When it was forced or merged try again in a dumb way */ 477 /* When it was forced or merged try again in a dumb way */
474 if (force_iommu || iommu_merge) { 478 if (force_iommu || iommu_merge) {
@@ -521,7 +525,7 @@ static void
521gart_free_coherent(struct device *dev, size_t size, void *vaddr, 525gart_free_coherent(struct device *dev, size_t size, void *vaddr,
522 dma_addr_t dma_addr) 526 dma_addr_t dma_addr)
523{ 527{
524 gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); 528 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
525 free_pages((unsigned long)vaddr, get_order(size)); 529 free_pages((unsigned long)vaddr, get_order(size));
526} 530}
527 531
@@ -707,11 +711,11 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
707 return -1; 711 return -1;
708} 712}
709 713
710static struct dma_mapping_ops gart_dma_ops = { 714static struct dma_map_ops gart_dma_ops = {
711 .map_single = gart_map_single,
712 .unmap_single = gart_unmap_single,
713 .map_sg = gart_map_sg, 715 .map_sg = gart_map_sg,
714 .unmap_sg = gart_unmap_sg, 716 .unmap_sg = gart_unmap_sg,
717 .map_page = gart_map_page,
718 .unmap_page = gart_unmap_page,
715 .alloc_coherent = gart_alloc_coherent, 719 .alloc_coherent = gart_alloc_coherent,
716 .free_coherent = gart_free_coherent, 720 .free_coherent = gart_free_coherent,
717}; 721};
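
This is the pattern repeated across every backend in this series: struct dma_mapping_ops becomes struct dma_map_ops, and the map_single/unmap_single hooks give way to map_page/unmap_page, which take a (page, offset) pair, an enum dma_data_direction, and a dma_attrs pointer. As a rough sketch (not part of the patch; the helper name is made up), a single-address mapping is phrased through the new hook like this:

    /*
     * Illustrative only: expressing a map_single-style request
     * against the new ->map_page hook.
     */
    static dma_addr_t map_single_via_page(struct device *dev,
                                          struct dma_map_ops *ops,
                                          phys_addr_t paddr, size_t size,
                                          enum dma_data_direction dir)
    {
            struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
            unsigned long offset = paddr & ~PAGE_MASK;

            /* gart_map_page() recomputes paddr as page_to_phys(page) + offset */
            return ops->map_page(dev, page, offset, size, dir, NULL);
    }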
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index c70ab5a5d4c8..c6d703b39326 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -1,14 +1,14 @@
1/* Fallback functions when the main IOMMU code is not compiled in. This 1/* Fallback functions when the main IOMMU code is not compiled in. This
2 code is roughly equivalent to i386. */ 2 code is roughly equivalent to i386. */
3#include <linux/mm.h>
4#include <linux/init.h>
5#include <linux/pci.h>
6#include <linux/string.h>
7#include <linux/dma-mapping.h> 3#include <linux/dma-mapping.h>
8#include <linux/scatterlist.h> 4#include <linux/scatterlist.h>
5#include <linux/string.h>
6#include <linux/init.h>
7#include <linux/pci.h>
8#include <linux/mm.h>
9 9
10#include <asm/iommu.h>
11#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/iommu.h>
12#include <asm/dma.h> 12#include <asm/dma.h>
13 13
14static int 14static int
@@ -25,19 +25,19 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
25 return 1; 25 return 1;
26} 26}
27 27
28static dma_addr_t 28static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
29nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, 29 unsigned long offset, size_t size,
30 int direction) 30 enum dma_data_direction dir,
31 struct dma_attrs *attrs)
31{ 32{
32 dma_addr_t bus = paddr; 33 dma_addr_t bus = page_to_phys(page) + offset;
33 WARN_ON(size == 0); 34 WARN_ON(size == 0);
34 if (!check_addr("map_single", hwdev, bus, size)) 35 if (!check_addr("map_single", dev, bus, size))
35 return bad_dma_address; 36 return bad_dma_address;
36 flush_write_buffers(); 37 flush_write_buffers();
37 return bus; 38 return bus;
38} 39}
39 40
40
41/* Map a set of buffers described by scatterlist in streaming 41/* Map a set of buffers described by scatterlist in streaming
42 * mode for DMA. This is the scatter-gather version of the 42 * mode for DMA. This is the scatter-gather version of the
43 * above pci_map_single interface. Here the scatter gather list 43 * above pci_map_single interface. Here the scatter gather list
@@ -54,7 +54,8 @@ nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
54 * the same here. 54 * the same here.
55 */ 55 */
56static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, 56static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
57 int nents, int direction) 57 int nents, enum dma_data_direction dir,
58 struct dma_attrs *attrs)
58{ 59{
59 struct scatterlist *s; 60 struct scatterlist *s;
60 int i; 61 int i;
@@ -78,12 +79,12 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
78 free_pages((unsigned long)vaddr, get_order(size)); 79 free_pages((unsigned long)vaddr, get_order(size));
79} 80}
80 81
81struct dma_mapping_ops nommu_dma_ops = { 82struct dma_map_ops nommu_dma_ops = {
82 .alloc_coherent = dma_generic_alloc_coherent, 83 .alloc_coherent = dma_generic_alloc_coherent,
83 .free_coherent = nommu_free_coherent, 84 .free_coherent = nommu_free_coherent,
84 .map_single = nommu_map_single, 85 .map_sg = nommu_map_sg,
85 .map_sg = nommu_map_sg, 86 .map_page = nommu_map_page,
86 .is_phys = 1, 87 .is_phys = 1,
87}; 88};
88 89
89void __init no_iommu_init(void) 90void __init no_iommu_init(void)
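
For reference, the nommu scatterlist path (whose body this hunk leaves untouched) is the simplest possible implementation of the interface: each entry maps 1:1 onto its physical address. Roughly, as a sketch rather than the verbatim kernel body:

    static int identity_map_sg(struct device *hwdev, struct scatterlist *sg,
                               int nents, enum dma_data_direction dir,
                               struct dma_attrs *attrs)
    {
            struct scatterlist *s;
            int i;

            for_each_sg(sg, s, nents, i) {
                    /* identity mapping: bus address == physical address */
                    s->dma_address = sg_phys(s);
                    s->dma_length  = s->length;
                    if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
                            return 0;
            }
            flush_write_buffers();
            return nents;
    }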
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb.c
index d59c91747665..34f12e9996ed 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -33,18 +33,11 @@ phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
33 return baddr; 33 return baddr;
34} 34}
35 35
36int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) 36int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
37{ 37{
38 return 0; 38 return 0;
39} 39}
40 40
41static dma_addr_t
42swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
43 int direction)
44{
45 return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
46}
47
48static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, 41static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
49 dma_addr_t *dma_handle, gfp_t flags) 42 dma_addr_t *dma_handle, gfp_t flags)
50{ 43{
@@ -57,20 +50,20 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
57 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); 50 return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
58} 51}
59 52
60struct dma_mapping_ops swiotlb_dma_ops = { 53struct dma_map_ops swiotlb_dma_ops = {
61 .mapping_error = swiotlb_dma_mapping_error, 54 .mapping_error = swiotlb_dma_mapping_error,
62 .alloc_coherent = x86_swiotlb_alloc_coherent, 55 .alloc_coherent = x86_swiotlb_alloc_coherent,
63 .free_coherent = swiotlb_free_coherent, 56 .free_coherent = swiotlb_free_coherent,
64 .map_single = swiotlb_map_single_phys,
65 .unmap_single = swiotlb_unmap_single,
66 .sync_single_for_cpu = swiotlb_sync_single_for_cpu, 57 .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
67 .sync_single_for_device = swiotlb_sync_single_for_device, 58 .sync_single_for_device = swiotlb_sync_single_for_device,
68 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, 59 .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
69 .sync_single_range_for_device = swiotlb_sync_single_range_for_device, 60 .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
70 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, 61 .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
71 .sync_sg_for_device = swiotlb_sync_sg_for_device, 62 .sync_sg_for_device = swiotlb_sync_sg_for_device,
72 .map_sg = swiotlb_map_sg, 63 .map_sg = swiotlb_map_sg_attrs,
73 .unmap_sg = swiotlb_unmap_sg, 64 .unmap_sg = swiotlb_unmap_sg_attrs,
65 .map_page = swiotlb_map_page,
66 .unmap_page = swiotlb_unmap_page,
74 .dma_supported = NULL, 67 .dma_supported = NULL,
75}; 68};
76 69
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8c037051b353..ca989158e847 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -68,11 +68,11 @@ void exit_thread(void)
68{ 68{
69 struct task_struct *me = current; 69 struct task_struct *me = current;
70 struct thread_struct *t = &me->thread; 70 struct thread_struct *t = &me->thread;
71 unsigned long *bp = t->io_bitmap_ptr;
71 72
72 if (me->thread.io_bitmap_ptr) { 73 if (bp) {
73 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 74 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
74 75
75 kfree(t->io_bitmap_ptr);
76 t->io_bitmap_ptr = NULL; 76 t->io_bitmap_ptr = NULL;
77 clear_thread_flag(TIF_IO_BITMAP); 77 clear_thread_flag(TIF_IO_BITMAP);
78 /* 78 /*
@@ -81,6 +81,7 @@ void exit_thread(void)
81 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 81 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
82 t->io_bitmap_max = 0; 82 t->io_bitmap_max = 0;
83 put_cpu(); 83 put_cpu();
84 kfree(bp);
84 } 85 }
85 86
86 ds_exit_thread(current); 87 ds_exit_thread(current);
@@ -327,7 +328,7 @@ void stop_this_cpu(void *dummy)
327 /* 328 /*
328 * Remove this CPU: 329 * Remove this CPU:
329 */ 330 */
330 cpu_clear(smp_processor_id(), cpu_online_map); 331 set_cpu_online(smp_processor_id(), false);
331 disable_local_APIC(); 332 disable_local_APIC();
332 333
333 for (;;) { 334 for (;;) {
@@ -477,12 +478,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
477 return 1; 478 return 1;
478} 479}
479 480
480static cpumask_t c1e_mask = CPU_MASK_NONE; 481static cpumask_var_t c1e_mask;
481static int c1e_detected; 482static int c1e_detected;
482 483
483void c1e_remove_cpu(int cpu) 484void c1e_remove_cpu(int cpu)
484{ 485{
485 cpu_clear(cpu, c1e_mask); 486 if (c1e_mask != NULL)
487 cpumask_clear_cpu(cpu, c1e_mask);
486} 488}
487 489
488/* 490/*
@@ -511,8 +513,8 @@ static void c1e_idle(void)
511 if (c1e_detected) { 513 if (c1e_detected) {
512 int cpu = smp_processor_id(); 514 int cpu = smp_processor_id();
513 515
514 if (!cpu_isset(cpu, c1e_mask)) { 516 if (!cpumask_test_cpu(cpu, c1e_mask)) {
515 cpu_set(cpu, c1e_mask); 517 cpumask_set_cpu(cpu, c1e_mask);
516 /* 518 /*
517 * Force broadcast so ACPI can not interfere. Needs 519 * Force broadcast so ACPI can not interfere. Needs
518 * to run with interrupts enabled as it uses 520 * to run with interrupts enabled as it uses
@@ -564,6 +566,15 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
564 pm_idle = default_idle; 566 pm_idle = default_idle;
565} 567}
566 568
569void __init init_c1e_mask(void)
570{
571 /* If we're using c1e_idle, we need to allocate c1e_mask. */
572 if (pm_idle == c1e_idle) {
573 alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
574 cpumask_clear(c1e_mask);
575 }
576}
577
567static int __init idle_setup(char *str) 578static int __init idle_setup(char *str)
568{ 579{
569 if (!str) 580 if (!str)
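
The c1e_mask conversion follows the standard cpumask_var_t idiom for CONFIG_CPUMASK_OFFSTACK kernels: the mask is only a pointer until explicitly allocated, so users must allocate it before first use and tolerate NULL afterwards. Condensed to its essentials (identifiers below are hypothetical):

    static cpumask_var_t my_mask;   /* pointer if OFFSTACK, array otherwise */

    static void my_mask_init(void)  /* mirrors init_c1e_mask() above */
    {
            if (alloc_cpumask_var(&my_mask, GFP_KERNEL))
                    cpumask_clear(my_mask);
    }

    static void my_mask_remove_cpu(int cpu)
    {
            if (my_mask != NULL)    /* allocation may never have happened */
                    cpumask_clear_cpu(cpu, my_mask);
    }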
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 14014d766cad..76f8f84043a2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -245,7 +245,7 @@ void prepare_to_copy(struct task_struct *tsk)
245 unlazy_fpu(tsk); 245 unlazy_fpu(tsk);
246} 246}
247 247
248int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, 248int copy_thread(unsigned long clone_flags, unsigned long sp,
249 unsigned long unused, 249 unsigned long unused,
250 struct task_struct *p, struct pt_regs *regs) 250 struct task_struct *p, struct pt_regs *regs)
251{ 251{
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index abb7e6a7f0c6..b751a41392b1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -278,7 +278,7 @@ void prepare_to_copy(struct task_struct *tsk)
278 unlazy_fpu(tsk); 278 unlazy_fpu(tsk);
279} 279}
280 280
281int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, 281int copy_thread(unsigned long clone_flags, unsigned long sp,
282 unsigned long unused, 282 unsigned long unused,
283 struct task_struct *p, struct pt_regs *regs) 283 struct task_struct *p, struct pt_regs *regs)
284{ 284{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 99749d6e87a8..fe9345c967de 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -686,9 +686,8 @@ static int ptrace_bts_config(struct task_struct *child,
686 if (!cfg.signal) 686 if (!cfg.signal)
687 return -EINVAL; 687 return -EINVAL;
688 688
689 return -EOPNOTSUPP;
690
691 child->thread.bts_ovfl_signal = cfg.signal; 689 child->thread.bts_ovfl_signal = cfg.signal;
690 return -EOPNOTSUPP;
692 } 691 }
693 692
694 if ((cfg.flags & PTRACE_BTS_O_ALLOC) && 693 if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
@@ -1463,6 +1462,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
1463 * system call instruction. 1462 * system call instruction.
1464 */ 1463 */
1465 if (test_thread_flag(TIF_SINGLESTEP) && 1464 if (test_thread_flag(TIF_SINGLESTEP) &&
1466 tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) 1465 tracehook_consider_fatal_signal(current, SIGTRAP))
1467 send_sigtrap(current, regs, 0, TRAP_BRKPT); 1466 send_sigtrap(current, regs, 0, TRAP_BRKPT);
1468} 1467}
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 309949e9e1c1..e95022e4f5d5 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void)
74 if (!force_hpet_address) 74 if (!force_hpet_address)
75 return; 75 return;
76 76
77 if (rcba_base == NULL) 77 BUG_ON(rcba_base == NULL);
78 BUG();
79 78
80 /* read the Function Disable register, dword mode only */ 79 /* read the Function Disable register, dword mode only */
81 val = readl(rcba_base + 0x3404); 80 val = readl(rcba_base + 0x3404);
@@ -172,7 +171,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4,
172 ich_force_enable_hpet); 171 ich_force_enable_hpet);
173DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, 172DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
174 ich_force_enable_hpet); 173 ich_force_enable_hpet);
175 174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */
175 ich_force_enable_hpet);
176 176
177static struct pci_dev *cached_dev; 177static struct pci_dev *cached_dev;
178 178
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d28..41235531b11c 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
17 17
18#define PTR(x) (x << 2) 18#define PTR(x) (x << 2)
19 19
20/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE 20/*
21 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
21 * ~ control_page + PAGE_SIZE are used as data storage and stack for 22 * ~ control_page + PAGE_SIZE are used as data storage and stack for
22 * jumping back 23 * jumping back
23 */ 24 */
@@ -76,8 +77,10 @@ relocate_kernel:
76 movl %eax, CP_PA_SWAP_PAGE(%edi) 77 movl %eax, CP_PA_SWAP_PAGE(%edi)
77 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) 78 movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
78 79
79 /* get physical address of control page now */ 80 /*
80 /* this is impossible after page table switch */ 81 * get physical address of control page now
82 * this is impossible after page table switch
83 */
81 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi 84 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
82 85
83 /* switch to new set of page tables */ 86 /* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
97 /* store the start address on the stack */ 100 /* store the start address on the stack */
98 pushl %edx 101 pushl %edx
99 102
100 /* Set cr0 to a known state: 103 /*
104 * Set cr0 to a known state:
101 * - Paging disabled 105 * - Paging disabled
102 * - Alignment check disabled 106 * - Alignment check disabled
103 * - Write protect disabled 107 * - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
113 /* clear cr4 if applicable */ 117 /* clear cr4 if applicable */
114 testl %ecx, %ecx 118 testl %ecx, %ecx
115 jz 1f 119 jz 1f
116 /* Set cr4 to a known state: 120 /*
121 * Set cr4 to a known state:
117 * Setting everything to zero seems safe. 122 * Setting everything to zero seems safe.
118 */ 123 */
119 xorl %eax, %eax 124 xorl %eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
132 call swap_pages 137 call swap_pages
133 addl $8, %esp 138 addl $8, %esp
134 139
135 /* To be certain of avoiding problems with self-modifying code 140 /*
141 * To be certain of avoiding problems with self-modifying code
136 * I need to execute a serializing instruction here. 142 * I need to execute a serializing instruction here.
137 * So I flush the TLB, it's handy, and not processor dependent. 143 * So I flush the TLB, it's handy, and not processor dependent.
138 */ 144 */
139 xorl %eax, %eax 145 xorl %eax, %eax
140 movl %eax, %cr3 146 movl %eax, %cr3
141 147
142 /* set all of the registers to known values */ 148 /*
143 /* leave %esp alone */ 149 * set all of the registers to known values
150 * leave %esp alone
151 */
144 152
145 testl %esi, %esi 153 testl %esi, %esi
146 jnz 1f 154 jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a479..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
19#define PTR(x) (x << 3) 19#define PTR(x) (x << 3)
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21 21
22/*
23 * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * ~ control_page + PAGE_SIZE are used as data storage and stack for
25 * jumping back
26 */
27#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
28
29/* Minimal CPU state */
30#define RSP DATA(0x0)
31#define CR0 DATA(0x8)
32#define CR3 DATA(0x10)
33#define CR4 DATA(0x18)
34
35/* other data */
36#define CP_PA_TABLE_PAGE DATA(0x20)
37#define CP_PA_SWAP_PAGE DATA(0x28)
38#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
39
22 .text 40 .text
23 .align PAGE_SIZE 41 .align PAGE_SIZE
24 .code64 42 .code64
25 .globl relocate_kernel 43 .globl relocate_kernel
26relocate_kernel: 44relocate_kernel:
27 /* %rdi indirection_page 45 /*
46 * %rdi indirection_page
28 * %rsi page_list 47 * %rsi page_list
29 * %rdx start address 48 * %rdx start address
49 * %rcx preserve_context
30 */ 50 */
31 51
52 /* Save the CPU context, used for jumping back */
53 pushq %rbx
54 pushq %rbp
55 pushq %r12
56 pushq %r13
57 pushq %r14
58 pushq %r15
59 pushf
60
61 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
62 movq %rsp, RSP(%r11)
63 movq %cr0, %rax
64 movq %rax, CR0(%r11)
65 movq %cr3, %rax
66 movq %rax, CR3(%r11)
67 movq %cr4, %rax
68 movq %rax, CR4(%r11)
69
32 /* zero out flags, and disable interrupts */ 70 /* zero out flags, and disable interrupts */
33 pushq $0 71 pushq $0
34 popfq 72 popfq
35 73
36 /* get physical address of control page now */ 74 /*
37 /* this is impossible after page table switch */ 75 * get physical address of control page now
76 * this is impossible after page table switch
77 */
38 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 78 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
39 79
40 /* get physical address of page table now too */ 80 /* get physical address of page table now too */
41 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 81 movq PTR(PA_TABLE_PAGE)(%rsi), %r9
82
83 /* get physical address of swap page now */
84 movq PTR(PA_SWAP_PAGE)(%rsi), %r10
85
86 /* save some information for jumping back */
87 movq %r9, CP_PA_TABLE_PAGE(%r11)
88 movq %r10, CP_PA_SWAP_PAGE(%r11)
89 movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
42 90
43 /* Switch to the identity mapped page tables */ 91 /* Switch to the identity mapped page tables */
44 movq %rcx, %cr3 92 movq %r9, %cr3
45 93
46 /* setup a new stack at the end of the physical control page */ 94 /* setup a new stack at the end of the physical control page */
47 lea PAGE_SIZE(%r8), %rsp 95 lea PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
55 /* store the start address on the stack */ 103 /* store the start address on the stack */
56 pushq %rdx 104 pushq %rdx
57 105
58 /* Set cr0 to a known state: 106 /*
107 * Set cr0 to a known state:
59 * - Paging enabled 108 * - Paging enabled
60 * - Alignment check disabled 109 * - Alignment check disabled
61 * - Write protect disabled 110 * - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
68 orl $(X86_CR0_PG | X86_CR0_PE), %eax 117 orl $(X86_CR0_PG | X86_CR0_PE), %eax
69 movq %rax, %cr0 118 movq %rax, %cr0
70 119
71 /* Set cr4 to a known state: 120 /*
121 * Set cr4 to a known state:
72 * - physical address extension enabled 122 * - physical address extension enabled
73 */ 123 */
74 movq $X86_CR4_PAE, %rax 124 movq $X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
781: 1281:
79 129
80 /* Flush the TLB (needed?) */ 130 /* Flush the TLB (needed?) */
81 movq %rcx, %cr3 131 movq %r9, %cr3
132
133 movq %rcx, %r11
134 call swap_pages
135
136 /*
137 * To be certain of avoiding problems with self-modifying code
138 * I need to execute a serializing instruction here.
139 * So I flush the TLB by reloading %cr3 here, it's handy,
140 * and not processor dependent.
141 */
142 movq %cr3, %rax
143 movq %rax, %cr3
144
145 /*
146 * set all of the registers to known values
147 * leave %rsp alone
148 */
149
150 testq %r11, %r11
151 jnz 1f
152 xorq %rax, %rax
153 xorq %rbx, %rbx
154 xorq %rcx, %rcx
155 xorq %rdx, %rdx
156 xorq %rsi, %rsi
157 xorq %rdi, %rdi
158 xorq %rbp, %rbp
159 xorq %r8, %r8
160 xorq %r9, %r9
161	xorq	%r10, %r10
162 xorq %r11, %r11
163 xorq %r12, %r12
164 xorq %r13, %r13
165 xorq %r14, %r14
166 xorq %r15, %r15
167
168 ret
169
1701:
171 popq %rdx
172 leaq PAGE_SIZE(%r10), %rsp
173 call *%rdx
174
175 /* get the re-entry point of the peer system */
176 movq 0(%rsp), %rbp
177 call 1f
1781:
179 popq %r8
180 subq $(1b - relocate_kernel), %r8
181 movq CP_PA_SWAP_PAGE(%r8), %r10
182 movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
183 movq CP_PA_TABLE_PAGE(%r8), %rax
184 movq %rax, %cr3
185 lea PAGE_SIZE(%r8), %rsp
186 call swap_pages
187 movq $virtual_mapped, %rax
188 pushq %rax
189 ret
190
191virtual_mapped:
192 movq RSP(%r8), %rsp
193 movq CR4(%r8), %rax
194 movq %rax, %cr4
195 movq CR3(%r8), %rax
196 movq CR0(%r8), %r8
197 movq %rax, %cr3
198 movq %r8, %cr0
199 movq %rbp, %rax
200
201 popf
202 popq %r15
203 popq %r14
204 popq %r13
205 popq %r12
206 popq %rbp
207 popq %rbx
208 ret
82 209
83 /* Do the copies */ 210 /* Do the copies */
211swap_pages:
84 movq %rdi, %rcx /* Put the page_list in %rcx */ 212 movq %rdi, %rcx /* Put the page_list in %rcx */
85 xorq %rdi, %rdi 213 xorq %rdi, %rdi
86 xorq %rsi, %rsi 214 xorq %rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
112	movq	%rcx, %rsi /* For every source page do a copy */ 240
113 andq $0xfffffffffffff000, %rsi 241 andq $0xfffffffffffff000, %rsi
114 242
243 movq %rdi, %rdx
244 movq %rsi, %rax
245
246 movq %r10, %rdi
115 movq $512, %rcx 247 movq $512, %rcx
116 rep ; movsq 248 rep ; movsq
117 jmp 0b
1183:
119
120 /* To be certain of avoiding problems with self-modifying code
121 * I need to execute a serializing instruction here.
122 * So I flush the TLB by reloading %cr3 here, it's handy,
123 * and not processor dependent.
124 */
125 movq %cr3, %rax
126 movq %rax, %cr3
127 249
128 /* set all of the registers to known values */ 250 movq %rax, %rdi
129 /* leave %rsp alone */ 251 movq %rdx, %rsi
252 movq $512, %rcx
253 rep ; movsq
130 254
131 xorq %rax, %rax 255 movq %rdx, %rdi
132 xorq %rbx, %rbx 256 movq %r10, %rsi
133 xorq %rcx, %rcx 257 movq $512, %rcx
134 xorq %rdx, %rdx 258 rep ; movsq
135 xorq %rsi, %rsi
136 xorq %rdi, %rdi
137 xorq %rbp, %rbp
138 xorq %r8, %r8
139 xorq %r9, %r9
140 xorq %r10, %r9
141 xorq %r11, %r11
142 xorq %r12, %r12
143 xorq %r13, %r13
144 xorq %r14, %r14
145 xorq %r15, %r15
146 259
260 lea PAGE_SIZE(%rax), %rsi
261 jmp 0b
2623:
147 ret 263 ret
264
265 .globl kexec_control_code_size
266.set kexec_control_code_size, . - relocate_kernel
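
The DATA() offsets introduced at the top of relocate_kernel_64.S carve a save area out of the control page just past the relocation code. Seen from C, the layout amounts to something like the struct below (a sketch; the kernel defines these only as assembler offsets):

    /* lives at control_page + KEXEC_CONTROL_CODE_MAX_SIZE */
    struct kexec_jump_save {                        /* hypothetical name */
            unsigned long rsp;                      /* DATA(0x00) */
            unsigned long cr0;                      /* DATA(0x08) */
            unsigned long cr3;                      /* DATA(0x10) */
            unsigned long cr4;                      /* DATA(0x18) */
            unsigned long pa_table_page;            /* DATA(0x20) */
            unsigned long pa_swap_page;             /* DATA(0x28) */
            unsigned long pa_backup_pages_map;      /* DATA(0x30) */
    };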
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index dd6f2b71561b..5d465b207e72 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -1,14 +1,14 @@
1/* 1/*
2 * RTC related functions 2 * RTC related functions
3 */ 3 */
4#include <linux/platform_device.h>
5#include <linux/mc146818rtc.h>
4#include <linux/acpi.h> 6#include <linux/acpi.h>
5#include <linux/bcd.h> 7#include <linux/bcd.h>
6#include <linux/mc146818rtc.h>
7#include <linux/platform_device.h>
8#include <linux/pnp.h> 8#include <linux/pnp.h>
9 9
10#include <asm/time.h>
11#include <asm/vsyscall.h> 10#include <asm/vsyscall.h>
11#include <asm/time.h>
12 12
13#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
14/* 14/*
@@ -16,9 +16,9 @@
16 * register we are working with. It is required for NMI access to the 16 * register we are working with. It is required for NMI access to the
17 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. 17 * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details.
18 */ 18 */
19volatile unsigned long cmos_lock = 0; 19volatile unsigned long cmos_lock;
20EXPORT_SYMBOL(cmos_lock); 20EXPORT_SYMBOL(cmos_lock);
21#endif 21#endif /* CONFIG_X86_32 */
22 22
23/* For two digit years assume time is always after that */ 23/* For two digit years assume time is always after that */
24#define CMOS_YEARS_OFFS 2000 24#define CMOS_YEARS_OFFS 2000
@@ -38,9 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
38 */ 38 */
39int mach_set_rtc_mmss(unsigned long nowtime) 39int mach_set_rtc_mmss(unsigned long nowtime)
40{ 40{
41 int retval = 0;
42 int real_seconds, real_minutes, cmos_minutes; 41 int real_seconds, real_minutes, cmos_minutes;
43 unsigned char save_control, save_freq_select; 42 unsigned char save_control, save_freq_select;
43 int retval = 0;
44 44
45 /* tell the clock it's being set */ 45 /* tell the clock it's being set */
46 save_control = CMOS_READ(RTC_CONTROL); 46 save_control = CMOS_READ(RTC_CONTROL);
@@ -72,8 +72,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
72 real_seconds = bin2bcd(real_seconds); 72 real_seconds = bin2bcd(real_seconds);
73 real_minutes = bin2bcd(real_minutes); 73 real_minutes = bin2bcd(real_minutes);
74 } 74 }
75 CMOS_WRITE(real_seconds,RTC_SECONDS); 75 CMOS_WRITE(real_seconds, RTC_SECONDS);
76 CMOS_WRITE(real_minutes,RTC_MINUTES); 76 CMOS_WRITE(real_minutes, RTC_MINUTES);
77 } else { 77 } else {
78 printk(KERN_WARNING 78 printk(KERN_WARNING
79 "set_rtc_mmss: can't update from %d to %d\n", 79 "set_rtc_mmss: can't update from %d to %d\n",
@@ -151,6 +151,7 @@ unsigned char rtc_cmos_read(unsigned char addr)
151 outb(addr, RTC_PORT(0)); 151 outb(addr, RTC_PORT(0));
152 val = inb(RTC_PORT(1)); 152 val = inb(RTC_PORT(1));
153 lock_cmos_suffix(addr); 153 lock_cmos_suffix(addr);
154
154 return val; 155 return val;
155} 156}
156EXPORT_SYMBOL(rtc_cmos_read); 157EXPORT_SYMBOL(rtc_cmos_read);
@@ -166,8 +167,8 @@ EXPORT_SYMBOL(rtc_cmos_write);
166 167
167static int set_rtc_mmss(unsigned long nowtime) 168static int set_rtc_mmss(unsigned long nowtime)
168{ 169{
169 int retval;
170 unsigned long flags; 170 unsigned long flags;
171 int retval;
171 172
172 spin_lock_irqsave(&rtc_lock, flags); 173 spin_lock_irqsave(&rtc_lock, flags);
173 retval = set_wallclock(nowtime); 174 retval = set_wallclock(nowtime);
@@ -242,6 +243,7 @@ static __init int add_rtc_cmos(void)
242 platform_device_register(&rtc_device); 243 platform_device_register(&rtc_device);
243 dev_info(&rtc_device.dev, 244 dev_info(&rtc_device.dev,
244 "registered platform RTC device (no PNP device found)\n"); 245 "registered platform RTC device (no PNP device found)\n");
246
245 return 0; 247 return 0;
246} 248}
247device_initcall(add_rtc_cmos); 249device_initcall(add_rtc_cmos);
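
Note that mach_set_rtc_mmss() converts to BCD before the CMOS_WRITE() calls whenever the RTC is not in binary mode; bin2bcd() packs each decimal digit into one nibble. A quick worked example (illustrative):

    /* bin2bcd(59) == 0x59: tens digit in the high nibble, ones in the low */
    unsigned char sec = bin2bcd(59);        /* ((59 / 10) << 4) | (59 % 10) */
    CMOS_WRITE(sec, RTC_SECONDS);           /* as in mach_set_rtc_mmss() */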
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f28c56e6bf94..b4158439bf63 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,8 +112,13 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115RESERVE_BRK(dmi_alloc, 65536);
116
115unsigned int boot_cpu_id __read_mostly; 117unsigned int boot_cpu_id __read_mostly;
116 118
119static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
120unsigned long _brk_end = (unsigned long)__brk_base;
121
117#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu) 123int default_cpu_present_to_apicid(int mps_cpu)
119{ 124{
@@ -158,12 +163,6 @@ static struct resource bss_resource = {
158 163
159 164
160#ifdef CONFIG_X86_32 165#ifdef CONFIG_X86_32
161/* This value is set up by the early boot code to point to the value
162 immediately after the boot time page tables. It contains a *physical*
163 address, and must not be in the .bss segment! */
164unsigned long init_pg_tables_start __initdata = ~0UL;
165unsigned long init_pg_tables_end __initdata = ~0UL;
166
167static struct resource video_ram_resource = { 166static struct resource video_ram_resource = {
168 .name = "Video RAM area", 167 .name = "Video RAM area",
169 .start = 0xa0000, 168 .start = 0xa0000,
@@ -219,12 +218,6 @@ unsigned long mmu_cr4_features = X86_CR4_PAE;
219int bootloader_type; 218int bootloader_type;
220 219
221/* 220/*
222 * Early DMI memory
223 */
224int dmi_alloc_index;
225char dmi_alloc_data[DMI_MAX_DATA];
226
227/*
228 * Setup options 221 * Setup options
229 */ 222 */
230struct screen_info screen_info; 223struct screen_info screen_info;
@@ -269,6 +262,35 @@ static inline void copy_edd(void)
269} 262}
270#endif 263#endif
271 264
265void * __init extend_brk(size_t size, size_t align)
266{
267 size_t mask = align - 1;
268 void *ret;
269
270 BUG_ON(_brk_start == 0);
271 BUG_ON(align & mask);
272
273 _brk_end = (_brk_end + mask) & ~mask;
274 BUG_ON((char *)(_brk_end + size) > __brk_limit);
275
276 ret = (void *)_brk_end;
277 _brk_end += size;
278
279 memset(ret, 0, size);
280
281 return ret;
282}
283
284static void __init reserve_brk(void)
285{
286 if (_brk_end > _brk_start)
287 reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
288
289 /* Mark brk area as locked down and no longer taking any
290 new allocations */
291 _brk_start = 0;
292}
293
272#ifdef CONFIG_BLK_DEV_INITRD 294#ifdef CONFIG_BLK_DEV_INITRD
273 295
274#ifdef CONFIG_X86_32 296#ifdef CONFIG_X86_32
@@ -717,11 +739,7 @@ void __init setup_arch(char **cmdline_p)
717 init_mm.start_code = (unsigned long) _text; 739 init_mm.start_code = (unsigned long) _text;
718 init_mm.end_code = (unsigned long) _etext; 740 init_mm.end_code = (unsigned long) _etext;
719 init_mm.end_data = (unsigned long) _edata; 741 init_mm.end_data = (unsigned long) _edata;
720#ifdef CONFIG_X86_32 742 init_mm.brk = _brk_end;
721 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
722#else
723 init_mm.brk = (unsigned long) &_end;
724#endif
725 743
726 code_resource.start = virt_to_phys(_text); 744 code_resource.start = virt_to_phys(_text);
727 code_resource.end = virt_to_phys(_etext)-1; 745 code_resource.end = virt_to_phys(_etext)-1;
@@ -842,6 +860,8 @@ void __init setup_arch(char **cmdline_p)
842 setup_bios_corruption_check(); 860 setup_bios_corruption_check();
843#endif 861#endif
844 862
863 reserve_brk();
864
845 /* max_pfn_mapped is updated here */ 865 /* max_pfn_mapped is updated here */
846 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 866 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
847 max_pfn_mapped = max_low_pfn_mapped; 867 max_pfn_mapped = max_low_pfn_mapped;
@@ -1029,7 +1049,6 @@ void __init x86_quirk_trap_init(void)
1029static struct irqaction irq0 = { 1049static struct irqaction irq0 = {
1030 .handler = timer_interrupt, 1050 .handler = timer_interrupt,
1031 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, 1051 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
1032 .mask = CPU_MASK_NONE,
1033 .name = "timer" 1052 .name = "timer"
1034}; 1053};
1035 1054
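
The new extend_brk() is a one-way bump allocator over a region set aside at link time via RESERVE_BRK(); it rounds _brk_end up to the requested alignment, hands out zeroed memory, and BUG()s past __brk_limit. Once reserve_brk() has run, _brk_start is zeroed and further calls are forbidden. A usage sketch (the dmi_alloc reservation above is a real consumer; the names below are illustrative):

    RESERVE_BRK(my_area, 4096);     /* hypothetical link-time reservation */

    static void * __init my_early_alloc(void)
    {
            /* 256 bytes, 16-byte aligned, zeroed; only valid before reserve_brk() */
            return extend_brk(256, 16);
    }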
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efa615f2bf43..3a97a4cf1872 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -153,7 +153,6 @@ static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
153static ssize_t __init setup_pcpu_remap(size_t static_size) 153static ssize_t __init setup_pcpu_remap(size_t static_size)
154{ 154{
155 static struct vm_struct vm; 155 static struct vm_struct vm;
156 pg_data_t *last;
157 size_t ptrs_size, dyn_size; 156 size_t ptrs_size, dyn_size;
158 unsigned int cpu; 157 unsigned int cpu;
159 ssize_t ret; 158 ssize_t ret;
@@ -162,22 +161,9 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
162 * If large page isn't supported, there's no benefit in doing 161 * If large page isn't supported, there's no benefit in doing
163 * this. Also, on non-NUMA, embedding is better. 162 * this. Also, on non-NUMA, embedding is better.
164 */ 163 */
165 if (!cpu_has_pse || pcpu_need_numa()) 164 if (!cpu_has_pse || !pcpu_need_numa())
166 return -EINVAL; 165 return -EINVAL;
167 166
168 last = NULL;
169 for_each_possible_cpu(cpu) {
170 int node = early_cpu_to_node(cpu);
171
172 if (node_online(node) && NODE_DATA(node) &&
173 last && last != NODE_DATA(node))
174 goto proceed;
175
176 last = NODE_DATA(node);
177 }
178 return -EINVAL;
179
180proceed:
181 /* 167 /*
182 * Currently supports only single page. Supporting multiple 168 * Currently supports only single page. Supporting multiple
183 * pages won't be too difficult if it ever becomes necessary. 169 * pages won't be too difficult if it ever becomes necessary.
@@ -233,8 +219,8 @@ proceed:
233 "%zu bytes\n", vm.addr, static_size); 219 "%zu bytes\n", vm.addr, static_size);
234 220
235 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 221 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
236 PERCPU_FIRST_CHUNK_RESERVE, 222 PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
237 PMD_SIZE, dyn_size, vm.addr, NULL); 223 PMD_SIZE, vm.addr, NULL);
238 goto out_free_ar; 224 goto out_free_ar;
239 225
240enomem: 226enomem:
@@ -257,31 +243,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
257 * Embedding allocator 243 * Embedding allocator
258 * 244 *
259 * The first chunk is sized to just contain the static area plus 245 * The first chunk is sized to just contain the static area plus
260 * module and dynamic reserves, and allocated as a contiguous area 246 * module and dynamic reserves and embedded into linear physical
261 * using bootmem allocator and used as-is without being mapped into 247 * mapping so that it can use PMD mapping without additional TLB
262 * vmalloc area. This enables the first chunk to piggy back on the 248 * pressure.
263 * linear physical PMD mapping and doesn't add any additional pressure
264 * to TLB. Note that if the needed size is smaller than the minimum
265 * unit size, the leftover is returned to the bootmem allocator.
266 */ 249 */
267static void *pcpue_ptr __initdata;
268static size_t pcpue_size __initdata;
269static size_t pcpue_unit_size __initdata;
270
271static struct page * __init pcpue_get_page(unsigned int cpu, int pageno)
272{
273 size_t off = (size_t)pageno << PAGE_SHIFT;
274
275 if (off >= pcpue_size)
276 return NULL;
277
278 return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off);
279}
280
281static ssize_t __init setup_pcpu_embed(size_t static_size) 250static ssize_t __init setup_pcpu_embed(size_t static_size)
282{ 251{
283 unsigned int cpu; 252 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
284 size_t dyn_size;
285 253
286 /* 254 /*
287 * If large page isn't supported, there's no benefit in doing 255 * If large page isn't supported, there's no benefit in doing
@@ -291,33 +259,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
291 if (!cpu_has_pse || pcpu_need_numa()) 259 if (!cpu_has_pse || pcpu_need_numa())
292 return -EINVAL; 260 return -EINVAL;
293 261
294 /* allocate and copy */ 262 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
295 pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + 263 reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
296 PERCPU_DYNAMIC_RESERVE);
297 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
298 dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
299
300 pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
301 PAGE_SIZE);
302 if (!pcpue_ptr)
303 return -ENOMEM;
304
305 for_each_possible_cpu(cpu) {
306 void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
307
308 free_bootmem(__pa(ptr + pcpue_size),
309 pcpue_unit_size - pcpue_size);
310 memcpy(ptr, __per_cpu_load, static_size);
311 }
312
313 /* we're ready, commit */
314 pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
315 pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);
316
317 return pcpu_setup_first_chunk(pcpue_get_page, static_size,
318 PERCPU_FIRST_CHUNK_RESERVE,
319 pcpue_unit_size, dyn_size,
320 pcpue_ptr, NULL);
321} 264}
322 265
323/* 266/*
@@ -375,8 +318,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
375 pcpu4k_nr_static_pages, static_size); 318 pcpu4k_nr_static_pages, static_size);
376 319
377 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 320 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
378 PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, 321 PERCPU_FIRST_CHUNK_RESERVE, -1,
379 pcpu4k_populate_pte); 322 -1, NULL, pcpu4k_populate_pte);
380 goto out_free_ar; 323 goto out_free_ar;
381 324
382enomem: 325enomem:
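
With the open-coded embed allocator gone, the x86 side of setup_pcpu_embed() only expresses how the module/dynamic reserve is split; the generic pcpu_embed_first_chunk() does the sizing. The arithmetic reduces to roughly:

    /* illustrative restatement of the setup_pcpu_embed() arguments */
    size_t reserve  = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
    size_t reserved = PERCPU_FIRST_CHUNK_RESERVE;   /* kept for modules */
    size_t dyn_size = reserve - reserved;           /* left for dynamic percpu */
    /* first chunk ~= static_size + reserved + dyn_size, PMD-mapped where possible */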
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index d2cc6428c587..14425166b8e3 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -211,31 +211,27 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
211{ 211{
212 /* Default to using normal stack */ 212 /* Default to using normal stack */
213 unsigned long sp = regs->sp; 213 unsigned long sp = regs->sp;
214 int onsigstack = on_sig_stack(sp);
214 215
215#ifdef CONFIG_X86_64 216#ifdef CONFIG_X86_64
216 /* redzone */ 217 /* redzone */
217 sp -= 128; 218 sp -= 128;
218#endif /* CONFIG_X86_64 */ 219#endif /* CONFIG_X86_64 */
219 220
220 /* 221 if (!onsigstack) {
221 * If we are on the alternate signal stack and would overflow it, don't. 222 /* This is the X/Open sanctioned signal stack switching. */
222 * Return an always-bogus address instead so we will die with SIGSEGV. 223 if (ka->sa.sa_flags & SA_ONSTACK) {
223 */ 224 if (current->sas_ss_size)
224 if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) 225 sp = current->sas_ss_sp + current->sas_ss_size;
225 return (void __user *) -1L; 226 } else {
226
227 /* This is the X/Open sanctioned signal stack switching. */
228 if (ka->sa.sa_flags & SA_ONSTACK) {
229 if (sas_ss_flags(sp) == 0)
230 sp = current->sas_ss_sp + current->sas_ss_size;
231 } else {
232#ifdef CONFIG_X86_32 227#ifdef CONFIG_X86_32
233 /* This is the legacy signal stack switching. */ 228 /* This is the legacy signal stack switching. */
234 if ((regs->ss & 0xffff) != __USER_DS && 229 if ((regs->ss & 0xffff) != __USER_DS &&
235 !(ka->sa.sa_flags & SA_RESTORER) && 230 !(ka->sa.sa_flags & SA_RESTORER) &&
236 ka->sa.sa_restorer) 231 ka->sa.sa_restorer)
237 sp = (unsigned long) ka->sa.sa_restorer; 232 sp = (unsigned long) ka->sa.sa_restorer;
238#endif /* CONFIG_X86_32 */ 233#endif /* CONFIG_X86_32 */
234 }
239 } 235 }
240 236
241 if (used_math()) { 237 if (used_math()) {
@@ -244,12 +240,22 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
244 sp = round_down(sp, 64); 240 sp = round_down(sp, 64);
245#endif /* CONFIG_X86_64 */ 241#endif /* CONFIG_X86_64 */
246 *fpstate = (void __user *)sp; 242 *fpstate = (void __user *)sp;
247
248 if (save_i387_xstate(*fpstate) < 0)
249 return (void __user *)-1L;
250 } 243 }
251 244
252 return (void __user *)align_sigframe(sp - frame_size); 245 sp = align_sigframe(sp - frame_size);
246
247 /*
248 * If we are on the alternate signal stack and would overflow it, don't.
249 * Return an always-bogus address instead so we will die with SIGSEGV.
250 */
251 if (onsigstack && !likely(on_sig_stack(sp)))
252 return (void __user *)-1L;
253
254 /* save i387 state */
255 if (used_math() && save_i387_xstate(*fpstate) < 0)
256 return (void __user *)-1L;
257
258 return (void __user *)sp;
253} 259}
254 260
255#ifdef CONFIG_X86_32 261#ifdef CONFIG_X86_32
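
The reordering above is the substance of the fix: the alternate-stack overflow check now runs on the final, aligned frame address rather than the incoming sp. on_sig_stack() itself is just a range test against the registered alternate stack; roughly (a sketch, not the verbatim kernel helper):

    static int on_alt_stack(unsigned long sp)
    {
            /* unsigned subtraction doubles as the lower-bound check */
            return sp - current->sas_ss_sp < current->sas_ss_size;
    }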
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ef7d10170c30..58d24ef917d8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,11 +101,11 @@ EXPORT_SYMBOL(smp_num_siblings);
101DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; 101DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
102 102
103/* representing HT siblings of each logical CPU */ 103/* representing HT siblings of each logical CPU */
104DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); 104DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
105EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); 105EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
106 106
107/* representing HT and core siblings of each logical CPU */ 107/* representing HT and core siblings of each logical CPU */
108DEFINE_PER_CPU(cpumask_t, cpu_core_map); 108DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
109EXPORT_PER_CPU_SYMBOL(cpu_core_map); 109EXPORT_PER_CPU_SYMBOL(cpu_core_map);
110 110
111/* Per CPU bogomips and other parameters */ 111/* Per CPU bogomips and other parameters */
@@ -115,11 +115,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
115atomic_t init_deasserted; 115atomic_t init_deasserted;
116 116
117#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) 117#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
118
119/* which logical CPUs are on which nodes */
120cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
121 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
122EXPORT_SYMBOL(node_to_cpumask_map);
123/* which node each logical CPU is on */ 118/* which node each logical CPU is on */
124int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; 119int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
125EXPORT_SYMBOL(cpu_to_node_map); 120EXPORT_SYMBOL(cpu_to_node_map);
@@ -128,7 +123,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
128static void map_cpu_to_node(int cpu, int node) 123static void map_cpu_to_node(int cpu, int node)
129{ 124{
130 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); 125 printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
131 cpumask_set_cpu(cpu, &node_to_cpumask_map[node]); 126 cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
132 cpu_to_node_map[cpu] = node; 127 cpu_to_node_map[cpu] = node;
133} 128}
134 129
@@ -139,7 +134,7 @@ static void unmap_cpu_to_node(int cpu)
139 134
140 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); 135 printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
141 for (node = 0; node < MAX_NUMNODES; node++) 136 for (node = 0; node < MAX_NUMNODES; node++)
142 cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]); 137 cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
143 cpu_to_node_map[cpu] = 0; 138 cpu_to_node_map[cpu] = 0;
144} 139}
145#else /* !(CONFIG_NUMA && CONFIG_X86_32) */ 140#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
@@ -301,7 +296,7 @@ notrace static void __cpuinit start_secondary(void *unused)
301 __flush_tlb_all(); 296 __flush_tlb_all();
302#endif 297#endif
303 298
304 /* This must be done before setting cpu_online_map */ 299 /* This must be done before setting cpu_online_mask */
305 set_cpu_sibling_map(raw_smp_processor_id()); 300 set_cpu_sibling_map(raw_smp_processor_id());
306 wmb(); 301 wmb();
307 302
@@ -334,6 +329,23 @@ notrace static void __cpuinit start_secondary(void *unused)
334 cpu_idle(); 329 cpu_idle();
335} 330}
336 331
332#ifdef CONFIG_CPUMASK_OFFSTACK
333/* In this case, llc_shared_map is a pointer to a cpumask. */
334static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
335 const struct cpuinfo_x86 *src)
336{
337 struct cpumask *llc = dst->llc_shared_map;
338 *dst = *src;
339 dst->llc_shared_map = llc;
340}
341#else
342static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
343 const struct cpuinfo_x86 *src)
344{
345 *dst = *src;
346}
347#endif /* CONFIG_CPUMASK_OFFSTACK */
348
337/* 349/*
338 * The bootstrap kernel entry code has set these up. Save them for 350 * The bootstrap kernel entry code has set these up. Save them for
339 * a given CPU 351 * a given CPU
@@ -343,7 +355,7 @@ void __cpuinit smp_store_cpu_info(int id)
343{ 355{
344 struct cpuinfo_x86 *c = &cpu_data(id); 356 struct cpuinfo_x86 *c = &cpu_data(id);
345 357
346 *c = boot_cpu_data; 358 copy_cpuinfo_x86(c, &boot_cpu_data);
347 c->cpu_index = id; 359 c->cpu_index = id;
348 if (id != 0) 360 if (id != 0)
349 identify_secondary_cpu(c); 361 identify_secondary_cpu(c);
@@ -367,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
367 cpumask_set_cpu(cpu, cpu_sibling_mask(i)); 379 cpumask_set_cpu(cpu, cpu_sibling_mask(i));
368 cpumask_set_cpu(i, cpu_core_mask(cpu)); 380 cpumask_set_cpu(i, cpu_core_mask(cpu));
369 cpumask_set_cpu(cpu, cpu_core_mask(i)); 381 cpumask_set_cpu(cpu, cpu_core_mask(i));
370 cpumask_set_cpu(i, &c->llc_shared_map); 382 cpumask_set_cpu(i, c->llc_shared_map);
371 cpumask_set_cpu(cpu, &o->llc_shared_map); 383 cpumask_set_cpu(cpu, o->llc_shared_map);
372 } 384 }
373 } 385 }
374 } else { 386 } else {
375 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 387 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
376 } 388 }
377 389
378 cpumask_set_cpu(cpu, &c->llc_shared_map); 390 cpumask_set_cpu(cpu, c->llc_shared_map);
379 391
380 if (current_cpu_data.x86_max_cores == 1) { 392 if (current_cpu_data.x86_max_cores == 1) {
381 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 393 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -386,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
386 for_each_cpu(i, cpu_sibling_setup_mask) { 398 for_each_cpu(i, cpu_sibling_setup_mask) {
387 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 399 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
388 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 400 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
389 cpumask_set_cpu(i, &c->llc_shared_map); 401 cpumask_set_cpu(i, c->llc_shared_map);
390 cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); 402 cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
391 } 403 }
392 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 404 if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
393 cpumask_set_cpu(i, cpu_core_mask(cpu)); 405 cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -425,12 +437,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
425 if (sched_mc_power_savings || sched_smt_power_savings) 437 if (sched_mc_power_savings || sched_smt_power_savings)
426 return cpu_core_mask(cpu); 438 return cpu_core_mask(cpu);
427 else 439 else
428 return &c->llc_shared_map; 440 return c->llc_shared_map;
429}
430
431cpumask_t cpu_coregroup_map(int cpu)
432{
433 return *cpu_coregroup_mask(cpu);
434} 441}
435 442
436static void impress_friends(void) 443static void impress_friends(void)
@@ -897,9 +904,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
897 */ 904 */
898static __init void disable_smp(void) 905static __init void disable_smp(void)
899{ 906{
900 /* use the read/write pointers to the present and possible maps */ 907 init_cpu_present(cpumask_of(0));
901 cpumask_copy(&cpu_present_map, cpumask_of(0)); 908 init_cpu_possible(cpumask_of(0));
902 cpumask_copy(&cpu_possible_map, cpumask_of(0));
903 smpboot_clear_io_apic_irqs(); 909 smpboot_clear_io_apic_irqs();
904 910
905 if (smp_found_config) 911 if (smp_found_config)
@@ -1031,6 +1037,8 @@ static void __init smp_cpu_index_default(void)
1031 */ 1037 */
1032void __init native_smp_prepare_cpus(unsigned int max_cpus) 1038void __init native_smp_prepare_cpus(unsigned int max_cpus)
1033{ 1039{
1040 unsigned int i;
1041
1034 preempt_disable(); 1042 preempt_disable();
1035 smp_cpu_index_default(); 1043 smp_cpu_index_default();
1036 current_cpu_data = boot_cpu_data; 1044 current_cpu_data = boot_cpu_data;
@@ -1044,6 +1052,14 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1044 boot_cpu_logical_apicid = logical_smp_processor_id(); 1052 boot_cpu_logical_apicid = logical_smp_processor_id();
1045#endif 1053#endif
1046 current_thread_info()->cpu = 0; /* needed? */ 1054 current_thread_info()->cpu = 0; /* needed? */
1055 for_each_possible_cpu(i) {
1056 alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1057 alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1058 alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
1059 cpumask_clear(per_cpu(cpu_core_map, i));
1060 cpumask_clear(per_cpu(cpu_sibling_map, i));
1061 cpumask_clear(cpu_data(i).llc_shared_map);
1062 }
1047 set_cpu_sibling_map(0); 1063 set_cpu_sibling_map(0);
1048 1064
1049 enable_IR_x2apic(); 1065 enable_IR_x2apic();
@@ -1132,11 +1148,11 @@ early_param("possible_cpus", _setup_possible_cpus);
1132 1148
1133 1149
1134/* 1150/*
1135 * cpu_possible_map should be static, it cannot change as CPUs 1151 * cpu_possible_mask should be static, it cannot change as CPUs
1136 * are onlined, or offlined. The reason is per-cpu data-structures 1152
1137 * are allocated by some modules at init time, and don't expect to 1153
1138 * do this dynamically on cpu arrival/departure. 1154
1139 * cpu_present_map on the other hand can change dynamically. 1155 * cpu_present_mask on the other hand can change dynamically.
1140 * In case when cpu_hotplug is not compiled, then we resort to current 1156 * In case when cpu_hotplug is not compiled, then we resort to current
1141 * behaviour, which is cpu_possible == cpu_present. 1157 * behaviour, which is cpu_possible == cpu_present.
1142 * - Ashok Raj 1158 * - Ashok Raj
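
The copy_cpuinfo_x86() helper added above exists because, under CONFIG_CPUMASK_OFFSTACK, llc_shared_map inside struct cpuinfo_x86 is a pointer to a separately allocated cpumask, so a plain structure assignment would make every secondary CPU alias the boot CPU's mask. An illustration of the failure mode the helper avoids:

    struct cpuinfo_x86 *c = &cpu_data(id);

    *c = boot_cpu_data;     /* WRONG off-stack: c->llc_shared_map now
                             * points at the boot CPU's mask */
    cpumask_set_cpu(id, c->llc_shared_map); /* would corrupt boot CPU data */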
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 3bdb64829b82..ff5c8736b491 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -332,3 +332,5 @@ ENTRY(sys_call_table)
332 .long sys_dup3 /* 330 */ 332 .long sys_dup3 /* 330 */
333 .long sys_pipe2 333 .long sys_pipe2
334 .long sys_inotify_init1 334 .long sys_inotify_init1
335 .long sys_preadv
336 .long sys_pwritev
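
sys_preadv and sys_pwritev, wired up here as syscalls 333 and 334, are the positional variants of readv/writev: a whole iovec is transferred at an explicit file offset without moving the file position. Typical userspace usage through the glibc wrapper (illustrative; fd is assumed to be an open descriptor):

    #include <sys/uio.h>

    char hdr[16], body[4096];
    struct iovec iov[2] = {
            { .iov_base = hdr,  .iov_len = sizeof(hdr)  },
            { .iov_base = body, .iov_len = sizeof(body) },
    };
    /* gather-read both buffers from offset 0 without touching f_pos */
    ssize_t n = preadv(fd, iov, 2, 0);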
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 241ec3923f61..5ba343e61844 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -116,7 +116,6 @@ unsigned long __init calibrate_cpu(void)
116static struct irqaction irq0 = { 116static struct irqaction irq0 = {
117 .handler = timer_interrupt, 117 .handler = timer_interrupt,
118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, 118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
119 .mask = CPU_MASK_NONE,
120 .name = "timer" 119 .name = "timer"
121}; 120};
122 121
@@ -125,7 +124,6 @@ void __init hpet_time_init(void)
125 if (!hpet_enable()) 124 if (!hpet_enable())
126 setup_pit_timer(); 125 setup_pit_timer();
127 126
128 irq0.mask = cpumask_of_cpu(0);
129 setup_irq(0, &irq0); 127 setup_irq(0, &irq0);
130} 128}
131 129
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index d038b9c45cf8..deb5ebb32c3b 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -275,6 +275,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
275 return NULL; 275 return NULL;
276} 276}
277 277
278static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
279
278/** 280/**
279 * uv_flush_tlb_others - globally purge translation cache of a virtual 281 * uv_flush_tlb_others - globally purge translation cache of a virtual
280 * address or all TLB's 282 * address or all TLB's
@@ -304,8 +306,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
304 struct mm_struct *mm, 306 struct mm_struct *mm,
305 unsigned long va, unsigned int cpu) 307 unsigned long va, unsigned int cpu)
306{ 308{
307 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); 309 struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
308 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
309 int i; 310 int i;
310 int bit; 311 int bit;
311 int blade; 312 int blade;
@@ -750,16 +751,21 @@ static int __init uv_bau_init(void)
750 int node; 751 int node;
751 int nblades; 752 int nblades;
752 int last_blade; 753 int last_blade;
753 int cur_cpu = 0; 754 int cur_cpu;
754 755
755 if (!is_uv_system()) 756 if (!is_uv_system())
756 return 0; 757 return 0;
757 758
759 for_each_possible_cpu(cur_cpu)
760 alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
761 GFP_KERNEL, cpu_to_node(cur_cpu));
762
758 uv_bau_retry_limit = 1; 763 uv_bau_retry_limit = 1;
759 uv_nshift = uv_hub_info->n_val; 764 uv_nshift = uv_hub_info->n_val;
760 uv_mmask = (1UL << uv_hub_info->n_val) - 1; 765 uv_mmask = (1UL << uv_hub_info->n_val) - 1;
761 nblades = 0; 766 nblades = 0;
762 last_blade = -1; 767 last_blade = -1;
768 cur_cpu = 0;
763 for_each_online_node(node) { 769 for_each_online_node(node) {
764 blade = uv_node_to_blade_id(node); 770 blade = uv_node_to_blade_id(node);
765 if (blade == last_blade) 771 if (blade == last_blade)
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 0fcc95a354f7..7e4515957a1c 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -25,10 +25,10 @@
25 * 25 *
26 * Send feedback to <colpatch@us.ibm.com> 26 * Send feedback to <colpatch@us.ibm.com>
27 */ 27 */
28#include <linux/init.h>
29#include <linux/smp.h>
30#include <linux/nodemask.h> 28#include <linux/nodemask.h>
31#include <linux/mmzone.h> 29#include <linux/mmzone.h>
30#include <linux/init.h>
31#include <linux/smp.h>
32#include <asm/cpu.h> 32#include <asm/cpu.h>
33 33
34static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); 34static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
@@ -47,6 +47,7 @@ int __ref arch_register_cpu(int num)
47 */ 47 */
48 if (num) 48 if (num)
49 per_cpu(cpu_devices, num).cpu.hotpluggable = 1; 49 per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
50
50 return register_cpu(&per_cpu(cpu_devices, num).cpu, num); 51 return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
51} 52}
52EXPORT_SYMBOL(arch_register_cpu); 53EXPORT_SYMBOL(arch_register_cpu);
@@ -56,12 +57,13 @@ void arch_unregister_cpu(int num)
56 unregister_cpu(&per_cpu(cpu_devices, num).cpu); 57 unregister_cpu(&per_cpu(cpu_devices, num).cpu);
57} 58}
58EXPORT_SYMBOL(arch_unregister_cpu); 59EXPORT_SYMBOL(arch_unregister_cpu);
59#else 60#else /* CONFIG_HOTPLUG_CPU */
61
60static int __init arch_register_cpu(int num) 62static int __init arch_register_cpu(int num)
61{ 63{
62 return register_cpu(&per_cpu(cpu_devices, num).cpu, num); 64 return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
63} 65}
64#endif /*CONFIG_HOTPLUG_CPU*/ 66#endif /* CONFIG_HOTPLUG_CPU */
65 67
66static int __init topology_init(void) 68static int __init topology_init(void)
67{ 69{
@@ -70,11 +72,11 @@ static int __init topology_init(void)
70#ifdef CONFIG_NUMA 72#ifdef CONFIG_NUMA
71 for_each_online_node(i) 73 for_each_online_node(i)
72 register_one_node(i); 74 register_one_node(i);
73#endif /* CONFIG_NUMA */ 75#endif
74 76
75 for_each_present_cpu(i) 77 for_each_present_cpu(i)
76 arch_register_cpu(i); 78 arch_register_cpu(i);
79
77 return 0; 80 return 0;
78} 81}
79
80subsys_initcall(topology_init); 82subsys_initcall(topology_init);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 83d53ce5d4c4..7a567ebe6361 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -17,20 +17,21 @@
17#include <asm/delay.h> 17#include <asm/delay.h>
18#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
19 19
20unsigned int cpu_khz; /* TSC clocks / usec, not used here */ 20unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
21EXPORT_SYMBOL(cpu_khz); 21EXPORT_SYMBOL(cpu_khz);
22unsigned int tsc_khz; 22
23unsigned int __read_mostly tsc_khz;
23EXPORT_SYMBOL(tsc_khz); 24EXPORT_SYMBOL(tsc_khz);
24 25
25/* 26/*
26 * TSC can be unstable due to cpufreq or due to unsynced TSCs 27 * TSC can be unstable due to cpufreq or due to unsynced TSCs
27 */ 28 */
28static int tsc_unstable; 29static int __read_mostly tsc_unstable;
29 30
30/* native_sched_clock() is called before tsc_init(), so 31/* native_sched_clock() is called before tsc_init(), so
31 we must start with the TSC soft disabled to prevent 32 we must start with the TSC soft disabled to prevent
32 erroneous rdtsc usage on !cpu_has_tsc processors */ 33 erroneous rdtsc usage on !cpu_has_tsc processors */
33static int tsc_disabled = -1; 34static int __read_mostly tsc_disabled = -1;
34 35
35static int tsc_clocksource_reliable; 36static int tsc_clocksource_reliable;
36/* 37/*
@@ -273,30 +274,43 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
273 * use the TSC value at the transitions to calculate a pretty 274 * use the TSC value at the transitions to calculate a pretty
274 * good value for the TSC frequency. 275
275 */ 276 */
276static inline int pit_expect_msb(unsigned char val) 277static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
277{ 278{
278 int count = 0; 279 int count;
280 u64 tsc = 0;
279 281
280 for (count = 0; count < 50000; count++) { 282 for (count = 0; count < 50000; count++) {
281 /* Ignore LSB */ 283 /* Ignore LSB */
282 inb(0x42); 284 inb(0x42);
283 if (inb(0x42) != val) 285 if (inb(0x42) != val)
284 break; 286 break;
287 tsc = get_cycles();
285 } 288 }
286 return count > 50; 289 *deltap = get_cycles() - tsc;
290 *tscp = tsc;
291
292 /*
293 * We require _some_ success, but the quality control
294 * will be based on the error terms on the TSC values.
295 */
296 return count > 5;
287} 297}
288 298
289/* 299/*
290 * How many MSB values do we want to see? We aim for a 300 * How many MSB values do we want to see? We aim for
291 * 15ms calibration, which assuming a 2us counter read 301 * a maximum error rate of 500ppm (in practice the
292 * error should give us roughly 150 ppm precision for 302 * real error is much smaller), but refuse to spend
293 * the calibration. 303 * more than 25ms on it.
294 */ 304 */
295#define QUICK_PIT_MS 15 305#define MAX_QUICK_PIT_MS 25
296#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) 306#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
297 307
298static unsigned long quick_pit_calibrate(void) 308static unsigned long quick_pit_calibrate(void)
299{ 309{
310 int i;
311 u64 tsc, delta;
312 unsigned long d1, d2;
313
300 /* Set the Gate high, disable speaker */ 314 /* Set the Gate high, disable speaker */
301 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 315 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
302 316
@@ -315,45 +329,52 @@ static unsigned long quick_pit_calibrate(void)
315 outb(0xff, 0x42); 329 outb(0xff, 0x42);
316 outb(0xff, 0x42); 330 outb(0xff, 0x42);
317 331
318 if (pit_expect_msb(0xff)) { 332 /*
319 int i; 333 * The PIT starts counting at the next edge, so we
320 u64 t1, t2, delta; 334 * need to delay for a microsecond. The easiest way
321 unsigned char expect = 0xfe; 335 * to do that is to just read back the 16-bit counter
322 336 * once from the PIT.
323 t1 = get_cycles(); 337 */
324 for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { 338 inb(0x42);
325 if (!pit_expect_msb(expect)) 339 inb(0x42);
326 goto failed; 340
341 if (pit_expect_msb(0xff, &tsc, &d1)) {
342 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
343 if (!pit_expect_msb(0xff-i, &delta, &d2))
344 break;
345
346 /*
347 * Iterate until the error is less than 500 ppm
348 */
349 delta -= tsc;
350 if (d1+d2 < delta >> 11)
351 goto success;
327 } 352 }
328 t2 = get_cycles();
329
330 /*
331 * Make sure we can rely on the second TSC timestamp:
332 */
333 if (!pit_expect_msb(expect))
334 goto failed;
335
336 /*
337 * Ok, if we get here, then we've seen the
338 * MSB of the PIT decrement QUICK_PIT_ITERATIONS
339 * times, and each MSB had many hits, so we never
340 * had any sudden jumps.
341 *
342 * As a result, we can depend on there not being
343 * any odd delays anywhere, and the TSC reads are
344 * reliable.
345 *
346 * kHz = ticks / time-in-seconds / 1000;
347 * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000
348 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000)
349 */
350 delta = (t2 - t1)*PIT_TICK_RATE;
351 do_div(delta, QUICK_PIT_ITERATIONS*256*1000);
352 printk("Fast TSC calibration using PIT\n");
353 return delta;
354 } 353 }
355failed: 354 printk("Fast TSC calibration failed\n");
356 return 0; 355 return 0;
356
357success:
358 /*
359 * Ok, if we get here, then we've seen the
360 * MSB of the PIT decrement 'i' times, and the
361 * error has shrunk to less than 500 ppm.
362 *
363 * As a result, we can depend on there not being
364 * any odd delays anywhere, and the TSC reads are
365 * reliable (within the error). We also adjust the
366 * delta to the middle of the error bars, just
367 * because it looks nicer.
368 *
369 * kHz = ticks / time-in-seconds / 1000;
370 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
371 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
372 */
373 delta += (long)(d2 - d1)/2;
374 delta *= PIT_TICK_RATE;
375 do_div(delta, i*256*1000);
376 printk("Fast TSC calibration using PIT\n");
377 return delta;
357} 378}
358 379
359/** 380/**
@@ -523,8 +544,6 @@ unsigned long native_calibrate_tsc(void)
523 return tsc_pit_min; 544 return tsc_pit_min;
524} 545}
525 546
526#ifdef CONFIG_X86_32
527/* Only called from the Powernow K7 cpu freq driver */
528int recalibrate_cpu_khz(void) 547int recalibrate_cpu_khz(void)
529{ 548{
530#ifndef CONFIG_SMP 549#ifndef CONFIG_SMP
@@ -546,7 +565,6 @@ int recalibrate_cpu_khz(void)
546 565
547EXPORT_SYMBOL(recalibrate_cpu_khz); 566EXPORT_SYMBOL(recalibrate_cpu_khz);
548 567
549#endif /* CONFIG_X86_32 */
550 568
551/* Accelerators for sched_clock() 569/* Accelerators for sched_clock()
552 * convert from cycles(64bits) => nanoseconds (64bits) 570 * convert from cycles(64bits) => nanoseconds (64bits)
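The arithmetic in the new quick_pit_calibrate() success path can be checked with plain integer math. The following standalone user-space sketch uses invented sample values (i, delta, d1 and d2 are made up; only PIT_TICK_RATE is the real 8254 input clock). It mirrors the 500 ppm gate — d1 + d2 < delta >> 11, i.e. the combined TSC read slop must stay under delta/2048, about 488 ppm — and the kHz conversion done with do_div() in the kernel:

	#include <stdio.h>
	#include <stdint.h>

	#define PIT_TICK_RATE 1193182ul	/* 8254 PIT input clock, Hz */

	int main(void)
	{
		/* Hypothetical measurement: 58 MSB wraps of 256 PIT ticks each */
		unsigned int i = 58;
		uint64_t delta = 29866000;		/* TSC cycles across the wraps */
		unsigned long d1 = 2500, d2 = 2600;	/* per-read TSC uncertainty */

		/* The 500 ppm gate: total read slop must be < delta/2048 */
		if (d1 + d2 < (delta >> 11))
			printf("error bound met (%lu < %llu)\n",
			       d1 + d2, (unsigned long long)(delta >> 11));

		/* kHz = (cycles * PIT_TICK_RATE) / (i * 256 * 1000) */
		delta += (long)(d2 - d1) / 2;	/* centre inside the error bars */
		delta *= PIT_TICK_RATE;
		delta /= i * 256 * 1000;	/* do_div() in the kernel version */

		printf("TSC ~= %llu kHz\n", (unsigned long long)delta);
		return 0;
	}

With these sample numbers the program reports roughly 2,400,000 kHz, i.e. a 2.4 GHz TSC measured in under 13 ms.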
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e87..31ffc24eec4d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
578static irqreturn_t piix4_master_intr(int irq, void *dev_id) 578static irqreturn_t piix4_master_intr(int irq, void *dev_id)
579{ 579{
580 int realirq; 580 int realirq;
581 irq_desc_t *desc; 581 struct irq_desc *desc;
582 unsigned long flags; 582 unsigned long flags;
583 583
584 spin_lock_irqsave(&i8259A_lock, flags); 584 spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 2cc4a90e2cb3..95deb9f2211e 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -395,11 +395,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
395 vmi_ops.update_pte(ptep, VMI_PAGE_PT); 395 vmi_ops.update_pte(ptep, VMI_PAGE_PT);
396} 396}
397 397
398static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
399{
400 vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
401}
402
403static void vmi_set_pud(pud_t *pudp, pud_t pudval) 398static void vmi_set_pud(pud_t *pudp, pud_t pudval)
404{ 399{
405 /* Um, eww */ 400 /* Um, eww */
@@ -750,7 +745,6 @@ static inline int __init activate_vmi(void)
750 pv_mmu_ops.set_pmd = vmi_set_pmd; 745 pv_mmu_ops.set_pmd = vmi_set_pmd;
751#ifdef CONFIG_X86_PAE 746#ifdef CONFIG_X86_PAE
752 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; 747 pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
753 pv_mmu_ops.set_pte_present = vmi_set_pte_present;
754 pv_mmu_ops.set_pud = vmi_set_pud; 748 pv_mmu_ops.set_pud = vmi_set_pud;
755 pv_mmu_ops.pte_clear = vmi_pte_clear; 749 pv_mmu_ops.pte_clear = vmi_pte_clear;
756 pv_mmu_ops.pmd_clear = vmi_pmd_clear; 750 pv_mmu_ops.pmd_clear = vmi_pmd_clear;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 33a788d5879c..d303369a7bad 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -202,7 +202,6 @@ static struct irqaction vmi_clock_action = {
202 .name = "vmi-timer", 202 .name = "vmi-timer",
203 .handler = vmi_timer_interrupt, 203 .handler = vmi_timer_interrupt,
204 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, 204 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
205 .mask = CPU_MASK_ALL,
206}; 205};
207 206
208static void __devinit vmi_time_init_clockevent(void) 207static void __devinit vmi_time_init_clockevent(void)
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 0d860963f268..62ad500d55f3 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,15 +189,24 @@ SECTIONS
189 *(.bss) 189 *(.bss)
190 . = ALIGN(4); 190 . = ALIGN(4);
191 __bss_stop = .; 191 __bss_stop = .;
192 _end = . ; 192 }
193 /* This is where the kernel creates the early boot page tables */ 193
194 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
194 . = ALIGN(PAGE_SIZE); 195 . = ALIGN(PAGE_SIZE);
195 pg0 = . ; 196 __brk_base = . ;
197 . += 64 * 1024 ; /* 64k alignment slop space */
198 *(.brk_reservation) /* areas brk users have reserved */
199 __brk_limit = . ;
200 }
201
202 .end : AT(ADDR(.end) - LOAD_OFFSET) {
203 _end = . ;
196 } 204 }
197 205
198 /* Sections to be discarded */ 206 /* Sections to be discarded */
199 /DISCARD/ : { 207 /DISCARD/ : {
200 *(.exitcall.exit) 208 *(.exitcall.exit)
209 *(.discard)
201 } 210 }
202 211
203 STABS_DEBUG 212 STABS_DEBUG
@@ -205,6 +214,12 @@ SECTIONS
205 DWARF_DEBUG 214 DWARF_DEBUG
206} 215}
207 216
217/*
218 * Build-time check on the image size:
219 */
220ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
221 "kernel image bigger than KERNEL_IMAGE_SIZE")
222
208#ifdef CONFIG_KEXEC 223#ifdef CONFIG_KEXEC
209/* Link time checks */ 224/* Link time checks */
210#include <asm/kexec.h> 225#include <asm/kexec.h>
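The new .brk output section pairs with a reservation interface in <asm/setup.h>. A minimal sketch of how boot code would use it, assuming the RESERVE_BRK()/extend_brk() helpers that accompany this layout change (the names demo_pagetables and demo_use_brk are illustrative):

	#include <linux/init.h>
	#include <asm/page.h>
	#include <asm/setup.h>

	/* Emits a sized entry into .brk_reservation, growing __brk_limit. */
	RESERVE_BRK(demo_pagetables, 16 * PAGE_SIZE);

	static void __init demo_use_brk(void)
	{
		/* Carves 4 pages out of [__brk_base, __brk_limit). */
		void *p = extend_brk(4 * PAGE_SIZE, PAGE_SIZE);
		(void)p;
	}

The carrier function that RESERVE_BRK() generates lands in a throwaway section, which is why *(.discard) is added to /DISCARD/ in the same patch.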
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..c8742507b030 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -29,8 +29,8 @@ SECTIONS
29{ 29{
30 . = __START_KERNEL; 30 . = __START_KERNEL;
31 phys_startup_64 = startup_64 - LOAD_OFFSET; 31 phys_startup_64 = startup_64 - LOAD_OFFSET;
32 _text = .; /* Text and read-only data */
33 .text : AT(ADDR(.text) - LOAD_OFFSET) { 32 .text : AT(ADDR(.text) - LOAD_OFFSET) {
33 _text = .; /* Text and read-only data */
34 /* First the code that has to be first for bootstrapping */ 34 /* First the code that has to be first for bootstrapping */
35 *(.text.head) 35 *(.text.head)
36 _stext = .; 36 _stext = .;
@@ -61,13 +61,13 @@ SECTIONS
61 .data : AT(ADDR(.data) - LOAD_OFFSET) { 61 .data : AT(ADDR(.data) - LOAD_OFFSET) {
62 DATA_DATA 62 DATA_DATA
63 CONSTRUCTORS 63 CONSTRUCTORS
64 _edata = .; /* End of data section */
64 } :data 65 } :data
65 66
66 _edata = .; /* End of data section */
67 67
68 . = ALIGN(PAGE_SIZE);
69 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
70 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { 68 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
69 . = ALIGN(PAGE_SIZE);
70 . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
71 *(.data.cacheline_aligned) 71 *(.data.cacheline_aligned)
72 } 72 }
73 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); 73 . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
@@ -125,29 +125,29 @@ SECTIONS
125#undef VVIRT_OFFSET 125#undef VVIRT_OFFSET
126#undef VVIRT 126#undef VVIRT
127 127
128 . = ALIGN(THREAD_SIZE); /* init_task */
129 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { 128 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
129 . = ALIGN(THREAD_SIZE); /* init_task */
130 *(.data.init_task) 130 *(.data.init_task)
131 }:data.init 131 }:data.init
132 132
133 . = ALIGN(PAGE_SIZE);
134 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { 133 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
134 . = ALIGN(PAGE_SIZE);
135 *(.data.page_aligned) 135 *(.data.page_aligned)
136 } 136 }
137 137
138 /* might get freed after init */
139 . = ALIGN(PAGE_SIZE);
140 __smp_alt_begin = .;
141 __smp_locks = .;
142 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { 138 .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
139 /* might get freed after init */
140 . = ALIGN(PAGE_SIZE);
141 __smp_alt_begin = .;
142 __smp_locks = .;
143 *(.smp_locks) 143 *(.smp_locks)
144 __smp_locks_end = .;
145 . = ALIGN(PAGE_SIZE);
146 __smp_alt_end = .;
144 } 147 }
145 __smp_locks_end = .;
146 . = ALIGN(PAGE_SIZE);
147 __smp_alt_end = .;
148 148
149 . = ALIGN(PAGE_SIZE); /* Init code and data */ 149 . = ALIGN(PAGE_SIZE); /* Init code and data */
150 __init_begin = .; 150 __init_begin = .; /* paired with __init_end */
151 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { 151 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
152 _sinittext = .; 152 _sinittext = .;
153 INIT_TEXT 153 INIT_TEXT
@@ -159,40 +159,42 @@ SECTIONS
159 __initdata_end = .; 159 __initdata_end = .;
160 } 160 }
161 161
162 . = ALIGN(16); 162 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
163 __setup_start = .; 163 . = ALIGN(16);
164 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } 164 __setup_start = .;
165 __setup_end = .; 165 *(.init.setup)
166 __initcall_start = .; 166 __setup_end = .;
167 }
167 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { 168 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
169 __initcall_start = .;
168 INITCALLS 170 INITCALLS
171 __initcall_end = .;
169 } 172 }
170 __initcall_end = .;
171 __con_initcall_start = .;
172 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { 173 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
174 __con_initcall_start = .;
173 *(.con_initcall.init) 175 *(.con_initcall.init)
176 __con_initcall_end = .;
174 } 177 }
175 __con_initcall_end = .;
176 __x86_cpu_dev_start = .;
177 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { 178 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
179 __x86_cpu_dev_start = .;
178 *(.x86_cpu_dev.init) 180 *(.x86_cpu_dev.init)
181 __x86_cpu_dev_end = .;
179 } 182 }
180 __x86_cpu_dev_end = .;
181 SECURITY_INIT 183 SECURITY_INIT
182 184
183 . = ALIGN(8); 185 . = ALIGN(8);
184 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 186 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
185 __parainstructions = .; 187 __parainstructions = .;
186 *(.parainstructions) 188 *(.parainstructions)
187 __parainstructions_end = .; 189 __parainstructions_end = .;
188 } 190 }
189 191
190 . = ALIGN(8);
191 __alt_instructions = .;
192 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { 192 .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
193 . = ALIGN(8);
194 __alt_instructions = .;
193 *(.altinstructions) 195 *(.altinstructions)
196 __alt_instructions_end = .;
194 } 197 }
195 __alt_instructions_end = .;
196 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { 198 .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
197 *(.altinstr_replacement) 199 *(.altinstr_replacement)
198 } 200 }
@@ -207,9 +209,11 @@ SECTIONS
207 209
208#ifdef CONFIG_BLK_DEV_INITRD 210#ifdef CONFIG_BLK_DEV_INITRD
209 . = ALIGN(PAGE_SIZE); 211 . = ALIGN(PAGE_SIZE);
210 __initramfs_start = .; 212 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
211 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } 213 __initramfs_start = .;
212 __initramfs_end = .; 214 *(.init.ramfs)
215 __initramfs_end = .;
216 }
213#endif 217#endif
214 218
215#ifdef CONFIG_SMP 219#ifdef CONFIG_SMP
@@ -229,20 +233,29 @@ SECTIONS
229 . = ALIGN(PAGE_SIZE); 233 . = ALIGN(PAGE_SIZE);
230 __init_end = .; 234 __init_end = .;
231 235
232 . = ALIGN(PAGE_SIZE);
233 __nosave_begin = .;
234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 236 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave) 237 . = ALIGN(PAGE_SIZE);
238 __nosave_begin = .;
239 *(.data.nosave)
240 . = ALIGN(PAGE_SIZE);
241 __nosave_end = .;
236 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ 242 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
237 . = ALIGN(PAGE_SIZE);
238 __nosave_end = .;
239 243
240 __bss_start = .; /* BSS */
241 .bss : AT(ADDR(.bss) - LOAD_OFFSET) { 244 .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
245 . = ALIGN(PAGE_SIZE);
246 __bss_start = .; /* BSS */
242 *(.bss.page_aligned) 247 *(.bss.page_aligned)
243 *(.bss) 248 *(.bss)
244 } 249 __bss_stop = .;
245 __bss_stop = .; 250 }
251
252 .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
253 . = ALIGN(PAGE_SIZE);
254 __brk_base = . ;
255 . += 64 * 1024 ; /* 64k alignment slop space */
256 *(.brk_reservation) /* areas brk users have reserved */
257 __brk_limit = . ;
258 }
246 259
247 _end = . ; 260 _end = . ;
248 261
@@ -250,6 +263,7 @@ SECTIONS
250 /DISCARD/ : { 263 /DISCARD/ : {
251 *(.exitcall.exit) 264 *(.exitcall.exit)
252 *(.eh_frame) 265 *(.eh_frame)
266 *(.discard)
253 } 267 }
254 268
255 STABS_DEBUG 269 STABS_DEBUG
@@ -275,3 +289,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
275ASSERT((per_cpu__irq_stack_union == 0), 289ASSERT((per_cpu__irq_stack_union == 0),
276 "irq_stack_union is not at start of per-cpu area"); 290 "irq_stack_union is not at start of per-cpu area");
277#endif 291#endif
292
293#ifdef CONFIG_KEXEC
294#include <asm/kexec.h>
295
296ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
297 "kexec control code size is too big")
298#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 74de562812cc..a1d804bcd483 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -22,7 +22,7 @@
22#include <asm/paravirt.h> 22#include <asm/paravirt.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24 24
25#ifdef CONFIG_PARAVIRT 25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
26/* 26/*
27 * Interrupt control on vSMPowered systems: 27 * Interrupt control on vSMPowered systems:
28 * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' 28 * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
@@ -114,6 +114,7 @@ static void __init set_vsmp_pv_ops(void)
114} 114}
115#endif 115#endif
116 116
117#ifdef CONFIG_PCI
117static int is_vsmp = -1; 118static int is_vsmp = -1;
118 119
119static void __init detect_vsmp_box(void) 120static void __init detect_vsmp_box(void)
@@ -139,6 +140,15 @@ int is_vsmp_box(void)
139 } 140 }
140} 141}
141 142
143#else
144static void __init detect_vsmp_box(void)
145{
146}
147int is_vsmp_box(void)
148{
149 return 0;
150}
151#endif
142void __init vsmp_init(void) 152void __init vsmp_init(void)
143{ 153{
144 detect_vsmp_box(); 154 detect_vsmp_box();
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c7da3683f4c5..a58504ea78cc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
4config HAVE_KVM 4config HAVE_KVM
5 bool 5 bool
6 6
7config HAVE_KVM_IRQCHIP
8 bool
9 default y
10
7menuconfig VIRTUALIZATION 11menuconfig VIRTUALIZATION
8 bool "Virtualization" 12 bool "Virtualization"
9 depends on HAVE_KVM || X86 13 depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 72bd275a9b5c..c13bb92d3157 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,6 +201,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
201 if (!atomic_inc_and_test(&pt->pending)) 201 if (!atomic_inc_and_test(&pt->pending))
202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); 202 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
203 203
204 if (!pt->reinject)
205 atomic_set(&pt->pending, 1);
206
204 if (vcpu0 && waitqueue_active(&vcpu0->wq)) 207 if (vcpu0 && waitqueue_active(&vcpu0->wq))
205 wake_up_interruptible(&vcpu0->wq); 208 wake_up_interruptible(&vcpu0->wq);
206 209
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
536 pit->pit_state.irq_ack = 1; 539 pit->pit_state.irq_ack = 1;
537} 540}
538 541
542static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
543{
544 struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
545
546 if (!mask) {
547 atomic_set(&pit->pit_state.pit_timer.pending, 0);
548 pit->pit_state.irq_ack = 1;
549 }
550}
551
539struct kvm_pit *kvm_create_pit(struct kvm *kvm) 552struct kvm_pit *kvm_create_pit(struct kvm *kvm)
540{ 553{
541 struct kvm_pit *pit; 554 struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
545 if (!pit) 558 if (!pit)
546 return NULL; 559 return NULL;
547 560
548 mutex_lock(&kvm->lock);
549 pit->irq_source_id = kvm_request_irq_source_id(kvm); 561 pit->irq_source_id = kvm_request_irq_source_id(kvm);
550 mutex_unlock(&kvm->lock);
551 if (pit->irq_source_id < 0) { 562 if (pit->irq_source_id < 0) {
552 kfree(pit); 563 kfree(pit);
553 return NULL; 564 return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
580 pit_state->irq_ack_notifier.gsi = 0; 591 pit_state->irq_ack_notifier.gsi = 0;
581 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; 592 pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
582 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); 593 kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
594 pit_state->pit_timer.reinject = true;
583 mutex_unlock(&pit->pit_state.lock); 595 mutex_unlock(&pit->pit_state.lock);
584 596
585 kvm_pit_reset(pit); 597 kvm_pit_reset(pit);
586 598
599 pit->mask_notifier.func = pit_mask_notifer;
600 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
601
587 return pit; 602 return pit;
588} 603}
589 604
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
592 struct hrtimer *timer; 607 struct hrtimer *timer;
593 608
594 if (kvm->arch.vpit) { 609 if (kvm->arch.vpit) {
610 kvm_unregister_irq_mask_notifier(kvm, 0,
611 &kvm->arch.vpit->mask_notifier);
595 mutex_lock(&kvm->arch.vpit->pit_state.lock); 612 mutex_lock(&kvm->arch.vpit->pit_state.lock);
596 timer = &kvm->arch.vpit->pit_state.pit_timer.timer; 613 timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
597 hrtimer_cancel(timer); 614 hrtimer_cancel(timer);
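The reinject flag introduced above caps the pending count, so ticks that fire while the guest cannot take them collapse into a single injection instead of being replayed later. A toy user-space model of that policy (all names invented):

	#include <stdio.h>

	struct toy_timer { int pending; int reinject; };

	static void timer_fires(struct toy_timer *t)
	{
		t->pending++;
		if (!t->reinject)
			t->pending = 1;	/* coalesce missed ticks into one */
	}

	int main(void)
	{
		struct toy_timer t = { .pending = 0, .reinject = 0 };
		for (int i = 0; i < 5; i++)
			timer_fires(&t);
		printf("pending after 5 fires: %d\n", t.pending);	/* prints 1 */
		return 0;
	}

With reinject set, the same five fires would leave pending at 5 and each one would eventually be delivered.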
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..6acbe4b505d5 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
9 s64 period; /* unit: ns */ 9 s64 period; /* unit: ns */
10 s64 scheduled; 10 s64 scheduled;
11 atomic_t pending; 11 atomic_t pending;
12 bool reinject;
12}; 13};
13 14
14struct kvm_kpit_channel_state { 15struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
45 struct kvm *kvm; 46 struct kvm *kvm;
46 struct kvm_kpit_state pit_state; 47 struct kvm_kpit_state pit_state;
47 int irq_source_id; 48 int irq_source_id;
49 struct kvm_irq_mask_notifier mask_notifier;
48}; 50};
49 51
50#define KVM_PIT_BASE_ADDRESS 0x40 52#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 179dcb0103fd..1ccb50c74f18 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,11 +32,13 @@
32#include <linux/kvm_host.h> 32#include <linux/kvm_host.h>
33 33
34static void pic_lock(struct kvm_pic *s) 34static void pic_lock(struct kvm_pic *s)
35 __acquires(&s->lock)
35{ 36{
36 spin_lock(&s->lock); 37 spin_lock(&s->lock);
37} 38}
38 39
39static void pic_unlock(struct kvm_pic *s) 40static void pic_unlock(struct kvm_pic *s)
41 __releases(&s->lock)
40{ 42{
41 struct kvm *kvm = s->kvm; 43 struct kvm *kvm = s->kvm;
42 unsigned acks = s->pending_acks; 44 unsigned acks = s->pending_acks;
@@ -49,7 +51,8 @@ static void pic_unlock(struct kvm_pic *s)
49 spin_unlock(&s->lock); 51 spin_unlock(&s->lock);
50 52
51 while (acks) { 53 while (acks) {
52 kvm_notify_acked_irq(kvm, __ffs(acks)); 54 kvm_notify_acked_irq(kvm, SELECT_PIC(__ffs(acks)),
55 __ffs(acks));
53 acks &= acks - 1; 56 acks &= acks - 1;
54 } 57 }
55 58
@@ -76,12 +79,13 @@ void kvm_pic_clear_isr_ack(struct kvm *kvm)
76/* 79/*
77 * set irq level. If an edge is detected, then the IRR is set to 1 80 * set irq level. If an edge is detected, then the IRR is set to 1
78 */ 81 */
79static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) 82static inline int pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
80{ 83{
81 int mask; 84 int mask, ret = 1;
82 mask = 1 << irq; 85 mask = 1 << irq;
83 if (s->elcr & mask) /* level triggered */ 86 if (s->elcr & mask) /* level triggered */
84 if (level) { 87 if (level) {
88 ret = !(s->irr & mask);
85 s->irr |= mask; 89 s->irr |= mask;
86 s->last_irr |= mask; 90 s->last_irr |= mask;
87 } else { 91 } else {
@@ -90,11 +94,15 @@ static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
90 } 94 }
91 else /* edge triggered */ 95 else /* edge triggered */
92 if (level) { 96 if (level) {
93 if ((s->last_irr & mask) == 0) 97 if ((s->last_irr & mask) == 0) {
98 ret = !(s->irr & mask);
94 s->irr |= mask; 99 s->irr |= mask;
100 }
95 s->last_irr |= mask; 101 s->last_irr |= mask;
96 } else 102 } else
97 s->last_irr &= ~mask; 103 s->last_irr &= ~mask;
104
105 return (s->imr & mask) ? -1 : ret;
98} 106}
99 107
100/* 108/*
@@ -171,16 +179,19 @@ void kvm_pic_update_irq(struct kvm_pic *s)
171 pic_unlock(s); 179 pic_unlock(s);
172} 180}
173 181
174void kvm_pic_set_irq(void *opaque, int irq, int level) 182int kvm_pic_set_irq(void *opaque, int irq, int level)
175{ 183{
176 struct kvm_pic *s = opaque; 184 struct kvm_pic *s = opaque;
185 int ret = -1;
177 186
178 pic_lock(s); 187 pic_lock(s);
179 if (irq >= 0 && irq < PIC_NUM_PINS) { 188 if (irq >= 0 && irq < PIC_NUM_PINS) {
180 pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 189 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
181 pic_update_irq(s); 190 pic_update_irq(s);
182 } 191 }
183 pic_unlock(s); 192 pic_unlock(s);
193
194 return ret;
184} 195}
185 196
186/* 197/*
@@ -232,7 +243,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
232 } 243 }
233 pic_update_irq(s); 244 pic_update_irq(s);
234 pic_unlock(s); 245 pic_unlock(s);
235 kvm_notify_acked_irq(kvm, irq); 246 kvm_notify_acked_irq(kvm, SELECT_PIC(irq), irq);
236 247
237 return intno; 248 return intno;
238} 249}
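kvm_pic_set_irq()'s new int return distinguishes three outcomes, which a caller can use for interrupt-coalescing accounting. A hedged sketch of the convention (the reporting helper is invented, not part of this patch):

	#include <linux/kernel.h>

	/* ret < 0:  pin masked by the IMR, nothing latched
	 * ret == 0: edge coalesced with an IRR bit already set
	 * ret == 1: a fresh interrupt was latched
	 */
	static void toy_report_set_irq(int ret)
	{
		if (ret < 0)
			pr_debug("pic: irq masked\n");
		else if (ret == 0)
			pr_debug("pic: irq coalesced\n");
		else
			pr_debug("pic: irq delivered\n");
	}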
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 82579ee538d0..9f593188129e 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -32,6 +32,8 @@
32#include "lapic.h" 32#include "lapic.h"
33 33
34#define PIC_NUM_PINS 16 34#define PIC_NUM_PINS 16
35#define SELECT_PIC(irq) \
36 ((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)
35 37
36struct kvm; 38struct kvm;
37struct kvm_vcpu; 39struct kvm_vcpu;
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6..ed66e4c078dc 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
18}; 18};
19 19
20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
21#define NUM_DB_REGS 4
22 21
23struct kvm_vcpu; 22struct kvm_vcpu;
24 23
@@ -29,18 +28,23 @@ struct vcpu_svm {
29 struct svm_cpu_data *svm_data; 28 struct svm_cpu_data *svm_data;
30 uint64_t asid_generation; 29 uint64_t asid_generation;
31 30
32 unsigned long db_regs[NUM_DB_REGS];
33
34 u64 next_rip; 31 u64 next_rip;
35 32
36 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 33 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
37 u64 host_gs_base; 34 u64 host_gs_base;
38 unsigned long host_cr2; 35 unsigned long host_cr2;
39 unsigned long host_db_regs[NUM_DB_REGS];
40 unsigned long host_dr6;
41 unsigned long host_dr7;
42 36
43 u32 *msrpm; 37 u32 *msrpm;
38 struct vmcb *hsave;
39 u64 hsave_msr;
40
41 u64 nested_vmcb;
42
43 /* These are the merged vectors */
44 u32 *nested_msrpm;
45
46 /* gpa pointers to the real vectors */
47 u64 nested_vmcb_msrpm;
44}; 48};
45 49
46#endif 50#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2d4477c71473..2a36f7f7c4c7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
145 struct kvm_rmap_desc *more; 145 struct kvm_rmap_desc *more;
146}; 146};
147 147
148struct kvm_shadow_walk { 148struct kvm_shadow_walk_iterator {
149 int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, 149 u64 addr;
150 u64 addr, u64 *spte, int level); 150 hpa_t shadow_addr;
151 int level;
152 u64 *sptep;
153 unsigned index;
151}; 154};
152 155
156#define for_each_shadow_entry(_vcpu, _addr, _walker) \
157 for (shadow_walk_init(&(_walker), _vcpu, _addr); \
158 shadow_walk_okay(&(_walker)); \
159 shadow_walk_next(&(_walker)))
160
161
153struct kvm_unsync_walk { 162struct kvm_unsync_walk {
154 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); 163 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
155}; 164};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
343 352
344 BUG_ON(!mc->nobjs); 353 BUG_ON(!mc->nobjs);
345 p = mc->objects[--mc->nobjs]; 354 p = mc->objects[--mc->nobjs];
346 memset(p, 0, size);
347 return p; 355 return p;
348} 356}
349 357
@@ -794,10 +802,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
794 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); 802 set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
795 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); 803 list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
796 INIT_LIST_HEAD(&sp->oos_link); 804 INIT_LIST_HEAD(&sp->oos_link);
797 ASSERT(is_empty_shadow_page(sp->spt));
798 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); 805 bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
799 sp->multimapped = 0; 806 sp->multimapped = 0;
800 sp->global = 1;
801 sp->parent_pte = parent_pte; 807 sp->parent_pte = parent_pte;
802 --vcpu->kvm->arch.n_free_mmu_pages; 808 --vcpu->kvm->arch.n_free_mmu_pages;
803 return sp; 809 return sp;
@@ -983,8 +989,8 @@ struct kvm_mmu_pages {
983 idx < 512; \ 989 idx < 512; \
984 idx = find_next_bit(bitmap, 512, idx+1)) 990 idx = find_next_bit(bitmap, 512, idx+1))
985 991
986int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, 992static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
987 int idx) 993 int idx)
988{ 994{
989 int i; 995 int i;
990 996
@@ -1059,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
1059 index = kvm_page_table_hashfn(gfn); 1065 index = kvm_page_table_hashfn(gfn);
1060 bucket = &kvm->arch.mmu_page_hash[index]; 1066 bucket = &kvm->arch.mmu_page_hash[index];
1061 hlist_for_each_entry(sp, node, bucket, hash_link) 1067 hlist_for_each_entry(sp, node, bucket, hash_link)
1062 if (sp->gfn == gfn && !sp->role.metaphysical 1068 if (sp->gfn == gfn && !sp->role.direct
1063 && !sp->role.invalid) { 1069 && !sp->role.invalid) {
1064 pgprintk("%s: found role %x\n", 1070 pgprintk("%s: found role %x\n",
1065 __func__, sp->role.word); 1071 __func__, sp->role.word);
@@ -1115,8 +1121,9 @@ struct mmu_page_path {
1115 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ 1121 i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
1116 i = mmu_pages_next(&pvec, &parents, i)) 1122 i = mmu_pages_next(&pvec, &parents, i))
1117 1123
1118int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, 1124static int mmu_pages_next(struct kvm_mmu_pages *pvec,
1119 int i) 1125 struct mmu_page_path *parents,
1126 int i)
1120{ 1127{
1121 int n; 1128 int n;
1122 1129
@@ -1135,7 +1142,7 @@ int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
1135 return n; 1142 return n;
1136} 1143}
1137 1144
1138void mmu_pages_clear_parents(struct mmu_page_path *parents) 1145static void mmu_pages_clear_parents(struct mmu_page_path *parents)
1139{ 1146{
1140 struct kvm_mmu_page *sp; 1147 struct kvm_mmu_page *sp;
1141 unsigned int level = 0; 1148 unsigned int level = 0;
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1193 gfn_t gfn, 1200 gfn_t gfn,
1194 gva_t gaddr, 1201 gva_t gaddr,
1195 unsigned level, 1202 unsigned level,
1196 int metaphysical, 1203 int direct,
1197 unsigned access, 1204 unsigned access,
1198 u64 *parent_pte) 1205 u64 *parent_pte)
1199{ 1206{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1204 struct kvm_mmu_page *sp; 1211 struct kvm_mmu_page *sp;
1205 struct hlist_node *node, *tmp; 1212 struct hlist_node *node, *tmp;
1206 1213
1207 role.word = 0; 1214 role = vcpu->arch.mmu.base_role;
1208 role.glevels = vcpu->arch.mmu.root_level;
1209 role.level = level; 1215 role.level = level;
1210 role.metaphysical = metaphysical; 1216 role.direct = direct;
1211 role.access = access; 1217 role.access = access;
1212 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { 1218 if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
1213 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); 1219 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1242 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); 1248 pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
1243 sp->gfn = gfn; 1249 sp->gfn = gfn;
1244 sp->role = role; 1250 sp->role = role;
1251 sp->global = role.cr4_pge;
1245 hlist_add_head(&sp->hash_link, bucket); 1252 hlist_add_head(&sp->hash_link, bucket);
1246 if (!metaphysical) { 1253 if (!direct) {
1247 if (rmap_write_protect(vcpu->kvm, gfn)) 1254 if (rmap_write_protect(vcpu->kvm, gfn))
1248 kvm_flush_remote_tlbs(vcpu->kvm); 1255 kvm_flush_remote_tlbs(vcpu->kvm);
1249 account_shadowed(vcpu->kvm, gfn); 1256 account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
1255 return sp; 1262 return sp;
1256} 1263}
1257 1264
1258static int walk_shadow(struct kvm_shadow_walk *walker, 1265static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
1259 struct kvm_vcpu *vcpu, u64 addr) 1266 struct kvm_vcpu *vcpu, u64 addr)
1260{ 1267{
1261 hpa_t shadow_addr; 1268 iterator->addr = addr;
1262 int level; 1269 iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
1263 int r; 1270 iterator->level = vcpu->arch.mmu.shadow_root_level;
1264 u64 *sptep; 1271 if (iterator->level == PT32E_ROOT_LEVEL) {
1265 unsigned index; 1272 iterator->shadow_addr
1266 1273 = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
1267 shadow_addr = vcpu->arch.mmu.root_hpa; 1274 iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
1268 level = vcpu->arch.mmu.shadow_root_level; 1275 --iterator->level;
1269 if (level == PT32E_ROOT_LEVEL) { 1276 if (!iterator->shadow_addr)
1270 shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; 1277 iterator->level = 0;
1271 shadow_addr &= PT64_BASE_ADDR_MASK;
1272 if (!shadow_addr)
1273 return 1;
1274 --level;
1275 } 1278 }
1279}
1276 1280
1277 while (level >= PT_PAGE_TABLE_LEVEL) { 1281static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
1278 index = SHADOW_PT_INDEX(addr, level); 1282{
1279 sptep = ((u64 *)__va(shadow_addr)) + index; 1283 if (iterator->level < PT_PAGE_TABLE_LEVEL)
1280 r = walker->entry(walker, vcpu, addr, sptep, level); 1284 return false;
1281 if (r) 1285 iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
1282 return r; 1286 iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
1283 shadow_addr = *sptep & PT64_BASE_ADDR_MASK; 1287 return true;
1284 --level; 1288}
1285 } 1289
1286 return 0; 1290static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
1291{
1292 iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
1293 --iterator->level;
1287} 1294}
1288 1295
1289static void kvm_mmu_page_unlink_children(struct kvm *kvm, 1296static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1388 kvm_mmu_page_unlink_children(kvm, sp); 1395 kvm_mmu_page_unlink_children(kvm, sp);
1389 kvm_mmu_unlink_parents(kvm, sp); 1396 kvm_mmu_unlink_parents(kvm, sp);
1390 kvm_flush_remote_tlbs(kvm); 1397 kvm_flush_remote_tlbs(kvm);
1391 if (!sp->role.invalid && !sp->role.metaphysical) 1398 if (!sp->role.invalid && !sp->role.direct)
1392 unaccount_shadowed(kvm, sp->gfn); 1399 unaccount_shadowed(kvm, sp->gfn);
1393 if (sp->unsync) 1400 if (sp->unsync)
1394 kvm_unlink_unsync_page(kvm, sp); 1401 kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1451 index = kvm_page_table_hashfn(gfn); 1458 index = kvm_page_table_hashfn(gfn);
1452 bucket = &kvm->arch.mmu_page_hash[index]; 1459 bucket = &kvm->arch.mmu_page_hash[index];
1453 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) 1460 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
1454 if (sp->gfn == gfn && !sp->role.metaphysical) { 1461 if (sp->gfn == gfn && !sp->role.direct) {
1455 pgprintk("%s: gfn %lx role %x\n", __func__, gfn, 1462 pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
1456 sp->role.word); 1463 sp->role.word);
1457 r = 1; 1464 r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
1463 1470
1464static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) 1471static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1465{ 1472{
1473 unsigned index;
1474 struct hlist_head *bucket;
1466 struct kvm_mmu_page *sp; 1475 struct kvm_mmu_page *sp;
1476 struct hlist_node *node, *nn;
1467 1477
1468 while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) { 1478 index = kvm_page_table_hashfn(gfn);
1469 pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); 1479 bucket = &kvm->arch.mmu_page_hash[index];
1470 kvm_mmu_zap_page(kvm, sp); 1480 hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
1481 if (sp->gfn == gfn && !sp->role.direct
1482 && !sp->role.invalid) {
1483 pgprintk("%s: zap %lx %x\n",
1484 __func__, gfn, sp->role.word);
1485 kvm_mmu_zap_page(kvm, sp);
1486 }
1471 } 1487 }
1472} 1488}
1473 1489
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1622 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1638 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1623 /* don't unsync if pagetable is shadowed with multiple roles */ 1639 /* don't unsync if pagetable is shadowed with multiple roles */
1624 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { 1640 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
1625 if (s->gfn != sp->gfn || s->role.metaphysical) 1641 if (s->gfn != sp->gfn || s->role.direct)
1626 continue; 1642 continue;
1627 if (s->role.word != sp->role.word) 1643 if (s->role.word != sp->role.word)
1628 return 1; 1644 return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1669 u64 mt_mask = shadow_mt_mask; 1685 u64 mt_mask = shadow_mt_mask;
1670 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); 1686 struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
1671 1687
1672 if (!(vcpu->arch.cr4 & X86_CR4_PGE))
1673 global = 0;
1674 if (!global && sp->global) { 1688 if (!global && sp->global) {
1675 sp->global = 0; 1689 sp->global = 0;
1676 if (sp->unsync) { 1690 if (sp->unsync) {
@@ -1777,12 +1791,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1777 pgprintk("hfn old %lx new %lx\n", 1791 pgprintk("hfn old %lx new %lx\n",
1778 spte_to_pfn(*shadow_pte), pfn); 1792 spte_to_pfn(*shadow_pte), pfn);
1779 rmap_remove(vcpu->kvm, shadow_pte); 1793 rmap_remove(vcpu->kvm, shadow_pte);
1780 } else { 1794 } else
1781 if (largepage) 1795 was_rmapped = 1;
1782 was_rmapped = is_large_pte(*shadow_pte);
1783 else
1784 was_rmapped = 1;
1785 }
1786 } 1796 }
1787 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1797 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1788 dirty, largepage, global, gfn, pfn, speculative, true)) { 1798 dirty, largepage, global, gfn, pfn, speculative, true)) {
@@ -1820,67 +1830,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
1820{ 1830{
1821} 1831}
1822 1832
1823struct direct_shadow_walk { 1833static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
1824 struct kvm_shadow_walk walker; 1834 int largepage, gfn_t gfn, pfn_t pfn)
1825 pfn_t pfn;
1826 int write;
1827 int largepage;
1828 int pt_write;
1829};
1830
1831static int direct_map_entry(struct kvm_shadow_walk *_walk,
1832 struct kvm_vcpu *vcpu,
1833 u64 addr, u64 *sptep, int level)
1834{ 1835{
1835 struct direct_shadow_walk *walk = 1836 struct kvm_shadow_walk_iterator iterator;
1836 container_of(_walk, struct direct_shadow_walk, walker);
1837 struct kvm_mmu_page *sp; 1837 struct kvm_mmu_page *sp;
1838 int pt_write = 0;
1838 gfn_t pseudo_gfn; 1839 gfn_t pseudo_gfn;
1839 gfn_t gfn = addr >> PAGE_SHIFT;
1840
1841 if (level == PT_PAGE_TABLE_LEVEL
1842 || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
1843 mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
1844 0, walk->write, 1, &walk->pt_write,
1845 walk->largepage, 0, gfn, walk->pfn, false);
1846 ++vcpu->stat.pf_fixed;
1847 return 1;
1848 }
1849 1840
1850 if (*sptep == shadow_trap_nonpresent_pte) { 1841 for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
1851 pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; 1842 if (iterator.level == PT_PAGE_TABLE_LEVEL
1852 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, 1843 || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
1853 1, ACC_ALL, sptep); 1844 mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
1854 if (!sp) { 1845 0, write, 1, &pt_write,
1855 pgprintk("nonpaging_map: ENOMEM\n"); 1846 largepage, 0, gfn, pfn, false);
1856 kvm_release_pfn_clean(walk->pfn); 1847 ++vcpu->stat.pf_fixed;
1857 return -ENOMEM; 1848 break;
1858 } 1849 }
1859 1850
1860 set_shadow_pte(sptep, 1851 if (*iterator.sptep == shadow_trap_nonpresent_pte) {
1861 __pa(sp->spt) 1852 pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
1862 | PT_PRESENT_MASK | PT_WRITABLE_MASK 1853 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
1863 | shadow_user_mask | shadow_x_mask); 1854 iterator.level - 1,
1864 } 1855 1, ACC_ALL, iterator.sptep);
1865 return 0; 1856 if (!sp) {
1866} 1857 pgprintk("nonpaging_map: ENOMEM\n");
1858 kvm_release_pfn_clean(pfn);
1859 return -ENOMEM;
1860 }
1867 1861
1868static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 1862 set_shadow_pte(iterator.sptep,
1869 int largepage, gfn_t gfn, pfn_t pfn) 1863 __pa(sp->spt)
1870{ 1864 | PT_PRESENT_MASK | PT_WRITABLE_MASK
1871 int r; 1865 | shadow_user_mask | shadow_x_mask);
1872 struct direct_shadow_walk walker = { 1866 }
1873 .walker = { .entry = direct_map_entry, }, 1867 }
1874 .pfn = pfn, 1868 return pt_write;
1875 .largepage = largepage,
1876 .write = write,
1877 .pt_write = 0,
1878 };
1879
1880 r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
1881 if (r < 0)
1882 return r;
1883 return walker.pt_write;
1884} 1869}
1885 1870
1886static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) 1871static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1962,7 +1947,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1962 int i; 1947 int i;
1963 gfn_t root_gfn; 1948 gfn_t root_gfn;
1964 struct kvm_mmu_page *sp; 1949 struct kvm_mmu_page *sp;
1965 int metaphysical = 0; 1950 int direct = 0;
1966 1951
1967 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT; 1952 root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
1968 1953
@@ -1971,18 +1956,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1971 1956
1972 ASSERT(!VALID_PAGE(root)); 1957 ASSERT(!VALID_PAGE(root));
1973 if (tdp_enabled) 1958 if (tdp_enabled)
1974 metaphysical = 1; 1959 direct = 1;
1975 sp = kvm_mmu_get_page(vcpu, root_gfn, 0, 1960 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
1976 PT64_ROOT_LEVEL, metaphysical, 1961 PT64_ROOT_LEVEL, direct,
1977 ACC_ALL, NULL); 1962 ACC_ALL, NULL);
1978 root = __pa(sp->spt); 1963 root = __pa(sp->spt);
1979 ++sp->root_count; 1964 ++sp->root_count;
1980 vcpu->arch.mmu.root_hpa = root; 1965 vcpu->arch.mmu.root_hpa = root;
1981 return; 1966 return;
1982 } 1967 }
1983 metaphysical = !is_paging(vcpu); 1968 direct = !is_paging(vcpu);
1984 if (tdp_enabled) 1969 if (tdp_enabled)
1985 metaphysical = 1; 1970 direct = 1;
1986 for (i = 0; i < 4; ++i) { 1971 for (i = 0; i < 4; ++i) {
1987 hpa_t root = vcpu->arch.mmu.pae_root[i]; 1972 hpa_t root = vcpu->arch.mmu.pae_root[i];
1988 1973
@@ -1996,7 +1981,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1996 } else if (vcpu->arch.mmu.root_level == 0) 1981 } else if (vcpu->arch.mmu.root_level == 0)
1997 root_gfn = 0; 1982 root_gfn = 0;
1998 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, 1983 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
1999 PT32_ROOT_LEVEL, metaphysical, 1984 PT32_ROOT_LEVEL, direct,
2000 ACC_ALL, NULL); 1985 ACC_ALL, NULL);
2001 root = __pa(sp->spt); 1986 root = __pa(sp->spt);
2002 ++sp->root_count; 1987 ++sp->root_count;
@@ -2251,17 +2236,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
2251 2236
2252static int init_kvm_softmmu(struct kvm_vcpu *vcpu) 2237static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
2253{ 2238{
2239 int r;
2240
2254 ASSERT(vcpu); 2241 ASSERT(vcpu);
2255 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); 2242 ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
2256 2243
2257 if (!is_paging(vcpu)) 2244 if (!is_paging(vcpu))
2258 return nonpaging_init_context(vcpu); 2245 r = nonpaging_init_context(vcpu);
2259 else if (is_long_mode(vcpu)) 2246 else if (is_long_mode(vcpu))
2260 return paging64_init_context(vcpu); 2247 r = paging64_init_context(vcpu);
2261 else if (is_pae(vcpu)) 2248 else if (is_pae(vcpu))
2262 return paging32E_init_context(vcpu); 2249 r = paging32E_init_context(vcpu);
2263 else 2250 else
2264 return paging32_init_context(vcpu); 2251 r = paging32_init_context(vcpu);
2252
2253 vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
2254
2255 return r;
2265} 2256}
2266 2257
2267static int init_kvm_mmu(struct kvm_vcpu *vcpu) 2258static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2492,7 +2483,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
2492 index = kvm_page_table_hashfn(gfn); 2483 index = kvm_page_table_hashfn(gfn);
2493 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 2484 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
2494 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { 2485 hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
2495 if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) 2486 if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
2496 continue; 2487 continue;
2497 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; 2488 pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
2498 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); 2489 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3130,7 +3121,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
3130 gfn_t gfn; 3121 gfn_t gfn;
3131 3122
3132 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) { 3123 list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
3133 if (sp->role.metaphysical) 3124 if (sp->role.direct)
3134 continue; 3125 continue;
3135 3126
3136 gfn = unalias_gfn(vcpu->kvm, sp->gfn); 3127 gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 258e5d56298e..eaab2145f62b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -54,7 +54,7 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
54static inline int is_long_mode(struct kvm_vcpu *vcpu) 54static inline int is_long_mode(struct kvm_vcpu *vcpu)
55{ 55{
56#ifdef CONFIG_X86_64 56#ifdef CONFIG_X86_64
57 return vcpu->arch.shadow_efer & EFER_LME; 57 return vcpu->arch.shadow_efer & EFER_LMA;
58#else 58#else
59 return 0; 59 return 0;
60#endif 60#endif
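The EFER_LME to EFER_LMA change matters for a guest caught mid-transition: LME is set by software before paging is enabled, while LMA is set by hardware only once long mode is actually active. A standalone illustration, with bit positions per the architected EFER layout:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define EFER_LME (1ull << 8)	/* long mode enable, set by the guest */
	#define EFER_LMA (1ull << 10)	/* long mode active, set by hardware */

	static bool is_long_mode(uint64_t efer)
	{
		return efer & EFER_LMA;
	}

	int main(void)
	{
		/* Guest has enabled LME but has not yet turned on paging: */
		uint64_t efer = EFER_LME;
		printf("mid-transition guest in long mode? %d\n",
		       is_long_mode(efer));	/* prints 0 */
		return 0;
	}

Testing LME here would have reported long mode for a guest still executing with 32-bit semantics.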
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad..6bd70206c561 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
25#if PTTYPE == 64 25#if PTTYPE == 64
26 #define pt_element_t u64 26 #define pt_element_t u64
27 #define guest_walker guest_walker64 27 #define guest_walker guest_walker64
28 #define shadow_walker shadow_walker64
29 #define FNAME(name) paging##64_##name 28 #define FNAME(name) paging##64_##name
30 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK 29 #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
31 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK 30 #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
42#elif PTTYPE == 32 41#elif PTTYPE == 32
43 #define pt_element_t u32 42 #define pt_element_t u32
44 #define guest_walker guest_walker32 43 #define guest_walker guest_walker32
45 #define shadow_walker shadow_walker32
46 #define FNAME(name) paging##32_##name 44 #define FNAME(name) paging##32_##name
47 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK 45 #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
48 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK 46 #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
73 u32 error_code; 71 u32 error_code;
74}; 72};
75 73
76struct shadow_walker {
77 struct kvm_shadow_walk walker;
78 struct guest_walker *guest_walker;
79 int user_fault;
80 int write_fault;
81 int largepage;
82 int *ptwrite;
83 pfn_t pfn;
84 u64 *sptep;
85 gpa_t pte_gpa;
86};
87
88static gfn_t gpte_to_gfn(pt_element_t gpte) 74static gfn_t gpte_to_gfn(pt_element_t gpte)
89{ 75{
90 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; 76 return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
283/* 269/*
284 * Fetch a shadow pte for a specific level in the paging hierarchy. 270 * Fetch a shadow pte for a specific level in the paging hierarchy.
285 */ 271 */
286static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, 272static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
287 struct kvm_vcpu *vcpu, u64 addr, 273 struct guest_walker *gw,
288 u64 *sptep, int level) 274 int user_fault, int write_fault, int largepage,
275 int *ptwrite, pfn_t pfn)
289{ 276{
290 struct shadow_walker *sw =
291 container_of(_sw, struct shadow_walker, walker);
292 struct guest_walker *gw = sw->guest_walker;
293 unsigned access = gw->pt_access; 277 unsigned access = gw->pt_access;
294 struct kvm_mmu_page *shadow_page; 278 struct kvm_mmu_page *shadow_page;
295 u64 spte; 279 u64 spte, *sptep;
296 int metaphysical; 280 int direct;
297 gfn_t table_gfn; 281 gfn_t table_gfn;
298 int r; 282 int r;
283 int level;
299 pt_element_t curr_pte; 284 pt_element_t curr_pte;
285 struct kvm_shadow_walk_iterator iterator;
300 286
301 if (level == PT_PAGE_TABLE_LEVEL 287 if (!is_present_pte(gw->ptes[gw->level - 1]))
302 || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { 288 return NULL;
303 mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
304 sw->user_fault, sw->write_fault,
305 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
306 sw->ptwrite, sw->largepage,
307 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
308 gw->gfn, sw->pfn, false);
309 sw->sptep = sptep;
310 return 1;
311 }
312 289
313 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) 290 for_each_shadow_entry(vcpu, addr, iterator) {
314 return 0; 291 level = iterator.level;
292 sptep = iterator.sptep;
293 if (level == PT_PAGE_TABLE_LEVEL
294 || (largepage && level == PT_DIRECTORY_LEVEL)) {
295 mmu_set_spte(vcpu, sptep, access,
296 gw->pte_access & access,
297 user_fault, write_fault,
298 gw->ptes[gw->level-1] & PT_DIRTY_MASK,
299 ptwrite, largepage,
300 gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
301 gw->gfn, pfn, false);
302 break;
303 }
315 304
316 if (is_large_pte(*sptep)) { 305 if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
317 set_shadow_pte(sptep, shadow_trap_nonpresent_pte); 306 continue;
318 kvm_flush_remote_tlbs(vcpu->kvm);
319 rmap_remove(vcpu->kvm, sptep);
320 }
321 307
322 if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { 308 if (is_large_pte(*sptep)) {
323 metaphysical = 1; 309 rmap_remove(vcpu->kvm, sptep);
324 if (!is_dirty_pte(gw->ptes[level - 1])) 310 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
325 access &= ~ACC_WRITE_MASK; 311 kvm_flush_remote_tlbs(vcpu->kvm);
326 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
327 } else {
328 metaphysical = 0;
329 table_gfn = gw->table_gfn[level - 2];
330 }
331 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
332 metaphysical, access, sptep);
333 if (!metaphysical) {
334 r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
335 &curr_pte, sizeof(curr_pte));
336 if (r || curr_pte != gw->ptes[level - 2]) {
337 kvm_mmu_put_page(shadow_page, sptep);
338 kvm_release_pfn_clean(sw->pfn);
339 sw->sptep = NULL;
340 return 1;
341 } 312 }
342 }
343 313
344 spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK 314 if (level == PT_DIRECTORY_LEVEL
345 | PT_WRITABLE_MASK | PT_USER_MASK; 315 && gw->level == PT_DIRECTORY_LEVEL) {
346 *sptep = spte; 316 direct = 1;
347 return 0; 317 if (!is_dirty_pte(gw->ptes[level - 1]))
348} 318 access &= ~ACC_WRITE_MASK;
349 319 table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
350static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, 320 } else {
351 struct guest_walker *guest_walker, 321 direct = 0;
352 int user_fault, int write_fault, int largepage, 322 table_gfn = gw->table_gfn[level - 2];
353 int *ptwrite, pfn_t pfn) 323 }
354{ 324 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
355 struct shadow_walker walker = { 325 direct, access, sptep);
356 .walker = { .entry = FNAME(shadow_walk_entry), }, 326 if (!direct) {
357 .guest_walker = guest_walker, 327 r = kvm_read_guest_atomic(vcpu->kvm,
358 .user_fault = user_fault, 328 gw->pte_gpa[level - 2],
359 .write_fault = write_fault, 329 &curr_pte, sizeof(curr_pte));
360 .largepage = largepage, 330 if (r || curr_pte != gw->ptes[level - 2]) {
361 .ptwrite = ptwrite, 331 kvm_mmu_put_page(shadow_page, sptep);
362 .pfn = pfn, 332 kvm_release_pfn_clean(pfn);
363 }; 333 sptep = NULL;
364 334 break;
365 if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) 335 }
366 return NULL; 336 }
367 337
368 walk_shadow(&walker.walker, vcpu, addr); 338 spte = __pa(shadow_page->spt)
339 | PT_PRESENT_MASK | PT_ACCESSED_MASK
340 | PT_WRITABLE_MASK | PT_USER_MASK;
341 *sptep = spte;
342 }
369 343
370 return walker.sptep; 344 return sptep;
371} 345}
372 346
373/* 347/*
@@ -465,54 +439,56 @@ out_unlock:
465 return 0; 439 return 0;
466} 440}
467 441
468static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, 442static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
469 struct kvm_vcpu *vcpu, u64 addr,
470 u64 *sptep, int level)
471{ 443{
472 struct shadow_walker *sw = 444 struct kvm_shadow_walk_iterator iterator;
473 container_of(_sw, struct shadow_walker, walker); 445 pt_element_t gpte;
474 446 gpa_t pte_gpa = -1;
475 /* FIXME: properly handle invlpg on large guest pages */ 447 int level;
476 if (level == PT_PAGE_TABLE_LEVEL || 448 u64 *sptep;
477 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { 449 int need_flush = 0;
478 struct kvm_mmu_page *sp = page_header(__pa(sptep));
479 450
480 sw->pte_gpa = (sp->gfn << PAGE_SHIFT); 451 spin_lock(&vcpu->kvm->mmu_lock);
481 sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
482 452
483 if (is_shadow_present_pte(*sptep)) { 453 for_each_shadow_entry(vcpu, gva, iterator) {
484 rmap_remove(vcpu->kvm, sptep); 454 level = iterator.level;
485 if (is_large_pte(*sptep)) 455 sptep = iterator.sptep;
486 --vcpu->kvm->stat.lpages; 456
457 /* FIXME: properly handle invlpg on large guest pages */
458 if (level == PT_PAGE_TABLE_LEVEL ||
459 ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
460 struct kvm_mmu_page *sp = page_header(__pa(sptep));
461
462 pte_gpa = (sp->gfn << PAGE_SHIFT);
463 pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
464
465 if (is_shadow_present_pte(*sptep)) {
466 rmap_remove(vcpu->kvm, sptep);
467 if (is_large_pte(*sptep))
468 --vcpu->kvm->stat.lpages;
469 need_flush = 1;
470 }
471 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
472 break;
487 } 473 }
488 set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
489 return 1;
490 }
491 if (!is_shadow_present_pte(*sptep))
492 return 1;
493 return 0;
494}
495 474
496static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) 475 if (!is_shadow_present_pte(*sptep))
497{ 476 break;
498 pt_element_t gpte; 477 }
499 struct shadow_walker walker = {
500 .walker = { .entry = FNAME(shadow_invlpg_entry), },
501 .pte_gpa = -1,
502 };
503 478
504 spin_lock(&vcpu->kvm->mmu_lock); 479 if (need_flush)
505 walk_shadow(&walker.walker, vcpu, gva); 480 kvm_flush_remote_tlbs(vcpu->kvm);
506 spin_unlock(&vcpu->kvm->mmu_lock); 481 spin_unlock(&vcpu->kvm->mmu_lock);
507 if (walker.pte_gpa == -1) 482
483 if (pte_gpa == -1)
508 return; 484 return;
509 if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, 485 if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
510 sizeof(pt_element_t))) 486 sizeof(pt_element_t)))
511 return; 487 return;
512 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { 488 if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
513 if (mmu_topup_memory_caches(vcpu)) 489 if (mmu_topup_memory_caches(vcpu))
514 return; 490 return;
515 kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, 491 kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
516 sizeof(pt_element_t), 0); 492 sizeof(pt_element_t), 0);
517 } 493 }
518} 494}
@@ -540,7 +516,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
540 pt_element_t pt[256 / sizeof(pt_element_t)]; 516 pt_element_t pt[256 / sizeof(pt_element_t)];
541 gpa_t pte_gpa; 517 gpa_t pte_gpa;
542 518
543 if (sp->role.metaphysical 519 if (sp->role.direct
544 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { 520 || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
545 nonpaging_prefetch_page(vcpu, sp); 521 nonpaging_prefetch_page(vcpu, sp);
546 return; 522 return;
@@ -619,7 +595,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
619 595
620#undef pt_element_t 596#undef pt_element_t
621#undef guest_walker 597#undef guest_walker
622#undef shadow_walker
623#undef FNAME 598#undef FNAME
624#undef PT_BASE_ADDR_MASK 599#undef PT_BASE_ADDR_MASK
625#undef PT_INDEX 600#undef PT_INDEX
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a9e769e4e251..1821c2078199 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
38#define IOPM_ALLOC_ORDER 2 38#define IOPM_ALLOC_ORDER 2
39#define MSRPM_ALLOC_ORDER 1 39#define MSRPM_ALLOC_ORDER 1
40 40
41#define DR7_GD_MASK (1 << 13)
42#define DR6_BD_MASK (1 << 13)
43
44#define SEG_TYPE_LDT 2 41#define SEG_TYPE_LDT 2
45#define SEG_TYPE_BUSY_TSS16 3 42#define SEG_TYPE_BUSY_TSS16 3
46 43
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
50 47
51#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 48#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
52 49
 50/* Turn on to get debugging output */
51/* #define NESTED_DEBUG */
52
53#ifdef NESTED_DEBUG
54#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
55#else
 56#define nsvm_printk(fmt, args...) do {} while (0)
57#endif
58
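
nsvm_printk() compiles to nothing unless NESTED_DEBUG is defined; the GCC-style args.../## args form swallows the trailing comma, so calls with and without varargs both build either way. Illustrative calls, both of which appear later in this patch:

	nsvm_printk("VMrun\n");				/* no varargs */
	nsvm_printk("VMexit -> EXCP 0x%x\n", nr);	/* with varargs */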
53/* enable NPT for AMD64 and X86 with PAE */ 59/* enable NPT for AMD64 and X86 with PAE */
54#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 60#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
55static bool npt_enabled = true; 61static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
60 66
61module_param(npt, int, S_IRUGO); 67module_param(npt, int, S_IRUGO);
62 68
69static int nested = 0;
70module_param(nested, int, S_IRUGO);
71
63static void kvm_reput_irq(struct vcpu_svm *svm); 72static void kvm_reput_irq(struct vcpu_svm *svm);
64static void svm_flush_tlb(struct kvm_vcpu *vcpu); 73static void svm_flush_tlb(struct kvm_vcpu *vcpu);
65 74
75static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
76static int nested_svm_vmexit(struct vcpu_svm *svm);
77static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
78 void *arg2, void *opaque);
79static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
80 bool has_error_code, u32 error_code);
81
66static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 82static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
67{ 83{
68 return container_of(vcpu, struct vcpu_svm, vcpu); 84 return container_of(vcpu, struct vcpu_svm, vcpu);
69} 85}
70 86
87static inline bool is_nested(struct vcpu_svm *svm)
88{
89 return svm->nested_vmcb;
90}
91
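
svm->nested_vmcb stores the guest-physical address of the L1 hypervisor's VMCB and doubles as the nesting flag: is_nested() simply tests it for non-zero, the VMRUN handler stores rax into it, and leaving nested mode resets it to 0 (see nested_svm_vmexit_real() below).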
71static unsigned long iopm_base; 92static unsigned long iopm_base;
72 93
73struct kvm_ldttss_desc { 94struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
157 asm volatile ("mov %0, %%cr2" :: "r" (val)); 178 asm volatile ("mov %0, %%cr2" :: "r" (val));
158} 179}
159 180
160static inline unsigned long read_dr6(void)
161{
162 unsigned long dr6;
163
164 asm volatile ("mov %%dr6, %0" : "=r" (dr6));
165 return dr6;
166}
167
168static inline void write_dr6(unsigned long val)
169{
170 asm volatile ("mov %0, %%dr6" :: "r" (val));
171}
172
173static inline unsigned long read_dr7(void)
174{
175 unsigned long dr7;
176
177 asm volatile ("mov %%dr7, %0" : "=r" (dr7));
178 return dr7;
179}
180
181static inline void write_dr7(unsigned long val)
182{
183 asm volatile ("mov %0, %%dr7" :: "r" (val));
184}
185
186static inline void force_new_asid(struct kvm_vcpu *vcpu) 181static inline void force_new_asid(struct kvm_vcpu *vcpu)
187{ 182{
188 to_svm(vcpu)->asid_generation--; 183 to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
198 if (!npt_enabled && !(efer & EFER_LMA)) 193 if (!npt_enabled && !(efer & EFER_LMA))
199 efer &= ~EFER_LME; 194 efer &= ~EFER_LME;
200 195
201 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 196 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
202 vcpu->arch.shadow_efer = efer; 197 vcpu->arch.shadow_efer = efer;
203} 198}
204 199
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
207{ 202{
208 struct vcpu_svm *svm = to_svm(vcpu); 203 struct vcpu_svm *svm = to_svm(vcpu);
209 204
205 /* If we are within a nested VM we'd better #VMEXIT and let the
206 guest handle the exception */
207 if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
208 return;
209
210 svm->vmcb->control.event_inj = nr 210 svm->vmcb->control.event_inj = nr
211 | SVM_EVTINJ_VALID 211 | SVM_EVTINJ_VALID
212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 212 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
242 kvm_rip_write(vcpu, svm->next_rip); 242 kvm_rip_write(vcpu, svm->next_rip);
243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 243 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
244 244
245 vcpu->arch.interrupt_window_open = 1; 245 vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
246} 246}
247 247
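
From this point on, interrupt-window bookkeeping is additionally gated on HF_GIF_MASK. GIF is AMD's global interrupt flag, set by STGI and cleared by CLGI (handlers added below); while it is clear the CPU takes no interrupts regardless of RFLAGS.IF, so an open interrupt window must reflect it.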
248static int has_svm(void) 248static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
250 const char *msg; 250 const char *msg;
251 251
252 if (!cpu_has_svm(&msg)) { 252 if (!cpu_has_svm(&msg)) {
253 printk(KERN_INFO "has_svn: %s\n", msg); 253 printk(KERN_INFO "has_svm: %s\n", msg);
254 return 0; 254 return 0;
255 } 255 }
256 256
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 292 svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
293 293
294 rdmsrl(MSR_EFER, efer); 294 rdmsrl(MSR_EFER, efer);
295 wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK); 295 wrmsrl(MSR_EFER, efer | EFER_SVME);
296 296
297 wrmsrl(MSR_VM_HSAVE_PA, 297 wrmsrl(MSR_VM_HSAVE_PA,
298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT); 298 page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,14 @@ static __init int svm_hardware_setup(void)
417 if (boot_cpu_has(X86_FEATURE_NX)) 417 if (boot_cpu_has(X86_FEATURE_NX))
418 kvm_enable_efer_bits(EFER_NX); 418 kvm_enable_efer_bits(EFER_NX);
419 419
420 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
421 kvm_enable_efer_bits(EFER_FFXSR);
422
423 if (nested) {
424 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
425 kvm_enable_efer_bits(EFER_SVME);
426 }
427
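
Nested SVM is off by default (nested = 0) and, presumably, is switched on by loading the module with the parameter set, e.g. kvm-amd with nested=1; only then does EFER.SVME become a guest-settable EFER bit via kvm_enable_efer_bits().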
420 for_each_online_cpu(cpu) { 428 for_each_online_cpu(cpu) {
421 r = svm_cpu_init(cpu); 429 r = svm_cpu_init(cpu);
422 if (r) 430 if (r)
@@ -559,7 +567,7 @@ static void init_vmcb(struct vcpu_svm *svm)
559 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 567 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
560 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 568 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
561 569
562 save->efer = MSR_EFER_SVME_MASK; 570 save->efer = EFER_SVME;
563 save->dr6 = 0xffff0ff0; 571 save->dr6 = 0xffff0ff0;
564 save->dr7 = 0x400; 572 save->dr7 = 0x400;
565 save->rflags = 2; 573 save->rflags = 2;
@@ -591,6 +599,9 @@ static void init_vmcb(struct vcpu_svm *svm)
591 save->cr4 = 0; 599 save->cr4 = 0;
592 } 600 }
593 force_new_asid(&svm->vcpu); 601 force_new_asid(&svm->vcpu);
602
603 svm->nested_vmcb = 0;
604 svm->vcpu.arch.hflags = HF_GIF_MASK;
594} 605}
595 606
596static int svm_vcpu_reset(struct kvm_vcpu *vcpu) 607static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +626,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
615 struct vcpu_svm *svm; 626 struct vcpu_svm *svm;
616 struct page *page; 627 struct page *page;
617 struct page *msrpm_pages; 628 struct page *msrpm_pages;
629 struct page *hsave_page;
630 struct page *nested_msrpm_pages;
618 int err; 631 int err;
619 632
620 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 633 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +650,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
637 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 650 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
638 if (!msrpm_pages) 651 if (!msrpm_pages)
639 goto uninit; 652 goto uninit;
653
654 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
655 if (!nested_msrpm_pages)
656 goto uninit;
657
640 svm->msrpm = page_address(msrpm_pages); 658 svm->msrpm = page_address(msrpm_pages);
641 svm_vcpu_init_msrpm(svm->msrpm); 659 svm_vcpu_init_msrpm(svm->msrpm);
642 660
661 hsave_page = alloc_page(GFP_KERNEL);
662 if (!hsave_page)
663 goto uninit;
664 svm->hsave = page_address(hsave_page);
665
666 svm->nested_msrpm = page_address(nested_msrpm_pages);
667
643 svm->vmcb = page_address(page); 668 svm->vmcb = page_address(page);
644 clear_page(svm->vmcb); 669 clear_page(svm->vmcb);
645 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 670 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
646 svm->asid_generation = 0; 671 svm->asid_generation = 0;
647 memset(svm->db_regs, 0, sizeof(svm->db_regs));
648 init_vmcb(svm); 672 init_vmcb(svm);
649 673
650 fx_init(&svm->vcpu); 674 fx_init(&svm->vcpu);
@@ -669,6 +693,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
669 693
670 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); 694 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
671 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); 695 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
696 __free_page(virt_to_page(svm->hsave));
697 __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
672 kvm_vcpu_uninit(vcpu); 698 kvm_vcpu_uninit(vcpu);
673 kmem_cache_free(kvm_vcpu_cache, svm); 699 kmem_cache_free(kvm_vcpu_cache, svm);
674} 700}
@@ -718,6 +744,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
718 to_svm(vcpu)->vmcb->save.rflags = rflags; 744 to_svm(vcpu)->vmcb->save.rflags = rflags;
719} 745}
720 746
747static void svm_set_vintr(struct vcpu_svm *svm)
748{
749 svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
750}
751
752static void svm_clear_vintr(struct vcpu_svm *svm)
753{
754 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
755}
756
721static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 757static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
722{ 758{
723 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 759 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -760,20 +796,37 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
760 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 796 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
761 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; 797 var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
762 798
763 /* 799 switch (seg) {
764 * SVM always stores 0 for the 'G' bit in the CS selector in 800 case VCPU_SREG_CS:
765 * the VMCB on a VMEXIT. This hurts cross-vendor migration: 801 /*
766 * Intel's VMENTRY has a check on the 'G' bit. 802 * SVM always stores 0 for the 'G' bit in the CS selector in
767 */ 803 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
768 if (seg == VCPU_SREG_CS) 804 * Intel's VMENTRY has a check on the 'G' bit.
805 */
769 var->g = s->limit > 0xfffff; 806 var->g = s->limit > 0xfffff;
770 807 break;
771 /* 808 case VCPU_SREG_TR:
772 * Work around a bug where the busy flag in the tr selector 809 /*
773 * isn't exposed 810 * Work around a bug where the busy flag in the tr selector
774 */ 811 * isn't exposed
775 if (seg == VCPU_SREG_TR) 812 */
776 var->type |= 0x2; 813 var->type |= 0x2;
814 break;
815 case VCPU_SREG_DS:
816 case VCPU_SREG_ES:
817 case VCPU_SREG_FS:
818 case VCPU_SREG_GS:
819 /*
820 * The accessed bit must always be set in the segment
821 * descriptor cache, although it can be cleared in the
822 * descriptor, the cached bit always remains at 1. Since
823 * Intel has a check on this, set it here to support
824 * cross-vendor migration.
825 */
826 if (!var->unusable)
827 var->type |= 0x1;
828 break;
829 }
777 830
778 var->unusable = !var->present; 831 var->unusable = !var->present;
779} 832}
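
The CS granularity fix-up relies on a simple invariant: a byte-granular descriptor can encode limits only up to 2^20 - 1, so any cached limit above 0xfffff must have come from a page-granular descriptor. A sketch of the forward computation (raw_limit and g are illustrative names for the 20-bit descriptor field and its G bit):

	/* effective limit as the CPU derives it from a descriptor */
	u32 limit = g ? ((raw_limit << 12) | 0xfff) : raw_limit;
	/* hence limit > 0xfffff implies G was set, which is what
	   var->g = s->limit > 0xfffff reconstructs */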
@@ -905,9 +958,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
905 958
906} 959}
907 960
908static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 961static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
909{ 962{
910 return -EOPNOTSUPP; 963 int old_debug = vcpu->guest_debug;
964 struct vcpu_svm *svm = to_svm(vcpu);
965
966 vcpu->guest_debug = dbg->control;
967
968 svm->vmcb->control.intercept_exceptions &=
969 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
970 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
971 if (vcpu->guest_debug &
972 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
973 svm->vmcb->control.intercept_exceptions |=
974 1 << DB_VECTOR;
975 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
976 svm->vmcb->control.intercept_exceptions |=
977 1 << BP_VECTOR;
978 } else
979 vcpu->guest_debug = 0;
980
981 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
982 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
983 else
984 svm->vmcb->save.dr7 = vcpu->arch.dr7;
985
986 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
987 svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
988 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
989 svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
990
991 return 0;
911} 992}
912 993
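
Userspace drives this path through the reworked KVM_SET_GUEST_DEBUG vcpu ioctl. A hedged sketch of enabling single-stepping, assuming a valid vcpu fd and minimal error handling:

	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
		perror("KVM_SET_GUEST_DEBUG");

Hardware breakpoints would additionally set KVM_GUESTDBG_USE_HW_BP and fill dbg.arch.debugreg[], whose slot 7 supplies the dr7 value the handler above copies into the VMCB.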
913static int svm_get_irq(struct kvm_vcpu *vcpu) 994static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1030,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
949 1030
950static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1031static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
951{ 1032{
952 unsigned long val = to_svm(vcpu)->db_regs[dr]; 1033 struct vcpu_svm *svm = to_svm(vcpu);
1034 unsigned long val;
1035
1036 switch (dr) {
1037 case 0 ... 3:
1038 val = vcpu->arch.db[dr];
1039 break;
1040 case 6:
1041 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1042 val = vcpu->arch.dr6;
1043 else
1044 val = svm->vmcb->save.dr6;
1045 break;
1046 case 7:
1047 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1048 val = vcpu->arch.dr7;
1049 else
1050 val = svm->vmcb->save.dr7;
1051 break;
1052 default:
1053 val = 0;
1054 }
1055
953 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); 1056 KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
954 return val; 1057 return val;
955} 1058}
@@ -959,33 +1062,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
959{ 1062{
960 struct vcpu_svm *svm = to_svm(vcpu); 1063 struct vcpu_svm *svm = to_svm(vcpu);
961 1064
962 *exception = 0; 1065 KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
963 1066
964 if (svm->vmcb->save.dr7 & DR7_GD_MASK) { 1067 *exception = 0;
965 svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
966 svm->vmcb->save.dr6 |= DR6_BD_MASK;
967 *exception = DB_VECTOR;
968 return;
969 }
970 1068
971 switch (dr) { 1069 switch (dr) {
972 case 0 ... 3: 1070 case 0 ... 3:
973 svm->db_regs[dr] = value; 1071 vcpu->arch.db[dr] = value;
1072 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1073 vcpu->arch.eff_db[dr] = value;
974 return; 1074 return;
975 case 4 ... 5: 1075 case 4 ... 5:
976 if (vcpu->arch.cr4 & X86_CR4_DE) { 1076 if (vcpu->arch.cr4 & X86_CR4_DE)
977 *exception = UD_VECTOR; 1077 *exception = UD_VECTOR;
1078 return;
1079 case 6:
1080 if (value & 0xffffffff00000000ULL) {
1081 *exception = GP_VECTOR;
978 return; 1082 return;
979 } 1083 }
980 case 7: { 1084 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
981 if (value & ~((1ULL << 32) - 1)) { 1085 return;
1086 case 7:
1087 if (value & 0xffffffff00000000ULL) {
982 *exception = GP_VECTOR; 1088 *exception = GP_VECTOR;
983 return; 1089 return;
984 } 1090 }
985 svm->vmcb->save.dr7 = value; 1091 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1092 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1093 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1094 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1095 }
986 return; 1096 return;
987 }
988 default: 1097 default:
1098 /* FIXME: Possible case? */
989 printk(KERN_DEBUG "%s: unexpected dr %u\n", 1099 printk(KERN_DEBUG "%s: unexpected dr %u\n",
990 __func__, dr); 1100 __func__, dr);
991 *exception = UD_VECTOR; 1101 *exception = UD_VECTOR;
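
The 0xffffffff00000000ULL tests encode the architectural rule that, in 64-bit mode, setting any of bits 63:32 in DR6 or DR7 raises #GP. The surviving low bits are then split into the guest-controlled volatile portion and the always-set fixed bits; DR6's fixed pattern is the 0xffff0ff0 also written by init_vmcb() above.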
@@ -1031,6 +1141,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1031 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); 1141 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1032} 1142}
1033 1143
1144static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1145{
1146 if (!(svm->vcpu.guest_debug &
1147 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
1148 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1149 return 1;
1150 }
1151 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1152 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1153 kvm_run->debug.arch.exception = DB_VECTOR;
1154 return 0;
1155}
1156
1157static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1158{
1159 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1160 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1161 kvm_run->debug.arch.exception = BP_VECTOR;
1162 return 0;
1163}
1164
1034static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1165static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1035{ 1166{
1036 int er; 1167 int er;
@@ -1080,7 +1211,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1080static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) 1211static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1081{ 1212{
1082 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1213 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1083 int size, down, in, string, rep; 1214 int size, in, string;
1084 unsigned port; 1215 unsigned port;
1085 1216
1086 ++svm->vcpu.stat.io_exits; 1217 ++svm->vcpu.stat.io_exits;
@@ -1099,8 +1230,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1099 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1230 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1100 port = io_info >> 16; 1231 port = io_info >> 16;
1101 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1232 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1102 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1103 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1104 1233
1105 skip_emulated_instruction(&svm->vcpu); 1234 skip_emulated_instruction(&svm->vcpu);
1106 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); 1235 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
@@ -1139,6 +1268,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1139 return 1; 1268 return 1;
1140} 1269}
1141 1270
1271static int nested_svm_check_permissions(struct vcpu_svm *svm)
1272{
1273 if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
1274 || !is_paging(&svm->vcpu)) {
1275 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1276 return 1;
1277 }
1278
1279 if (svm->vmcb->save.cpl) {
1280 kvm_inject_gp(&svm->vcpu, 0);
1281 return 1;
1282 }
1283
1284 return 0;
1285}
1286
1287static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1288 bool has_error_code, u32 error_code)
1289{
1290 if (is_nested(svm)) {
1291 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1292 svm->vmcb->control.exit_code_hi = 0;
1293 svm->vmcb->control.exit_info_1 = error_code;
1294 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1295 if (nested_svm_exit_handled(svm, false)) {
1296 nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
1297
1298 nested_svm_vmexit(svm);
1299 return 1;
1300 }
1301 }
1302
1303 return 0;
1304}
1305
1306static inline int nested_svm_intr(struct vcpu_svm *svm)
1307{
1308 if (is_nested(svm)) {
1309 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1310 return 0;
1311
1312 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1313 return 0;
1314
1315 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
1316
1317 if (nested_svm_exit_handled(svm, false)) {
1318 nsvm_printk("VMexit -> INTR\n");
1319 nested_svm_vmexit(svm);
1320 return 1;
1321 }
1322 }
1323
1324 return 0;
1325}
1326
1327static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
1328{
1329 struct page *page;
1330
1331 down_read(&current->mm->mmap_sem);
1332 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1333 up_read(&current->mm->mmap_sem);
1334
1335 if (is_error_page(page)) {
1336 printk(KERN_INFO "%s: could not find page at 0x%llx\n",
1337 __func__, gpa);
1338 kvm_release_page_clean(page);
1339 kvm_inject_gp(&svm->vcpu, 0);
1340 return NULL;
1341 }
1342 return page;
1343}
1344
1345static int nested_svm_do(struct vcpu_svm *svm,
1346 u64 arg1_gpa, u64 arg2_gpa, void *opaque,
1347 int (*handler)(struct vcpu_svm *svm,
1348 void *arg1,
1349 void *arg2,
1350 void *opaque))
1351{
1352 struct page *arg1_page;
1353 struct page *arg2_page = NULL;
1354 void *arg1;
1355 void *arg2 = NULL;
1356 int retval;
1357
1358 arg1_page = nested_svm_get_page(svm, arg1_gpa);
1359	if (arg1_page == NULL)
1360 return 1;
1361
1362 if (arg2_gpa) {
1363 arg2_page = nested_svm_get_page(svm, arg2_gpa);
1364		if (arg2_page == NULL) {
1365 kvm_release_page_clean(arg1_page);
1366 return 1;
1367 }
1368 }
1369
1370 arg1 = kmap_atomic(arg1_page, KM_USER0);
1371 if (arg2_gpa)
1372 arg2 = kmap_atomic(arg2_page, KM_USER1);
1373
1374 retval = handler(svm, arg1, arg2, opaque);
1375
1376 kunmap_atomic(arg1, KM_USER0);
1377 if (arg2_gpa)
1378 kunmap_atomic(arg2, KM_USER1);
1379
1380 kvm_release_page_dirty(arg1_page);
1381 if (arg2_gpa)
1382 kvm_release_page_dirty(arg2_page);
1383
1384 return retval;
1385}
1386
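
nested_svm_do() maps up to two guest pages with kmap_atomic(), so handlers run in atomic context and must not sleep or touch pageable memory; the handler's return value is passed straight back to the caller. A hypothetical handler skeleton (name and body illustrative only):

	static int nested_svm_example(struct vcpu_svm *svm, void *arg1,
				      void *arg2, void *opaque)
	{
		struct vmcb *nested_vmcb = arg1;	/* mapping of arg1_gpa */

		/* inspect or rewrite the mapped VMCB here -- no sleeping */
		return 0;
	}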
1387static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
1388 void *arg1,
1389 void *arg2,
1390 void *opaque)
1391{
1392 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1393 bool kvm_overrides = *(bool *)opaque;
1394 u32 exit_code = svm->vmcb->control.exit_code;
1395
1396 if (kvm_overrides) {
1397 switch (exit_code) {
1398 case SVM_EXIT_INTR:
1399 case SVM_EXIT_NMI:
1400 return 0;
1401 /* For now we are always handling NPFs when using them */
1402 case SVM_EXIT_NPF:
1403 if (npt_enabled)
1404 return 0;
1405 break;
1406 /* When we're shadowing, trap PFs */
1407 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1408 if (!npt_enabled)
1409 return 0;
1410 break;
1411 default:
1412 break;
1413 }
1414 }
1415
1416 switch (exit_code) {
1417 case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1418 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1419 if (nested_vmcb->control.intercept_cr_read & cr_bits)
1420 return 1;
1421 break;
1422 }
1423 case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1424 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1425 if (nested_vmcb->control.intercept_cr_write & cr_bits)
1426 return 1;
1427 break;
1428 }
1429 case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1430 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1431 if (nested_vmcb->control.intercept_dr_read & dr_bits)
1432 return 1;
1433 break;
1434 }
1435 case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1436 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1437 if (nested_vmcb->control.intercept_dr_write & dr_bits)
1438 return 1;
1439 break;
1440 }
1441 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1442 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1443 if (nested_vmcb->control.intercept_exceptions & excp_bits)
1444 return 1;
1445 break;
1446 }
1447 default: {
1448 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1449 nsvm_printk("exit code: 0x%x\n", exit_code);
1450 if (nested_vmcb->control.intercept & exit_bits)
1451 return 1;
1452 }
1453 }
1454
1455 return 0;
1456}
1457
1458static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
1459 void *arg1, void *arg2,
1460 void *opaque)
1461{
1462 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1463 u8 *msrpm = (u8 *)arg2;
1464 u32 t0, t1;
1465 u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1466 u32 param = svm->vmcb->control.exit_info_1 & 1;
1467
1468 if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1469 return 0;
1470
1471	switch (msr) {
1472 case 0 ... 0x1fff:
1473 t0 = (msr * 2) % 8;
1474 t1 = msr / 8;
1475 break;
1476 case 0xc0000000 ... 0xc0001fff:
1477 t0 = (8192 + msr - 0xc0000000) * 2;
1478 t1 = (t0 / 8);
1479 t0 %= 8;
1480 break;
1481 case 0xc0010000 ... 0xc0011fff:
1482 t0 = (16384 + msr - 0xc0010000) * 2;
1483 t1 = (t0 / 8);
1484 t0 %= 8;
1485 break;
1486 default:
1487 return 1;
1488 break;
1489 }
1490 if (msrpm[t1] & ((1 << param) << t0))
1491 return 1;
1492
1493 return 0;
1494}
1495
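
The bit arithmetic follows the architectural MSR permission-map layout: two bits per MSR (read intercept, then write intercept), with the 0xC0000000 range starting 2048 bytes into the map. Worked through for EFER (0xC0000080) exactly as the code computes it:

	t0 = (8192 + 0xc0000080 - 0xc0000000) * 2;	/* 16640: bit offset  */
	t1 = t0 / 8;					/* 2080:  byte index  */
	t0 %= 8;					/* 0:     bit in byte */
	/* read intercept is bit 0 of msrpm[2080], write intercept bit 1 */

For the low 0x0-0x1fff range the byte index is taken as msr / 8 rather than msr / 4; with two bits per MSR that looks inconsistent with the same layout and may be worth double-checking.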
1496static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
1497{
1498 bool k = kvm_override;
1499
1500 switch (svm->vmcb->control.exit_code) {
1501 case SVM_EXIT_MSR:
1502 return nested_svm_do(svm, svm->nested_vmcb,
1503 svm->nested_vmcb_msrpm, NULL,
1504 nested_svm_exit_handled_msr);
1505 default: break;
1506 }
1507
1508 return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
1509 nested_svm_exit_handled_real);
1510}
1511
1512static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
1513 void *arg2, void *opaque)
1514{
1515 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1516 struct vmcb *hsave = svm->hsave;
1517 u64 nested_save[] = { nested_vmcb->save.cr0,
1518 nested_vmcb->save.cr3,
1519 nested_vmcb->save.cr4,
1520 nested_vmcb->save.efer,
1521 nested_vmcb->control.intercept_cr_read,
1522 nested_vmcb->control.intercept_cr_write,
1523 nested_vmcb->control.intercept_dr_read,
1524 nested_vmcb->control.intercept_dr_write,
1525 nested_vmcb->control.intercept_exceptions,
1526 nested_vmcb->control.intercept,
1527 nested_vmcb->control.msrpm_base_pa,
1528 nested_vmcb->control.iopm_base_pa,
1529 nested_vmcb->control.tsc_offset };
1530
1531 /* Give the current vmcb to the guest */
1532 memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
1533 nested_vmcb->save.cr0 = nested_save[0];
1534 if (!npt_enabled)
1535 nested_vmcb->save.cr3 = nested_save[1];
1536 nested_vmcb->save.cr4 = nested_save[2];
1537 nested_vmcb->save.efer = nested_save[3];
1538 nested_vmcb->control.intercept_cr_read = nested_save[4];
1539 nested_vmcb->control.intercept_cr_write = nested_save[5];
1540 nested_vmcb->control.intercept_dr_read = nested_save[6];
1541 nested_vmcb->control.intercept_dr_write = nested_save[7];
1542 nested_vmcb->control.intercept_exceptions = nested_save[8];
1543 nested_vmcb->control.intercept = nested_save[9];
1544 nested_vmcb->control.msrpm_base_pa = nested_save[10];
1545 nested_vmcb->control.iopm_base_pa = nested_save[11];
1546 nested_vmcb->control.tsc_offset = nested_save[12];
1547
1548 /* We always set V_INTR_MASKING and remember the old value in hflags */
1549 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1550 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1551
1552 if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
1553 (nested_vmcb->control.int_vector)) {
1554 nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
1555 nested_vmcb->control.int_vector);
1556 }
1557
1558 /* Restore the original control entries */
1559 svm->vmcb->control = hsave->control;
1560
1561 /* Kill any pending exceptions */
1562	if (svm->vcpu.arch.exception.pending)
1563 nsvm_printk("WARNING: Pending Exception\n");
1564 svm->vcpu.arch.exception.pending = false;
1565
1566 /* Restore selected save entries */
1567 svm->vmcb->save.es = hsave->save.es;
1568 svm->vmcb->save.cs = hsave->save.cs;
1569 svm->vmcb->save.ss = hsave->save.ss;
1570 svm->vmcb->save.ds = hsave->save.ds;
1571 svm->vmcb->save.gdtr = hsave->save.gdtr;
1572 svm->vmcb->save.idtr = hsave->save.idtr;
1573 svm->vmcb->save.rflags = hsave->save.rflags;
1574 svm_set_efer(&svm->vcpu, hsave->save.efer);
1575 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1576 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1577 if (npt_enabled) {
1578 svm->vmcb->save.cr3 = hsave->save.cr3;
1579 svm->vcpu.arch.cr3 = hsave->save.cr3;
1580 } else {
1581 kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1582 }
1583 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1584 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1585 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1586 svm->vmcb->save.dr7 = 0;
1587 svm->vmcb->save.cpl = 0;
1588 svm->vmcb->control.exit_int_info = 0;
1589
1590 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1591 /* Exit nested SVM mode */
1592 svm->nested_vmcb = 0;
1593
1594 return 0;
1595}
1596
1597static int nested_svm_vmexit(struct vcpu_svm *svm)
1598{
1599 nsvm_printk("VMexit\n");
1600 if (nested_svm_do(svm, svm->nested_vmcb, 0,
1601 NULL, nested_svm_vmexit_real))
1602 return 1;
1603
1604 kvm_mmu_reset_context(&svm->vcpu);
1605 kvm_mmu_load(&svm->vcpu);
1606
1607 return 0;
1608}
1609
1610static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
1611 void *arg2, void *opaque)
1612{
1613 int i;
1614	u32 *nested_msrpm = (u32 *)arg1;
1615	for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1616 svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1617 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
1618
1619 return 0;
1620}
1621
1622static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
1623 void *arg2, void *opaque)
1624{
1625 struct vmcb *nested_vmcb = (struct vmcb *)arg1;
1626 struct vmcb *hsave = svm->hsave;
1627
1628 /* nested_vmcb is our indicator if nested SVM is activated */
1629 svm->nested_vmcb = svm->vmcb->save.rax;
1630
1631 /* Clear internal status */
1632 svm->vcpu.arch.exception.pending = false;
1633
1634 /* Save the old vmcb, so we don't need to pick what we save, but
1635 can restore everything when a VMEXIT occurs */
1636 memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
1637 /* We need to remember the original CR3 in the SPT case */
1638 if (!npt_enabled)
1639 hsave->save.cr3 = svm->vcpu.arch.cr3;
1640 hsave->save.cr4 = svm->vcpu.arch.cr4;
1641 hsave->save.rip = svm->next_rip;
1642
1643 if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1644 svm->vcpu.arch.hflags |= HF_HIF_MASK;
1645 else
1646 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
1647
1648 /* Load the nested guest state */
1649 svm->vmcb->save.es = nested_vmcb->save.es;
1650 svm->vmcb->save.cs = nested_vmcb->save.cs;
1651 svm->vmcb->save.ss = nested_vmcb->save.ss;
1652 svm->vmcb->save.ds = nested_vmcb->save.ds;
1653 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
1654 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
1655 svm->vmcb->save.rflags = nested_vmcb->save.rflags;
1656 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
1657 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
1658 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
1659 if (npt_enabled) {
1660 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1661 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1662 } else {
1663 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1664 kvm_mmu_reset_context(&svm->vcpu);
1665 }
1666 svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
1667 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1668 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1669 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
1670 /* In case we don't even reach vcpu_run, the fields are not updated */
1671 svm->vmcb->save.rax = nested_vmcb->save.rax;
1672 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
1673 svm->vmcb->save.rip = nested_vmcb->save.rip;
1674 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
1675 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1676 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1677
1678 /* We don't want a nested guest to be more powerful than the guest,
1679 so all intercepts are ORed */
1680 svm->vmcb->control.intercept_cr_read |=
1681 nested_vmcb->control.intercept_cr_read;
1682 svm->vmcb->control.intercept_cr_write |=
1683 nested_vmcb->control.intercept_cr_write;
1684 svm->vmcb->control.intercept_dr_read |=
1685 nested_vmcb->control.intercept_dr_read;
1686 svm->vmcb->control.intercept_dr_write |=
1687 nested_vmcb->control.intercept_dr_write;
1688 svm->vmcb->control.intercept_exceptions |=
1689 nested_vmcb->control.intercept_exceptions;
1690
1691 svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
1692
1693 svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1694
1695 force_new_asid(&svm->vcpu);
1696 svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
1697 svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
1698 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
1699 if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
1700 nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
1701 nested_vmcb->control.int_ctl);
1702 }
1703 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
1704 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
1705 else
1706 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1707
1708 nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
1709 nested_vmcb->control.exit_int_info,
1710 nested_vmcb->control.int_state);
1711
1712 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
1713 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
1714 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
1715 if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
1716 nsvm_printk("Injecting Event: 0x%x\n",
1717 nested_vmcb->control.event_inj);
1718 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
1719 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
1720
1721 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1722
1723 return 0;
1724}
1725
1726static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1727{
1728 to_vmcb->save.fs = from_vmcb->save.fs;
1729 to_vmcb->save.gs = from_vmcb->save.gs;
1730 to_vmcb->save.tr = from_vmcb->save.tr;
1731 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
1732 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
1733 to_vmcb->save.star = from_vmcb->save.star;
1734 to_vmcb->save.lstar = from_vmcb->save.lstar;
1735 to_vmcb->save.cstar = from_vmcb->save.cstar;
1736 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
1737 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1738 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1739 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1740
1741 return 1;
1742}
1743
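
nested_svm_vmloadsave() mirrors the architectural VMLOAD/VMSAVE contract: those instructions transfer exactly the hidden state that VMRUN and #VMEXIT leave alone -- FS, GS, TR and LDTR including their cached descriptor state, KernelGSBase, the STAR/LSTAR/CSTAR/SFMASK syscall MSRs and the SYSENTER MSRs -- which is why the helper copies precisely this field set in either direction.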
1744static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
1745 void *arg2, void *opaque)
1746{
1747 return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
1748}
1749
1750static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
1751 void *arg2, void *opaque)
1752{
1753 return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
1754}
1755
1756static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1757{
1758 if (nested_svm_check_permissions(svm))
1759 return 1;
1760
1761 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1762 skip_emulated_instruction(&svm->vcpu);
1763
1764 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
1765
1766 return 1;
1767}
1768
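
The hardcoded next_rip = rip + 3 in these handlers is safe because VMRUN, VMLOAD, VMSAVE, STGI and CLGI are all fixed three-byte 0F 01 /r encodings, so skipping them needs no instruction decoding.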
1769static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1770{
1771 if (nested_svm_check_permissions(svm))
1772 return 1;
1773
1774 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1775 skip_emulated_instruction(&svm->vcpu);
1776
1777 nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
1778
1779 return 1;
1780}
1781
1782static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1783{
1784 nsvm_printk("VMrun\n");
1785 if (nested_svm_check_permissions(svm))
1786 return 1;
1787
1788 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1789 skip_emulated_instruction(&svm->vcpu);
1790
1791 if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
1792 NULL, nested_svm_vmrun))
1793 return 1;
1794
1795 if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
1796 NULL, nested_svm_vmrun_msrpm))
1797 return 1;
1798
1799 return 1;
1800}
1801
1802static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1803{
1804 if (nested_svm_check_permissions(svm))
1805 return 1;
1806
1807 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1808 skip_emulated_instruction(&svm->vcpu);
1809
1810 svm->vcpu.arch.hflags |= HF_GIF_MASK;
1811
1812 return 1;
1813}
1814
1815static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1816{
1817 if (nested_svm_check_permissions(svm))
1818 return 1;
1819
1820 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1821 skip_emulated_instruction(&svm->vcpu);
1822
1823 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
1824
1825 /* After a CLGI no interrupts should come */
1826 svm_clear_vintr(svm);
1827 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1828
1829 return 1;
1830}
1831
1142static int invalid_op_interception(struct vcpu_svm *svm, 1832static int invalid_op_interception(struct vcpu_svm *svm,
1143 struct kvm_run *kvm_run) 1833 struct kvm_run *kvm_run)
1144{ 1834{
@@ -1250,6 +1940,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1250 case MSR_IA32_LASTINTTOIP: 1940 case MSR_IA32_LASTINTTOIP:
1251 *data = svm->vmcb->save.last_excp_to; 1941 *data = svm->vmcb->save.last_excp_to;
1252 break; 1942 break;
1943 case MSR_VM_HSAVE_PA:
1944 *data = svm->hsave_msr;
1945 break;
1946 case MSR_VM_CR:
1947 *data = 0;
1948 break;
1949 case MSR_IA32_UCODE_REV:
1950 *data = 0x01000065;
1951 break;
1253 default: 1952 default:
1254 return kvm_get_msr_common(vcpu, ecx, data); 1953 return kvm_get_msr_common(vcpu, ecx, data);
1255 } 1954 }
@@ -1344,6 +2043,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1344 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); 2043 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
1345 2044
1346 break; 2045 break;
2046 case MSR_VM_HSAVE_PA:
2047 svm->hsave_msr = data;
2048 break;
1347 default: 2049 default:
1348 return kvm_set_msr_common(vcpu, ecx, data); 2050 return kvm_set_msr_common(vcpu, ecx, data);
1349 } 2051 }
@@ -1380,7 +2082,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
1380{ 2082{
1381 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); 2083 KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
1382 2084
1383 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); 2085 svm_clear_vintr(svm);
1384 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2086 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1385 /* 2087 /*
1386 * If the user space waits to inject interrupts, exit as soon as 2088 * If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2119,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1417 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2119 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
1418 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2120 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
1419 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2121 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
2122 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2123 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
1420 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 2124 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
1421 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 2125 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
1422 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 2126 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2140,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1436 [SVM_EXIT_MSR] = msr_interception, 2140 [SVM_EXIT_MSR] = msr_interception,
1437 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 2141 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
1438 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 2142 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
1439 [SVM_EXIT_VMRUN] = invalid_op_interception, 2143 [SVM_EXIT_VMRUN] = vmrun_interception,
1440 [SVM_EXIT_VMMCALL] = vmmcall_interception, 2144 [SVM_EXIT_VMMCALL] = vmmcall_interception,
1441 [SVM_EXIT_VMLOAD] = invalid_op_interception, 2145 [SVM_EXIT_VMLOAD] = vmload_interception,
1442 [SVM_EXIT_VMSAVE] = invalid_op_interception, 2146 [SVM_EXIT_VMSAVE] = vmsave_interception,
1443 [SVM_EXIT_STGI] = invalid_op_interception, 2147 [SVM_EXIT_STGI] = stgi_interception,
1444 [SVM_EXIT_CLGI] = invalid_op_interception, 2148 [SVM_EXIT_CLGI] = clgi_interception,
1445 [SVM_EXIT_SKINIT] = invalid_op_interception, 2149 [SVM_EXIT_SKINIT] = invalid_op_interception,
1446 [SVM_EXIT_WBINVD] = emulate_on_interception, 2150 [SVM_EXIT_WBINVD] = emulate_on_interception,
1447 [SVM_EXIT_MONITOR] = invalid_op_interception, 2151 [SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2161,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1457 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, 2161 KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
1458 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); 2162 (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
1459 2163
2164 if (is_nested(svm)) {
2165 nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
2166 exit_code, svm->vmcb->control.exit_info_1,
2167 svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
2168 if (nested_svm_exit_handled(svm, true)) {
2169 nested_svm_vmexit(svm);
2170 nsvm_printk("-> #VMEXIT\n");
2171 return 1;
2172 }
2173 }
2174
1460 if (npt_enabled) { 2175 if (npt_enabled) {
1461 int mmu_reload = 0; 2176 int mmu_reload = 0;
1462 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { 2177 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2259,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1544{ 2259{
1545 struct vcpu_svm *svm = to_svm(vcpu); 2260 struct vcpu_svm *svm = to_svm(vcpu);
1546 2261
2262 nested_svm_intr(svm);
2263
1547 svm_inject_irq(svm, irq); 2264 svm_inject_irq(svm, irq);
1548} 2265}
1549 2266
@@ -1589,11 +2306,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1589 if (!kvm_cpu_has_interrupt(vcpu)) 2306 if (!kvm_cpu_has_interrupt(vcpu))
1590 goto out; 2307 goto out;
1591 2308
2309 if (nested_svm_intr(svm))
2310 goto out;
2311
2312 if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
2313 goto out;
2314
1592 if (!(vmcb->save.rflags & X86_EFLAGS_IF) || 2315 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1593 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || 2316 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
1594 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { 2317 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
1595 /* unable to deliver irq, set pending irq */ 2318 /* unable to deliver irq, set pending irq */
1596 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); 2319 svm_set_vintr(svm);
1597 svm_inject_irq(svm, 0x0); 2320 svm_inject_irq(svm, 0x0);
1598 goto out; 2321 goto out;
1599 } 2322 }
@@ -1615,7 +2338,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
1615 } 2338 }
1616 2339
1617 svm->vcpu.arch.interrupt_window_open = 2340 svm->vcpu.arch.interrupt_window_open =
1618 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); 2341 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2342 (svm->vcpu.arch.hflags & HF_GIF_MASK);
1619} 2343}
1620 2344
1621static void svm_do_inject_vector(struct vcpu_svm *svm) 2345static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1637,9 +2361,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1637 struct vcpu_svm *svm = to_svm(vcpu); 2361 struct vcpu_svm *svm = to_svm(vcpu);
1638 struct vmcb_control_area *control = &svm->vmcb->control; 2362 struct vmcb_control_area *control = &svm->vmcb->control;
1639 2363
2364 if (nested_svm_intr(svm))
2365 return;
2366
1640 svm->vcpu.arch.interrupt_window_open = 2367 svm->vcpu.arch.interrupt_window_open =
1641 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 2368 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
1642 (svm->vmcb->save.rflags & X86_EFLAGS_IF)); 2369 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
2370 (svm->vcpu.arch.hflags & HF_GIF_MASK));
1643 2371
1644 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) 2372 if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
1645 /* 2373 /*
@@ -1652,9 +2380,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1652 */ 2380 */
1653 if (!svm->vcpu.arch.interrupt_window_open && 2381 if (!svm->vcpu.arch.interrupt_window_open &&
1654 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) 2382 (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
1655 control->intercept |= 1ULL << INTERCEPT_VINTR; 2383 svm_set_vintr(svm);
1656 else 2384 else
1657 control->intercept &= ~(1ULL << INTERCEPT_VINTR); 2385 svm_clear_vintr(svm);
1658} 2386}
1659 2387
1660static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 2388static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1662,22 +2390,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
1662 return 0; 2390 return 0;
1663} 2391}
1664 2392
1665static void save_db_regs(unsigned long *db_regs)
1666{
1667 asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
1668 asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
1669 asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
1670 asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
1671}
1672
1673static void load_db_regs(unsigned long *db_regs)
1674{
1675 asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
1676 asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
1677 asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
1678 asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
1679}
1680
1681static void svm_flush_tlb(struct kvm_vcpu *vcpu) 2393static void svm_flush_tlb(struct kvm_vcpu *vcpu)
1682{ 2394{
1683 force_new_asid(vcpu); 2395 force_new_asid(vcpu);
@@ -1736,19 +2448,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1736 gs_selector = kvm_read_gs(); 2448 gs_selector = kvm_read_gs();
1737 ldt_selector = kvm_read_ldt(); 2449 ldt_selector = kvm_read_ldt();
1738 svm->host_cr2 = kvm_read_cr2(); 2450 svm->host_cr2 = kvm_read_cr2();
1739 svm->host_dr6 = read_dr6(); 2451 if (!is_nested(svm))
1740 svm->host_dr7 = read_dr7(); 2452 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1741 svm->vmcb->save.cr2 = vcpu->arch.cr2;
1742 /* required for live migration with NPT */ 2453 /* required for live migration with NPT */
1743 if (npt_enabled) 2454 if (npt_enabled)
1744 svm->vmcb->save.cr3 = vcpu->arch.cr3; 2455 svm->vmcb->save.cr3 = vcpu->arch.cr3;
1745 2456
1746 if (svm->vmcb->save.dr7 & 0xff) {
1747 write_dr7(0);
1748 save_db_regs(svm->host_db_regs);
1749 load_db_regs(svm->db_regs);
1750 }
1751
1752 clgi(); 2457 clgi();
1753 2458
1754 local_irq_enable(); 2459 local_irq_enable();
@@ -1824,16 +2529,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1824#endif 2529#endif
1825 ); 2530 );
1826 2531
1827 if ((svm->vmcb->save.dr7 & 0xff))
1828 load_db_regs(svm->host_db_regs);
1829
1830 vcpu->arch.cr2 = svm->vmcb->save.cr2; 2532 vcpu->arch.cr2 = svm->vmcb->save.cr2;
1831 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; 2533 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
1832 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 2534 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
1833 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 2535 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
1834 2536
1835 write_dr6(svm->host_dr6);
1836 write_dr7(svm->host_dr7);
1837 kvm_write_cr2(svm->host_cr2); 2537 kvm_write_cr2(svm->host_cr2);
1838 2538
1839 kvm_load_fs(fs_selector); 2539 kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7611af576829..bb481330716f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
91 } rmode; 91 } rmode;
92 int vpid; 92 int vpid;
93 bool emulation_required; 93 bool emulation_required;
94 enum emulation_result invalid_state_emulation_result;
94 95
95 /* Support for vnmi-less CPUs */ 96 /* Support for vnmi-less CPUs */
96 int soft_vnmi_blocked; 97 int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
189{ 190{
190 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 191 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
191 INTR_INFO_VALID_MASK)) == 192 INTR_INFO_VALID_MASK)) ==
192 (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); 193 (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
193} 194}
194 195
195static inline int is_no_device(u32 intr_info) 196static inline int is_no_device(u32 intr_info)
196{ 197{
197 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 198 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
198 INTR_INFO_VALID_MASK)) == 199 INTR_INFO_VALID_MASK)) ==
199 (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); 200 (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
200} 201}
201 202
202static inline int is_invalid_opcode(u32 intr_info) 203static inline int is_invalid_opcode(u32 intr_info)
203{ 204{
204 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | 205 return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
205 INTR_INFO_VALID_MASK)) == 206 INTR_INFO_VALID_MASK)) ==
206 (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); 207 (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
207} 208}
208 209
209static inline int is_external_interrupt(u32 intr_info) 210static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
480 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); 481 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
481 if (!vcpu->fpu_active) 482 if (!vcpu->fpu_active)
482 eb |= 1u << NM_VECTOR; 483 eb |= 1u << NM_VECTOR;
483 if (vcpu->guest_debug.enabled) 484 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
484 eb |= 1u << DB_VECTOR; 485 if (vcpu->guest_debug &
486 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
487 eb |= 1u << DB_VECTOR;
488 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
489 eb |= 1u << BP_VECTOR;
490 }
485 if (vcpu->arch.rmode.active) 491 if (vcpu->arch.rmode.active)
486 eb = ~0; 492 eb = ~0;
487 if (vm_need_ept()) 493 if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
747 bool has_error_code, u32 error_code) 753 bool has_error_code, u32 error_code)
748{ 754{
749 struct vcpu_vmx *vmx = to_vmx(vcpu); 755 struct vcpu_vmx *vmx = to_vmx(vcpu);
756 u32 intr_info = nr | INTR_INFO_VALID_MASK;
750 757
751 if (has_error_code) 758 if (has_error_code) {
752 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); 759 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
760 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
761 }
753 762
754 if (vcpu->arch.rmode.active) { 763 if (vcpu->arch.rmode.active) {
755 vmx->rmode.irq.pending = true; 764 vmx->rmode.irq.pending = true;
756 vmx->rmode.irq.vector = nr; 765 vmx->rmode.irq.vector = nr;
757 vmx->rmode.irq.rip = kvm_rip_read(vcpu); 766 vmx->rmode.irq.rip = kvm_rip_read(vcpu);
758 if (nr == BP_VECTOR) 767 if (nr == BP_VECTOR || nr == OF_VECTOR)
759 vmx->rmode.irq.rip++; 768 vmx->rmode.irq.rip++;
760 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 769 intr_info |= INTR_TYPE_SOFT_INTR;
761 nr | INTR_TYPE_SOFT_INTR 770 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
762 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
763 | INTR_INFO_VALID_MASK);
764 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); 771 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
765 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); 772 kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
766 return; 773 return;
767 } 774 }
768 775
769 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 776 if (nr == BP_VECTOR || nr == OF_VECTOR) {
770 nr | INTR_TYPE_EXCEPTION 777 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
771 | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) 778 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
772 | INTR_INFO_VALID_MASK); 779 } else
780 intr_info |= INTR_TYPE_HARD_EXCEPTION;
781
782 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
773} 783}
774 784
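
The soft/hard split matters because #BP and #OF come from the trap-like INT3 and INTO instructions: VMX wants them injected as soft exceptions with VM_ENTRY_INSTRUCTION_LEN set (hardwired to 1 above, matching their one-byte encodings) so the guest resumes after the instruction, while every other vector is injected as a hard exception.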
775static bool vmx_exception_injected(struct kvm_vcpu *vcpu) 785static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
856 * writes 'guest_tsc' into guest's timestamp counter "register" 866 * writes 'guest_tsc' into guest's timestamp counter "register"
857 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc 867 * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
858 */ 868 */
859static void guest_write_tsc(u64 guest_tsc) 869static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
860{ 870{
861 u64 host_tsc;
862
863 rdtscll(host_tsc);
864 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc); 871 vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
865} 872}
866 873
@@ -925,14 +932,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
925{ 932{
926 struct vcpu_vmx *vmx = to_vmx(vcpu); 933 struct vcpu_vmx *vmx = to_vmx(vcpu);
927 struct kvm_msr_entry *msr; 934 struct kvm_msr_entry *msr;
935 u64 host_tsc;
928 int ret = 0; 936 int ret = 0;
929 937
930 switch (msr_index) { 938 switch (msr_index) {
931#ifdef CONFIG_X86_64
932 case MSR_EFER: 939 case MSR_EFER:
933 vmx_load_host_state(vmx); 940 vmx_load_host_state(vmx);
934 ret = kvm_set_msr_common(vcpu, msr_index, data); 941 ret = kvm_set_msr_common(vcpu, msr_index, data);
935 break; 942 break;
943#ifdef CONFIG_X86_64
936 case MSR_FS_BASE: 944 case MSR_FS_BASE:
937 vmcs_writel(GUEST_FS_BASE, data); 945 vmcs_writel(GUEST_FS_BASE, data);
938 break; 946 break;
@@ -950,7 +958,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
950 vmcs_writel(GUEST_SYSENTER_ESP, data); 958 vmcs_writel(GUEST_SYSENTER_ESP, data);
951 break; 959 break;
952 case MSR_IA32_TIME_STAMP_COUNTER: 960 case MSR_IA32_TIME_STAMP_COUNTER:
953 guest_write_tsc(data); 961 rdtscll(host_tsc);
962 guest_write_tsc(data, host_tsc);
954 break; 963 break;
955 case MSR_P6_PERFCTR0: 964 case MSR_P6_PERFCTR0:
956 case MSR_P6_PERFCTR1: 965 case MSR_P6_PERFCTR1:
@@ -999,40 +1008,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
999 } 1008 }
1000} 1009}
1001 1010
1002static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) 1011static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1003{ 1012{
1004 unsigned long dr7 = 0x400; 1013 int old_debug = vcpu->guest_debug;
1005 int old_singlestep; 1014 unsigned long flags;
1006
1007 old_singlestep = vcpu->guest_debug.singlestep;
1008
1009 vcpu->guest_debug.enabled = dbg->enabled;
1010 if (vcpu->guest_debug.enabled) {
1011 int i;
1012 1015
1013 dr7 |= 0x200; /* exact */ 1016 vcpu->guest_debug = dbg->control;
1014 for (i = 0; i < 4; ++i) { 1017 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
1015 if (!dbg->breakpoints[i].enabled) 1018 vcpu->guest_debug = 0;
1016 continue;
1017 vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
1018 dr7 |= 2 << (i*2); /* global enable */
1019 dr7 |= 0 << (i*4+16); /* execution breakpoint */
1020 }
1021 1019
1022 vcpu->guest_debug.singlestep = dbg->singlestep; 1020 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1023 } else 1021 vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
1024 vcpu->guest_debug.singlestep = 0; 1022 else
1025 1023 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
1026 if (old_singlestep && !vcpu->guest_debug.singlestep) {
1027 unsigned long flags;
1028 1024
1029 flags = vmcs_readl(GUEST_RFLAGS); 1025 flags = vmcs_readl(GUEST_RFLAGS);
1026 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
1027 flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
1028 else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
1030 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 1029 flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1031 vmcs_writel(GUEST_RFLAGS, flags); 1030 vmcs_writel(GUEST_RFLAGS, flags);
1032 }
1033 1031
1034 update_exception_bitmap(vcpu); 1032 update_exception_bitmap(vcpu);
1035 vmcs_writel(GUEST_DR7, dr7);
1036 1033
1037 return 0; 1034 return 0;
1038} 1035}
@@ -1433,6 +1430,29 @@ continue_rmode:
 	init_rmode(vcpu->kvm);
 }
 
+static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
+
+	vcpu->arch.shadow_efer = efer;
+	if (!msr)
+		return;
+	if (efer & EFER_LMA) {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     vmcs_read32(VM_ENTRY_CONTROLS) |
+			     VM_ENTRY_IA32E_MODE);
+		msr->data = efer;
+	} else {
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     vmcs_read32(VM_ENTRY_CONTROLS) &
+			     ~VM_ENTRY_IA32E_MODE);
+
+		msr->data = efer & ~EFER_LME;
+	}
+	setup_msrs(vmx);
+}
+
 #ifdef CONFIG_X86_64
 
 static void enter_lmode(struct kvm_vcpu *vcpu)
@@ -1447,13 +1467,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
 	}
-
 	vcpu->arch.shadow_efer |= EFER_LMA;
-
-	find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
-	vmcs_write32(VM_ENTRY_CONTROLS,
-		     vmcs_read32(VM_ENTRY_CONTROLS)
-		     | VM_ENTRY_IA32E_MODE);
+	vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -1612,30 +1627,6 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
-static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
-
-	vcpu->arch.shadow_efer = efer;
-	if (!msr)
-		return;
-	if (efer & EFER_LMA) {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-			     vmcs_read32(VM_ENTRY_CONTROLS) |
-			     VM_ENTRY_IA32E_MODE);
-		msr->data = efer;
-
-	} else {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-			     vmcs_read32(VM_ENTRY_CONTROLS) &
-			     ~VM_ENTRY_IA32E_MODE);
-
-		msr->data = efer & ~EFER_LME;
-	}
-	setup_msrs(vmx);
-}
-
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1653,7 +1644,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->limit = vmcs_read32(sf->limit);
 	var->selector = vmcs_read16(sf->selector);
 	ar = vmcs_read32(sf->ar_bytes);
-	if (ar & AR_UNUSABLE_MASK)
+	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
 	var->s = (ar >> 4) & 1;
@@ -1788,14 +1779,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
 	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
 	cs_rpl = cs.selector & SELECTOR_RPL_MASK;
 
+	if (cs.unusable)
+		return false;
 	if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
 		return false;
 	if (!cs.s)
 		return false;
-	if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+	if (cs.type & AR_TYPE_WRITEABLE_MASK) {
 		if (cs.dpl > cs_rpl)
 			return false;
-	} else if (cs.type & AR_TYPE_CODE_MASK) {
+	} else {
 		if (cs.dpl != cs_rpl)
 			return false;
 	}
@@ -1814,7 +1807,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
 	vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 	ss_rpl = ss.selector & SELECTOR_RPL_MASK;
 
-	if ((ss.type != 3) || (ss.type != 7))
+	if (ss.unusable)
+		return true;
+	if (ss.type != 3 && ss.type != 7)
 		return false;
 	if (!ss.s)
 		return false;
@@ -1834,6 +1829,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
 	vmx_get_segment(vcpu, &var, seg);
 	rpl = var.selector & SELECTOR_RPL_MASK;
 
+	if (var.unusable)
+		return true;
 	if (!var.s)
 		return false;
 	if (!var.present)
@@ -1855,9 +1852,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
 	vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
 
+	if (tr.unusable)
+		return false;
 	if (tr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
 		return false;
-	if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+	if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
 		return false;
 	if (!tr.present)
 		return false;
@@ -1871,6 +1870,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
 	vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
 
+	if (ldtr.unusable)
+		return true;
 	if (ldtr.selector & SELECTOR_TI_MASK)	/* TI = 1 */
 		return false;
 	if (ldtr.type != 2)
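For readers of these validity checks: the new "unusable" tests come from the VMX access-rights byte that vmx_get_segment() decodes. A sketch of that layout, per the Intel SDM's VMCS segment access-rights format (field names are mine, for illustration only):

/* Sketch of the 32-bit VMCS access-rights field consumed above. */
struct seg_ar_sketch {
	unsigned type     : 4;	/* bits 3:0  - segment type                */
	unsigned s        : 1;	/* bit  4    - code/data (1) vs. system (0) */
	unsigned dpl      : 2;	/* bits 6:5  - descriptor privilege level  */
	unsigned present  : 1;	/* bit  7                                  */
	unsigned reserved : 4;	/* bits 11:8                               */
	unsigned avl      : 1;	/* bit  12   - available for software      */
	unsigned l        : 1;	/* bit  13   - 64-bit code segment         */
	unsigned db       : 1;	/* bit  14   - default operation size      */
	unsigned g        : 1;	/* bit  15   - granularity                 */
	unsigned unusable : 1;	/* bit  16   - segment register unusable   */
};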
@@ -2112,7 +2113,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
 	u32 host_sysenter_cs, msr_low, msr_high;
 	u32 junk;
-	u64 host_pat;
+	u64 host_pat, tsc_this, tsc_base;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2240,6 +2241,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
+	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+	rdtscll(tsc_this);
+	if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+		tsc_base = tsc_this;
+
+	guest_write_tsc(0, tsc_base);
 
 	return 0;
 }
@@ -2319,7 +2326,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
 	vmcs_writel(GUEST_DR7, 0x400);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2332,8 +2338,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
 	vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
-	guest_write_tsc(0);
-
 	/* Special registers */
 	vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 
@@ -2486,6 +2490,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 {
 	vmx_update_window_states(vcpu);
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS);
+
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 		if (vcpu->arch.interrupt.pending) {
 			enable_nmi_window(vcpu);
@@ -2536,24 +2545,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
-	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
-	set_debugreg(dbg->bp[0], 0);
-	set_debugreg(dbg->bp[1], 1);
-	set_debugreg(dbg->bp[2], 2);
-	set_debugreg(dbg->bp[3], 3);
-
-	if (dbg->singlestep) {
-		unsigned long flags;
-
-		flags = vmcs_readl(GUEST_RFLAGS);
-		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-		vmcs_writel(GUEST_RFLAGS, flags);
-	}
-}
-
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
@@ -2570,9 +2561,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 * the required debugging infrastructure rework.
 	 */
 	switch (vec) {
-	case DE_VECTOR:
 	case DB_VECTOR:
+		if (vcpu->guest_debug &
+		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+			return 0;
+		kvm_queue_exception(vcpu, vec);
+		return 1;
 	case BP_VECTOR:
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+			return 0;
+		/* fall through */
+	case DE_VECTOR:
 	case OF_VECTOR:
 	case BR_VECTOR:
 	case UD_VECTOR:
@@ -2589,8 +2588,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info, error_code;
-	unsigned long cr2, rip;
+	u32 intr_info, ex_no, error_code;
+	unsigned long cr2, rip, dr6;
 	u32 vect_info;
 	enum emulation_result er;
 
@@ -2649,14 +2648,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
-	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
-	    (INTR_TYPE_EXCEPTION | 1)) {
+	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+	switch (ex_no) {
+	case DB_VECTOR:
+		dr6 = vmcs_readl(EXIT_QUALIFICATION);
+		if (!(vcpu->guest_debug &
+		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+			vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+			kvm_queue_exception(vcpu, DB_VECTOR);
+			return 1;
+		}
+		kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+		/* fall through */
+	case BP_VECTOR:
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		return 0;
+		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+		kvm_run->debug.arch.exception = ex_no;
+		break;
+	default:
+		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+		kvm_run->ex.exception = ex_no;
+		kvm_run->ex.error_code = error_code;
+		break;
 	}
-	kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-	kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
-	kvm_run->ex.error_code = error_code;
 	return 0;
 }
 
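On the other side of this exit, userspace now receives the debug details in kvm_run->debug.arch. A consuming sketch (illustration only; a real VMM would resume or report to a debugger instead of printing):

#include <stdio.h>
#include <linux/kvm.h>

/* Inspect a KVM_EXIT_DEBUG produced by the handler above. */
static void handle_debug_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_DEBUG)
		return;
	printf("vector=%u pc=0x%llx dr6=0x%llx dr7=0x%llx\n",
	       run->debug.arch.exception,
	       (unsigned long long)run->debug.arch.pc,
	       (unsigned long long)run->debug.arch.dr6,
	       (unsigned long long)run->debug.arch.dr7);
}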
@@ -2677,7 +2692,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	unsigned long exit_qualification;
-	int size, down, in, string, rep;
+	int size, in, string;
 	unsigned port;
 
 	++vcpu->stat.io_exits;
@@ -2693,8 +2708,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	size = (exit_qualification & 7) + 1;
 	in = (exit_qualification & 8) != 0;
-	down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
 	skip_emulated_instruction(vcpu);
@@ -2795,21 +2808,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	unsigned long val;
 	int dr, reg;
 
-	/*
-	 * FIXME: this code assumes the host is debugging the guest.
-	 * need to deal with guest debugging itself too.
-	 */
+	dr = vmcs_readl(GUEST_DR7);
+	if (dr & DR7_GD) {
+		/*
+		 * As the vm-exit takes precedence over the debug trap, we
+		 * need to emulate the latter, either for the host or the
+		 * guest debugging itself.
+		 */
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+			kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+			kvm_run->debug.arch.dr7 = dr;
+			kvm_run->debug.arch.pc =
+				vmcs_readl(GUEST_CS_BASE) +
+				vmcs_readl(GUEST_RIP);
+			kvm_run->debug.arch.exception = DB_VECTOR;
+			kvm_run->exit_reason = KVM_EXIT_DEBUG;
+			return 0;
+		} else {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 |= DR6_BD;
+			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+			kvm_queue_exception(vcpu, DB_VECTOR);
+			return 1;
+		}
+	}
+
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & 7;
-	reg = (exit_qualification >> 8) & 15;
-	if (exit_qualification & 16) {
-		/* mov from dr */
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		switch (dr) {
+		case 0 ... 3:
+			val = vcpu->arch.db[dr];
+			break;
 		case 6:
-			val = 0xffff0ff0;
+			val = vcpu->arch.dr6;
 			break;
 		case 7:
-			val = 0x400;
+			val = vcpu->arch.dr7;
 			break;
 		default:
 			val = 0;
@@ -2817,7 +2853,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_register_write(vcpu, reg, val);
 		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
-		/* mov to dr */
+		val = vcpu->arch.regs[reg];
+		switch (dr) {
+		case 0 ... 3:
+			vcpu->arch.db[dr] = val;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+				vcpu->arch.eff_db[dr] = val;
+			break;
+		case 4 ... 5:
+			if (vcpu->arch.cr4 & X86_CR4_DE)
+				kvm_queue_exception(vcpu, UD_VECTOR);
+			break;
+		case 6:
+			if (val & 0xffffffff00000000ULL) {
+				kvm_queue_exception(vcpu, GP_VECTOR);
+				break;
+			}
+			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+			break;
+		case 7:
+			if (val & 0xffffffff00000000ULL) {
+				kvm_queue_exception(vcpu, GP_VECTOR);
+				break;
+			}
+			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+				vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+				vcpu->arch.switch_db_regs =
+					(val & DR7_BP_EN_MASK);
+			}
+			break;
+		}
+		KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
 	}
 	skip_emulated_instruction(vcpu);
 	return 1;
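The DEBUG_REG_ACCESS_* macros used above are defined elsewhere in this series; per the Intel SDM's MOV-DR exit qualification layout they should amount to something like the following sketch (macro names here are illustrative, not the kernel's):

/* Sketch of the MOV-DR exit qualification bits the handler decodes. */
#define DR_ACCESS_NUM_SKETCH(q)   ((q) & 7)          /* bits 2:0  - debug register */
#define DR_ACCESS_FROM_SKETCH(q)  ((q) & (1 << 4))   /* bit 4     - 1 = MOV from DR */
#define DR_ACCESS_GPR_SKETCH(q)   (((q) >> 8) & 15)  /* bits 11:8 - GPR operand     */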
@@ -2968,17 +3035,25 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 	tss_selector = exit_qualification;
 
-	return kvm_task_switch(vcpu, tss_selector, reason);
+	if (!kvm_task_switch(vcpu, tss_selector, reason))
+		return 0;
+
+	/* clear all local breakpoint enable flags */
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+
+	/*
+	 * TODO: What about debug traps on tss switch?
+	 * Are we supposed to inject them and update dr6?
+	 */
+
+	return 1;
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	u64 exit_qualification;
-	enum emulation_result er;
 	gpa_t gpa;
-	unsigned long hva;
 	int gla_validity;
-	int r;
 
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 
@@ -3001,32 +3076,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	}
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-	hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
-	if (!kvm_is_error_hva(hva)) {
-		r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
-		if (r < 0) {
-			printk(KERN_ERR "EPT: Not enough memory!\n");
-			return -ENOMEM;
-		}
-		return 1;
-	} else {
-		/* must be MMIO */
-		er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
-
-		if (er == EMULATE_FAIL) {
-			printk(KERN_ERR
-			       "EPT: Fail to handle EPT violation vmexit!er is %d\n",
-			       er);
-			printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
-			       (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
-			       (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
-			printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
-			       (long unsigned int)exit_qualification);
-			return -ENOTSUPP;
-		} else if (er == EMULATE_DO_MMIO)
-			return 0;
-	}
-	return 1;
+	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3046,7 +3096,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 					     struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	int err;
+	enum emulation_result err = EMULATE_DONE;
 
 	preempt_enable();
 	local_irq_enable();
@@ -3071,10 +3121,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now except if we need to
-	 * emulate an MMIO */
-	if (guest_state_valid(vcpu))
-		vmx->emulation_required = 0;
+	vmx->invalid_state_emulation_result = err;
 }
 
 /*
@@ -3123,8 +3170,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	/* If we need to emulate an MMIO from handle_invalid_guest_state
 	 * we just return 0 */
-	if (vmx->emulation_required && emulate_invalid_guest_state)
-		return 0;
+	if (vmx->emulation_required && emulate_invalid_guest_state) {
+		if (guest_state_valid(vcpu))
+			vmx->emulation_required = 0;
+		return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
+	}
 
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
@@ -3238,7 +3288,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmx->vcpu.arch.nmi_injected = false;
 	}
 	kvm_clear_exception_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+	if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+				type == INTR_TYPE_SOFT_EXCEPTION)) {
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3259,6 +3310,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
 	vmx_update_window_states(vcpu);
 
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+				GUEST_INTR_STATE_STI |
+				GUEST_INTR_STATE_MOV_SS);
+
 	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
 		if (vcpu->arch.interrupt.pending) {
 			enable_nmi_window(vcpu);
@@ -3347,6 +3403,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
+	set_debugreg(vcpu->arch.dr6, 6);
+
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3499,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
 	vcpu->arch.regs_dirty = 0;
 
+	get_debugreg(vcpu->arch.dr6, 6);
+
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
@@ -3595,7 +3655,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_put = vmx_vcpu_put,
 
 	.set_guest_debug = set_guest_debug,
-	.guest_debug_pre = kvm_guest_debug_pre,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 758b7a155ae9..8ca100a9ecac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -36,6 +36,7 @@
 #include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/cpufreq.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -69,6 +70,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +176,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 			   u32 error_code)
 {
 	++vcpu->stat.pf_guest;
+
 	if (vcpu->arch.exception.pending) {
 		if (vcpu->arch.exception.nr == PF_VECTOR) {
 			printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +365,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	}
 	kvm_x86_ops->set_cr4(vcpu, cr4);
 	vcpu->arch.cr4 = cr4;
+	vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
 	kvm_mmu_sync_global(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
@@ -442,6 +447,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +466,7 @@ static u32 msrs_to_save[] = {
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
 	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
-	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+	MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
 static unsigned num_msrs_to_save;
@@ -481,6 +491,28 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		return;
 	}
 
+	if (efer & EFER_FFXSR) {
+		struct kvm_cpuid_entry2 *feat;
+
+		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
+			printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
+			kvm_inject_gp(vcpu, 0);
+			return;
+		}
+	}
+
+	if (efer & EFER_SVME) {
+		struct kvm_cpuid_entry2 *feat;
+
+		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+			printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+			kvm_inject_gp(vcpu, 0);
+			return;
+		}
+	}
+
 	kvm_x86_ops->set_efer(vcpu, efer);
 
 	efer &= ~EFER_LMA;
@@ -586,6 +618,8 @@ static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *
 			  hv_clock->tsc_to_system_mul);
 }
 
+static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -596,9 +630,9 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
-	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
-		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
-		vcpu->hv_clock_tsc_khz = tsc_khz;
+	if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) {
+		kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz);
 	}
 
 	/* Keep irq disabled to prevent changes to the clock */
@@ -629,6 +663,16 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static int kvm_request_guest_time_update(struct kvm_vcpu *v)
+{
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+
+	if (!vcpu->time_page)
+		return 0;
+	set_bit(KVM_REQ_KVMCLOCK_UPDATE, &v->requests);
+	return 1;
+}
+
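This helper follows KVM's deferred-update idiom: any context may cheaply request the work, and vcpu_enter_guest() (see the hunk further down) applies it exactly once before the next guest entry. A minimal sketch of the pattern, with a hypothetical request bit:

/* Producer: request deferred work from any context. */
static void example_request(struct kvm_vcpu *v, int req_bit)
{
	set_bit(req_bit, &v->requests);
}

/* Consumer: run once per guest entry, applying pending work. */
static void example_consume(struct kvm_vcpu *v, int req_bit)
{
	if (test_and_clear_bit(req_bit, &v->requests))
		/* perform the deferred work exactly once */ ;
}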
 static bool msr_mtrr_valid(unsigned msr)
 {
 	switch (msr) {
@@ -722,6 +766,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
+	case MSR_VM_HSAVE_PA:
 		break;
 	case 0x200 ... 0x2ff:
 		return set_msr_mtrr(vcpu, msr, data);
@@ -758,7 +803,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 			vcpu->arch.time_page = NULL;
 		}
 
-		kvm_write_guest_time(vcpu);
+		kvm_request_guest_time_update(vcpu);
 		break;
 	}
 	default:
@@ -843,6 +888,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
+	case MSR_VM_HSAVE_PA:
 		data = 0;
 		break;
 	case MSR_MTRRcap:
@@ -967,10 +1013,13 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_REINJECT_CONTROL:
+	case KVM_CAP_IRQ_INJECT_STATUS:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -991,9 +1040,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IOMMU:
 		r = iommu_found();
 		break;
-	case KVM_CAP_CLOCKSOURCE:
-		r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
-		break;
 	default:
 		r = 0;
 		break;
@@ -1044,7 +1090,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-			cpuid_arg->entries);
+						      cpuid_arg->entries);
 		if (r)
 			goto out;
 
@@ -1064,7 +1110,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	kvm_write_guest_time(vcpu);
+	kvm_request_guest_time_update(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1142,8 +1188,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
-				    struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+				     struct kvm_cpuid2 *cpuid,
+				     struct kvm_cpuid_entry2 __user *entries)
 {
 	int r;
 
@@ -1162,8 +1208,8 @@ out:
 }
 
 static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
-				    struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+				     struct kvm_cpuid2 *cpuid,
+				     struct kvm_cpuid_entry2 __user *entries)
 {
 	int r;
 
@@ -1172,7 +1218,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
 		goto out;
 	r = -EFAULT;
 	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
-			   vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out;
 	return 0;
 
@@ -1181,18 +1227,13 @@ out:
 	return r;
 }
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			  u32 index)
+			   u32 index)
 {
 	entry->function = function;
 	entry->index = index;
 	cpuid_count(entry->function, entry->index,
-		&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
 	entry->flags = 0;
 }
 
@@ -1222,15 +1263,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #ifdef CONFIG_X86_64
 		bit(X86_FEATURE_LM) |
 #endif
+		bit(X86_FEATURE_FXSR_OPT) |
 		bit(X86_FEATURE_MMXEXT) |
 		bit(X86_FEATURE_3DNOWEXT) |
 		bit(X86_FEATURE_3DNOW);
 	const u32 kvm_supported_word3_x86_features =
 		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
 	const u32 kvm_supported_word6_x86_features =
-		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+		bit(X86_FEATURE_SVM);
 
-	/* all func 2 cpuid_count() should be called on the same cpu */
+	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 	do_cpuid_1_ent(entry, function, index);
 	++*nent;
@@ -1304,7 +1347,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 }
 
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				    struct kvm_cpuid_entry2 __user *entries)
+					     struct kvm_cpuid_entry2 __user *entries)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG;
@@ -1321,7 +1364,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	limit = cpuid_entries[0].eax;
 	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-				&nent, cpuid->nent);
+			     &nent, cpuid->nent);
 	r = -E2BIG;
 	if (nent >= cpuid->nent)
 		goto out_free;
@@ -1330,10 +1373,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	limit = cpuid_entries[nent - 1].eax;
 	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
 		do_cpuid_ent(&cpuid_entries[nent], func, 0,
-			       &nent, cpuid->nent);
+			     &nent, cpuid->nent);
 	r = -EFAULT;
 	if (copy_to_user(entries, cpuid_entries,
-			nent * sizeof(struct kvm_cpuid_entry2)))
+			 nent * sizeof(struct kvm_cpuid_entry2)))
 		goto out_free;
 	cpuid->nent = nent;
 	r = 0;
@@ -1477,7 +1520,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
-				cpuid_arg->entries);
+					      cpuid_arg->entries);
 		if (r)
 			goto out;
 		break;
@@ -1490,7 +1533,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
 		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
-				cpuid_arg->entries);
+					      cpuid_arg->entries);
 		if (r)
 			goto out;
 		r = -EFAULT;
@@ -1710,6 +1753,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 	return r;
 }
 
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+				 struct kvm_reinject_control *control)
+{
+	if (!kvm->arch.vpit)
+		return -ENXIO;
+	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	return 0;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -1807,13 +1859,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			}
 		} else
 			goto out;
+		r = kvm_setup_default_irq_routing(kvm);
+		if (r) {
+			kfree(kvm->arch.vpic);
+			kfree(kvm->arch.vioapic);
+			goto out;
+		}
 		break;
 	case KVM_CREATE_PIT:
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (kvm->arch.vpit)
+			goto create_pit_unlock;
 		r = -ENOMEM;
 		kvm->arch.vpit = kvm_create_pit(kvm);
 		if (kvm->arch.vpit)
 			r = 0;
+	create_pit_unlock:
+		mutex_unlock(&kvm->lock);
 		break;
+	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -1821,10 +1886,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (copy_from_user(&irq_event, argp, sizeof irq_event))
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
+			__s32 status;
 			mutex_lock(&kvm->lock);
-			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-				    irq_event.irq, irq_event.level);
+			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					     irq_event.irq, irq_event.level);
 			mutex_unlock(&kvm->lock);
+			if (ioctl == KVM_IRQ_LINE_STATUS) {
+				irq_event.status = status;
+				if (copy_to_user(argp, &irq_event,
+							sizeof irq_event))
+					goto out;
+			}
 			r = 0;
 		}
 		break;
@@ -1907,6 +1979,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_REINJECT_CONTROL: {
+		struct kvm_reinject_control control;
+		r =  -EFAULT;
+		if (copy_from_user(&control, argp, sizeof(control)))
+			goto out;
+		r = kvm_vm_ioctl_reinject(kvm, &control);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
@@ -1960,10 +2043,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
 	return dev;
 }
 
-int emulator_read_std(unsigned long addr,
-		      void *val,
-		      unsigned int bytes,
-		      struct kvm_vcpu *vcpu)
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			       struct kvm_vcpu *vcpu)
+{
+	void *data = val;
+	int r = X86EMUL_CONTINUE;
+
+	while (bytes) {
+		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+		unsigned offset = addr & (PAGE_SIZE-1);
+		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
+		int ret;
+
+		if (gpa == UNMAPPED_GVA) {
+			r = X86EMUL_PROPAGATE_FAULT;
+			goto out;
+		}
+		ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+		if (ret < 0) {
+			r = X86EMUL_UNHANDLEABLE;
+			goto out;
+		}
+
+		bytes -= toread;
+		data += toread;
+		addr += toread;
+	}
+out:
+	return r;
+}
+
+static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+				struct kvm_vcpu *vcpu)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
@@ -1971,27 +2082,27 @@ int emulator_read_std(unsigned long addr,
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
-		unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
 
 		if (gpa == UNMAPPED_GVA) {
 			r = X86EMUL_PROPAGATE_FAULT;
 			goto out;
 		}
-		ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+		ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
 		if (ret < 0) {
 			r = X86EMUL_UNHANDLEABLE;
 			goto out;
 		}
 
-		bytes -= tocopy;
-		data += tocopy;
-		addr += tocopy;
+		bytes -= towrite;
+		data += towrite;
+		addr += towrite;
 	}
 out:
 	return r;
 }
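Both helpers chunk the copy at page granularity because a gva->gpa translation is only valid within one page. A self-contained illustration of that arithmetic (userspace C, values assumed for the example):

#include <stdio.h>

int main(void)
{
	unsigned long addr = 0x1ff8;	/* example gva, 8 bytes before a page end */
	unsigned bytes = 16, page_size = 4096;
	unsigned offset = addr & (page_size - 1);
	/* first chunk stops at the page boundary, as in the loops above */
	unsigned chunk = bytes < page_size - offset ? bytes : page_size - offset;

	printf("first chunk: %u of %u bytes\n", chunk, bytes);
	return 0;
}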
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
 
 static int emulator_read_emulated(unsigned long addr,
 				  void *val,
@@ -2013,8 +2124,8 @@ static int emulator_read_emulated(unsigned long addr,
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 		goto mmio;
 
-	if (emulator_read_std(addr, val, bytes, vcpu)
-			== X86EMUL_CONTINUE)
+	if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+				== X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -2217,7 +2328,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 
 	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
-	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
 
 	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2225,7 +2336,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 static struct x86_emulate_ops emulate_ops = {
-	.read_std            = emulator_read_std,
+	.read_std            = kvm_read_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -2327,40 +2438,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
-		if (vcpu->arch.pio.guest_pages[i]) {
-			kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
-			vcpu->arch.pio.guest_pages[i] = NULL;
-		}
-}
-
 static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
 	void *p = vcpu->arch.pio_data;
-	void *q;
+	gva_t q = vcpu->arch.pio.guest_gva;
 	unsigned bytes;
-	int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+	int ret;
 
-	q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
-		 PAGE_KERNEL);
-	if (!q) {
-		free_pio_guest_pages(vcpu);
-		return -ENOMEM;
-	}
-	q += vcpu->arch.pio.guest_page_offset;
 	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
 	if (vcpu->arch.pio.in)
-		memcpy(q, p, bytes);
+		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
 	else
-		memcpy(p, q, bytes);
-	q -= vcpu->arch.pio.guest_page_offset;
-	vunmap(q);
-	free_pio_guest_pages(vcpu);
-	return 0;
+		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+	return ret;
 }
 
 int complete_pio(struct kvm_vcpu *vcpu)
@@ -2471,7 +2561,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 0;
 	vcpu->arch.pio.down = 0;
-	vcpu->arch.pio.guest_page_offset = 0;
 	vcpu->arch.pio.rep = 0;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2499,9 +2588,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		  gva_t address, int rep, unsigned port)
 {
 	unsigned now, in_page;
-	int i, ret = 0;
-	int nr_pages = 1;
-	struct page *page;
+	int ret = 0;
 	struct kvm_io_device *pio_dev;
 
 	vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2513,7 +2600,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.in = in;
 	vcpu->arch.pio.string = 1;
 	vcpu->arch.pio.down = down;
-	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
 	vcpu->arch.pio.rep = rep;
 
 	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2533,15 +2619,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	else
 		in_page = offset_in_page(address) + size;
 	now = min(count, (unsigned long)in_page / size);
-	if (!now) {
-		/*
-		 * String I/O straddles page boundary.  Pin two guest pages
-		 * so that we satisfy atomicity constraints.  Do just one
-		 * transaction to avoid complexity.
-		 */
-		nr_pages = 2;
+	if (!now)
 		now = 1;
-	}
 	if (down) {
 		/*
 		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
@@ -2556,15 +2635,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
-	for (i = 0; i < nr_pages; ++i) {
-		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
-		vcpu->arch.pio.guest_pages[i] = page;
-		if (!page) {
-			kvm_inject_gp(vcpu, 0);
-			free_pio_guest_pages(vcpu);
-			return 1;
-		}
-	}
+	vcpu->arch.pio.guest_gva = address;
 
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
@@ -2572,7 +2643,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
-		if (ret >= 0 && pio_dev) {
+		if (ret == X86EMUL_PROPAGATE_FAULT) {
+			kvm_inject_gp(vcpu, 0);
+			return 1;
+		}
+		if (ret == 0 && pio_dev) {
 			pio_string_write(pio_dev, vcpu);
 			complete_pio(vcpu);
 			if (vcpu->arch.pio.count == 0)
@@ -2587,9 +2662,72 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
+static void bounce_off(void *info)
+{
+	/* nothing */
+}
+
+static unsigned int  ref_freq;
+static unsigned long tsc_khz_ref;
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				     void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+	int i, send_ipi = 0;
+
+	if (!ref_freq)
+		ref_freq = freq->old;
+
+	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+		return 0;
+	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+		return 0;
+	per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
+
+	spin_lock(&kvm_lock);
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (vcpu->cpu != freq->cpu)
+				continue;
+			if (!kvm_request_guest_time_update(vcpu))
+				continue;
+			if (vcpu->cpu != smp_processor_id())
+				send_ipi++;
+		}
+	}
+	spin_unlock(&kvm_lock);
+
+	if (freq->old < freq->new && send_ipi) {
+		/*
+		 * We upscale the frequency.  Must make sure the guest
+		 * doesn't see old kvmclock values while running with
+		 * the new frequency, otherwise we risk the guest seeing
+		 * time go backwards.
+		 *
+		 * In case we update the frequency for another cpu
+		 * (which might be in guest context) send an interrupt
+		 * to kick the cpu out of guest context.  Next time
+		 * guest context is entered kvmclock will be updated,
+		 * so the guest will not see stale values.
+		 */
+		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+	}
+	return 0;
+}
+
+static struct notifier_block kvmclock_cpufreq_notifier_block = {
+	.notifier_call  = kvmclock_cpufreq_notifier
+};
+
 int kvm_arch_init(void *opaque)
 {
-	int r;
+	int r, cpu;
 	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
 
 	if (kvm_x86_ops) {
@@ -2620,6 +2758,15 @@ int kvm_arch_init(void *opaque)
 	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+
+	for_each_possible_cpu(cpu)
+		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		tsc_khz_ref = tsc_khz;
+		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
+					  CPUFREQ_TRANSITION_NOTIFIER);
+	}
+
 	return 0;
 
 out:
@@ -2827,25 +2974,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
 	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
 		return 0;
 	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
-		!(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
 		return 0;
 	return 1;
 }
 
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+					      u32 function, u32 index)
 {
 	int i;
-	u32 function, index;
-	struct kvm_cpuid_entry2 *e, *best;
+	struct kvm_cpuid_entry2 *best = NULL;
 
-	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
-	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
-	best = NULL;
 	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+		struct kvm_cpuid_entry2 *e;
+
 		e = &vcpu->arch.cpuid_entries[i];
 		if (is_matching_cpuid_entry(e, function, index)) {
 			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2860,6 +3002,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 		if (!best || e->function > best->function)
 			best = e;
 	}
+	return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+	u32 function, index;
+	struct kvm_cpuid_entry2 *best;
+
+	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+	best = kvm_find_cpuid_entry(vcpu, function, index);
 	if (best) {
 		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
 		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
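With the lookup factored out, other code can query guest CPUID state directly; the EFER.SVME check added earlier in this patch reduces to something like this sketch:

/* Sketch: does the guest's CPUID advertise SVM? (mirrors set_efer()) */
static bool guest_has_svm_sketch(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *e = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);

	return e && (e->ecx & bit(X86_FEATURE_SVM));
}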
@@ -2945,6 +3102,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_KVMCLOCK_UPDATE, &vcpu->requests))
+			kvm_write_guest_time(vcpu);
 		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
 			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
@@ -2979,9 +3138,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
-	if (vcpu->guest_debug.enabled)
-		kvm_x86_ops->guest_debug_pre(vcpu);
-
 	vcpu->guest_mode = 1;
 	/*
 	 * Make sure that guest_mode assignment won't happen after
@@ -3002,10 +3158,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	kvm_guest_enter();
 
+	get_debugreg(vcpu->arch.host_dr6, 6);
+	get_debugreg(vcpu->arch.host_dr7, 7);
+	if (unlikely(vcpu->arch.switch_db_regs)) {
+		get_debugreg(vcpu->arch.host_db[0], 0);
+		get_debugreg(vcpu->arch.host_db[1], 1);
+		get_debugreg(vcpu->arch.host_db[2], 2);
+		get_debugreg(vcpu->arch.host_db[3], 3);
+
+		set_debugreg(0, 7);
+		set_debugreg(vcpu->arch.eff_db[0], 0);
+		set_debugreg(vcpu->arch.eff_db[1], 1);
+		set_debugreg(vcpu->arch.eff_db[2], 2);
+		set_debugreg(vcpu->arch.eff_db[3], 3);
+	}
 
 	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
+	if (unlikely(vcpu->arch.switch_db_regs)) {
+		set_debugreg(0, 7);
+		set_debugreg(vcpu->arch.host_db[0], 0);
+		set_debugreg(vcpu->arch.host_db[1], 1);
+		set_debugreg(vcpu->arch.host_db[2], 2);
+		set_debugreg(vcpu->arch.host_db[3], 3);
+	}
+	set_debugreg(vcpu->arch.host_dr6, 6);
+	set_debugreg(vcpu->arch.host_dr7, 7);
+
 	vcpu->guest_mode = 0;
 	local_irq_enable();
 
3011 3191
@@ -3192,7 +3372,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3192 /* 3372 /*
3193 * Don't leak debug flags in case they were set for guest debugging 3373 * Don't leak debug flags in case they were set for guest debugging
3194 */ 3374 */
3195 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) 3375 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
3196 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 3376 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3197 3377
3198 vcpu_put(vcpu); 3378 vcpu_put(vcpu);
@@ -3811,15 +3991,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3811 return 0; 3991 return 0;
3812} 3992}
3813 3993
3814int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, 3994int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3815 struct kvm_debug_guest *dbg) 3995 struct kvm_guest_debug *dbg)
3816{ 3996{
3817 int r; 3997 int i, r;
3818 3998
3819 vcpu_load(vcpu); 3999 vcpu_load(vcpu);
3820 4000
4001 if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
4002 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
4003 for (i = 0; i < KVM_NR_DB_REGS; ++i)
4004 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
4005 vcpu->arch.switch_db_regs =
4006 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
4007 } else {
4008 for (i = 0; i < KVM_NR_DB_REGS; i++)
4009 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
4010 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
4011 }
4012
3821 r = kvm_x86_ops->set_guest_debug(vcpu, dbg); 4013 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
3822 4014
4015 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
4016 kvm_queue_exception(vcpu, DB_VECTOR);
4017 else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
4018 kvm_queue_exception(vcpu, BP_VECTOR);
4019
3823 vcpu_put(vcpu); 4020 vcpu_put(vcpu);
3824 4021
3825 return r; 4022 return r;
@@ -4007,6 +4204,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
4007 vcpu->arch.nmi_pending = false; 4204 vcpu->arch.nmi_pending = false;
4008 vcpu->arch.nmi_injected = false; 4205 vcpu->arch.nmi_injected = false;
4009 4206
4207 vcpu->arch.switch_db_regs = 0;
4208 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
4209 vcpu->arch.dr6 = DR6_FIXED_1;
4210 vcpu->arch.dr7 = DR7_FIXED_1;
4211
4010 return kvm_x86_ops->vcpu_reset(vcpu); 4212 return kvm_x86_ops->vcpu_reset(vcpu);
4011} 4213}
4012 4214
@@ -4100,6 +4302,8 @@ struct kvm *kvm_arch_create_vm(void)
4100 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ 4302 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
4101 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); 4303 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
4102 4304
4305 rdtscll(kvm->arch.vm_init_tsc);
4306
4103 return kvm; 4307 return kvm;
4104} 4308}
4105 4309
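
For context on the x86.c hunks above: the guest-debug rework replaces the old vcpu->guest_debug struct with a control-flag interface, and vcpu_enter_guest() now swaps the host's DR0-DR3/DR6/DR7 around VM entry whenever hardware breakpoints are armed. A minimal userspace sketch of driving the new interface through the KVM_SET_GUEST_DEBUG ioctl - the vcpu_fd and guest_addr names are illustrative, and the DR7 value here only sets the L0 enable bit:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Sketch only: arm one hardware execute breakpoint at guest_addr.
 * vcpu_fd is an already-open KVM vcpu file descriptor. */
static int arm_hw_breakpoint(int vcpu_fd, unsigned long guest_addr)
{
        struct kvm_guest_debug dbg;

        memset(&dbg, 0, sizeof(dbg));
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg.arch.debugreg[0] = guest_addr;  /* DR0 = breakpoint address */
        dbg.arch.debugreg[7] = 0x1;         /* DR7: L0 local enable     */

        return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

With debugreg[7] enabling a breakpoint, the kernel sets switch_db_regs, which is exactly what triggers the eff_db/host_db swap added to vcpu_enter_guest() above.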
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ca91749d2083 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
178 0, ImplicitOps | Stack, 0, 0, 178 0, ImplicitOps | Stack, 0, 0,
179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, 179 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
180 /* 0xC8 - 0xCF */ 180 /* 0xC8 - 0xCF */
181 0, 0, 0, 0, 0, 0, 0, 0, 181 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
182 /* 0xD0 - 0xD7 */ 182 /* 0xD0 - 0xD7 */
183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 183 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, 184 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1136} 1136}
1137 1137
1138static int emulate_pop(struct x86_emulate_ctxt *ctxt, 1138static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1139 struct x86_emulate_ops *ops) 1139 struct x86_emulate_ops *ops,
1140 void *dest, int len)
1140{ 1141{
1141 struct decode_cache *c = &ctxt->decode; 1142 struct decode_cache *c = &ctxt->decode;
1142 int rc; 1143 int rc;
1143 1144
1144 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1145 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1145 c->regs[VCPU_REGS_RSP]), 1146 c->regs[VCPU_REGS_RSP]),
1146 &c->src.val, c->src.bytes, ctxt->vcpu); 1147 dest, len, ctxt->vcpu);
1147 if (rc != 0) 1148 if (rc != 0)
1148 return rc; 1149 return rc;
1149 1150
1150 register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); 1151 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1151 return rc; 1152 return rc;
1152} 1153}
1153 1154
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1157 struct decode_cache *c = &ctxt->decode; 1158 struct decode_cache *c = &ctxt->decode;
1158 int rc; 1159 int rc;
1159 1160
1160 c->src.bytes = c->dst.bytes; 1161 rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1161 rc = emulate_pop(ctxt, ops);
1162 if (rc != 0) 1162 if (rc != 0)
1163 return rc; 1163 return rc;
1164 c->dst.val = c->src.val;
1165 return 0; 1164 return 0;
1166} 1165}
1167 1166
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1279 return 0; 1278 return 0;
1280} 1279}
1281 1280
1281static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1282 struct x86_emulate_ops *ops)
1283{
1284 struct decode_cache *c = &ctxt->decode;
1285 int rc;
1286 unsigned long cs;
1287
1288 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1289 if (rc)
1290 return rc;
1291 if (c->op_bytes == 4)
1292 c->eip = (u32)c->eip;
1293 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1294 if (rc)
1295 return rc;
1296 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
1297 return rc;
1298}
1299
1282static inline int writeback(struct x86_emulate_ctxt *ctxt, 1300static inline int writeback(struct x86_emulate_ctxt *ctxt,
1283 struct x86_emulate_ops *ops) 1301 struct x86_emulate_ops *ops)
1284{ 1302{
@@ -1467,11 +1485,9 @@ special_insn:
1467 break; 1485 break;
1468 case 0x58 ... 0x5f: /* pop reg */ 1486 case 0x58 ... 0x5f: /* pop reg */
1469 pop_instruction: 1487 pop_instruction:
1470 c->src.bytes = c->op_bytes; 1488 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1471 rc = emulate_pop(ctxt, ops);
1472 if (rc != 0) 1489 if (rc != 0)
1473 goto done; 1490 goto done;
1474 c->dst.val = c->src.val;
1475 break; 1491 break;
1476 case 0x63: /* movsxd */ 1492 case 0x63: /* movsxd */
1477 if (ctxt->mode != X86EMUL_MODE_PROT64) 1493 if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
1738 mov: 1754 mov:
1739 c->dst.val = c->src.val; 1755 c->dst.val = c->src.val;
1740 break; 1756 break;
1757 case 0xcb: /* ret far */
1758 rc = emulate_ret_far(ctxt, ops);
1759 if (rc)
1760 goto done;
1761 break;
1741 case 0xd0 ... 0xd1: /* Grp2 */ 1762 case 0xd0 ... 0xd1: /* Grp2 */
1742 c->src.val = 1; 1763 c->src.val = 1;
1743 emulate_grp2(ctxt); 1764 emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
1908 c->dst.type = OP_NONE; 1929 c->dst.type = OP_NONE;
1909 break; 1930 break;
1910 case 3: /* lidt/vmmcall */ 1931 case 3: /* lidt/vmmcall */
1911 if (c->modrm_mod == 3 && c->modrm_rm == 1) { 1932 if (c->modrm_mod == 3) {
1912 rc = kvm_fix_hypercall(ctxt->vcpu); 1933 switch (c->modrm_rm) {
1913 if (rc) 1934 case 1:
1914 goto done; 1935 rc = kvm_fix_hypercall(ctxt->vcpu);
1915 kvm_emulate_hypercall(ctxt->vcpu); 1936 if (rc)
1937 goto done;
1938 break;
1939 default:
1940 goto cannot_emulate;
1941 }
1916 } else { 1942 } else {
1917 rc = read_descriptor(ctxt, ops, c->src.ptr, 1943 rc = read_descriptor(ctxt, ops, c->src.ptr,
1918 &size, &address, 1944 &size, &address,
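
emulate_pop() now writes into a caller-supplied buffer, which is what lets the new emulate_ret_far() pop two differently-consumed values in a row: the new EIP, then the CS selector. A rough standalone rendering of the far-return semantics - not the emulator's code; little-endian stack image assumed and the demo_* name invented:

#include <stdint.h>
#include <string.h>

/* RETF pops an op_bytes-wide IP slot, then an op_bytes-wide slot
 * whose low 16 bits are the new CS selector. */
static void demo_ret_far(const unsigned char *sp, int op_bytes,
                         uint64_t *ip, uint16_t *cs)
{
        uint64_t v = 0;

        memcpy(&v, sp, op_bytes);               /* pop IP */
        if (op_bytes == 4)
                v = (uint32_t)v;                /* truncate, as above */
        *ip = v;

        v = 0;
        memcpy(&v, sp + op_bytes, op_bytes);    /* pop CS slot */
        *cs = (uint16_t)v;
}

The 0xCB slot gaining ImplicitOps | Stack in the opcode table is what routes the instruction into this handler.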
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 9fe4ddaa8f6f..e94a11e42f98 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -107,7 +107,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,
107 local_irq_save(flags); 107 local_irq_save(flags);
108 if (lguest_data.hcall_status[next_call] != 0xFF) { 108 if (lguest_data.hcall_status[next_call] != 0xFF) {
109 /* Table full, so do normal hcall which will flush table. */ 109 /* Table full, so do normal hcall which will flush table. */
110 hcall(call, arg1, arg2, arg3); 110 kvm_hypercall3(call, arg1, arg2, arg3);
111 } else { 111 } else {
112 lguest_data.hcalls[next_call].arg0 = call; 112 lguest_data.hcalls[next_call].arg0 = call;
113 lguest_data.hcalls[next_call].arg1 = arg1; 113 lguest_data.hcalls[next_call].arg1 = arg1;
@@ -134,13 +134,32 @@ static void async_hcall(unsigned long call, unsigned long arg1,
134 * 134 *
135 * So, when we're in lazy mode, we call async_hcall() to store the call for 135 * So, when we're in lazy mode, we call async_hcall() to store the call for
136 * future processing: */ 136 * future processing: */
137static void lazy_hcall(unsigned long call, 137static void lazy_hcall1(unsigned long call,
138 unsigned long arg1)
139{
140 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
141 kvm_hypercall1(call, arg1);
142 else
143 async_hcall(call, arg1, 0, 0);
144}
145
146static void lazy_hcall2(unsigned long call,
147 unsigned long arg1,
148 unsigned long arg2)
149{
150 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
151 kvm_hypercall2(call, arg1, arg2);
152 else
153 async_hcall(call, arg1, arg2, 0);
154}
155
156static void lazy_hcall3(unsigned long call,
138 unsigned long arg1, 157 unsigned long arg1,
139 unsigned long arg2, 158 unsigned long arg2,
140 unsigned long arg3) 159 unsigned long arg3)
141{ 160{
142 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) 161 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
143 hcall(call, arg1, arg2, arg3); 162 kvm_hypercall3(call, arg1, arg2, arg3);
144 else 163 else
145 async_hcall(call, arg1, arg2, arg3); 164 async_hcall(call, arg1, arg2, arg3);
146} 165}
@@ -150,7 +169,7 @@ static void lazy_hcall(unsigned long call,
150static void lguest_leave_lazy_mode(void) 169static void lguest_leave_lazy_mode(void)
151{ 170{
152 paravirt_leave_lazy(paravirt_get_lazy_mode()); 171 paravirt_leave_lazy(paravirt_get_lazy_mode());
153 hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); 172 kvm_hypercall0(LHCALL_FLUSH_ASYNC);
154} 173}
155 174
156/*G:033 175/*G:033
@@ -229,7 +248,7 @@ static void lguest_write_idt_entry(gate_desc *dt,
229 /* Keep the local copy up to date. */ 248 /* Keep the local copy up to date. */
230 native_write_idt_entry(dt, entrynum, g); 249 native_write_idt_entry(dt, entrynum, g);
231 /* Tell Host about this new entry. */ 250 /* Tell Host about this new entry. */
232 hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); 251 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
233} 252}
234 253
235/* Changing to a different IDT is very rare: we keep the IDT up-to-date every 254/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
@@ -241,7 +260,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)
241 struct desc_struct *idt = (void *)desc->address; 260 struct desc_struct *idt = (void *)desc->address;
242 261
243 for (i = 0; i < (desc->size+1)/8; i++) 262 for (i = 0; i < (desc->size+1)/8; i++)
244 hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); 263 kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b);
245} 264}
246 265
247/* 266/*
@@ -261,8 +280,8 @@ static void lguest_load_idt(const struct desc_ptr *desc)
261 */ 280 */
262static void lguest_load_gdt(const struct desc_ptr *desc) 281static void lguest_load_gdt(const struct desc_ptr *desc)
263{ 282{
264 BUG_ON((desc->size+1)/8 != GDT_ENTRIES); 283 BUG_ON((desc->size + 1) / 8 != GDT_ENTRIES);
265 hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); 284 kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES);
266} 285}
267 286
268/* For a single GDT entry which changes, we do the lazy thing: alter our GDT, 287/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
@@ -272,7 +291,7 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
272 const void *desc, int type) 291 const void *desc, int type)
273{ 292{
274 native_write_gdt_entry(dt, entrynum, desc, type); 293 native_write_gdt_entry(dt, entrynum, desc, type);
275 hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); 294 kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
276} 295}
277 296
278/* OK, I lied. There are three "thread local storage" GDT entries which change 297/* OK, I lied. There are three "thread local storage" GDT entries which change
@@ -284,7 +303,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
284 * can't handle us removing entries we're currently using. So we clear 303 * can't handle us removing entries we're currently using. So we clear
285 * the GS register here: if it's needed it'll be reloaded anyway. */ 304 * the GS register here: if it's needed it'll be reloaded anyway. */
286 lazy_load_gs(0); 305 lazy_load_gs(0);
287 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); 306 lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu);
288} 307}
289 308
290/*G:038 That's enough excitement for now, back to ploughing through each of 309/*G:038 That's enough excitement for now, back to ploughing through each of
@@ -382,7 +401,7 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
382static unsigned long current_cr0; 401static unsigned long current_cr0;
383static void lguest_write_cr0(unsigned long val) 402static void lguest_write_cr0(unsigned long val)
384{ 403{
385 lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0); 404 lazy_hcall1(LHCALL_TS, val & X86_CR0_TS);
386 current_cr0 = val; 405 current_cr0 = val;
387} 406}
388 407
@@ -396,7 +415,7 @@ static unsigned long lguest_read_cr0(void)
396 * the vowels have been optimized out. */ 415 * the vowels have been optimized out. */
397static void lguest_clts(void) 416static void lguest_clts(void)
398{ 417{
399 lazy_hcall(LHCALL_TS, 0, 0, 0); 418 lazy_hcall1(LHCALL_TS, 0);
400 current_cr0 &= ~X86_CR0_TS; 419 current_cr0 &= ~X86_CR0_TS;
401} 420}
402 421
@@ -418,7 +437,7 @@ static bool cr3_changed = false;
418static void lguest_write_cr3(unsigned long cr3) 437static void lguest_write_cr3(unsigned long cr3)
419{ 438{
420 lguest_data.pgdir = cr3; 439 lguest_data.pgdir = cr3;
421 lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); 440 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
422 cr3_changed = true; 441 cr3_changed = true;
423} 442}
424 443
@@ -490,11 +509,17 @@ static void lguest_write_cr4(unsigned long val)
490 * into a process' address space. We set the entry then tell the Host the 509 * into a process' address space. We set the entry then tell the Host the
491 * toplevel and address this corresponds to. The Guest uses one pagetable per 510 * toplevel and address this corresponds to. The Guest uses one pagetable per
492 * process, so we need to tell the Host which one we're changing (mm->pgd). */ 511 * process, so we need to tell the Host which one we're changing (mm->pgd). */
512static void lguest_pte_update(struct mm_struct *mm, unsigned long addr,
513 pte_t *ptep)
514{
515 lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low);
516}
517
493static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, 518static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
494 pte_t *ptep, pte_t pteval) 519 pte_t *ptep, pte_t pteval)
495{ 520{
496 *ptep = pteval; 521 *ptep = pteval;
497 lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); 522 lguest_pte_update(mm, addr, ptep);
498} 523}
499 524
500/* The Guest calls this to set a top-level entry. Again, we set the entry then 525/* The Guest calls this to set a top-level entry. Again, we set the entry then
@@ -503,8 +528,8 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr,
503static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) 528static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
504{ 529{
505 *pmdp = pmdval; 530 *pmdp = pmdval;
506 lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, 531 lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK,
507 (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); 532 (__pa(pmdp) & (PAGE_SIZE - 1)) / 4);
508} 533}
509 534
510/* There are a couple of legacy places where the kernel sets a PTE, but we 535/* There are a couple of legacy places where the kernel sets a PTE, but we
@@ -520,7 +545,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
520{ 545{
521 *ptep = pteval; 546 *ptep = pteval;
522 if (cr3_changed) 547 if (cr3_changed)
523 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 548 lazy_hcall1(LHCALL_FLUSH_TLB, 1);
524} 549}
525 550
526/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on 551/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
@@ -536,7 +561,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
536static void lguest_flush_tlb_single(unsigned long addr) 561static void lguest_flush_tlb_single(unsigned long addr)
537{ 562{
538 /* Simply set it to zero: if it was not, it will fault back in. */ 563 /* Simply set it to zero: if it was not, it will fault back in. */
539 lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); 564 lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
540} 565}
541 566
542/* This is what happens after the Guest has removed a large number of entries. 567/* This is what happens after the Guest has removed a large number of entries.
@@ -544,7 +569,7 @@ static void lguest_flush_tlb_single(unsigned long addr)
544 * have changed, ie. virtual addresses below PAGE_OFFSET. */ 569 * have changed, ie. virtual addresses below PAGE_OFFSET. */
545static void lguest_flush_tlb_user(void) 570static void lguest_flush_tlb_user(void)
546{ 571{
547 lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); 572 lazy_hcall1(LHCALL_FLUSH_TLB, 0);
548} 573}
549 574
550/* This is called when the kernel page tables have changed. That's not very 575/* This is called when the kernel page tables have changed. That's not very
@@ -552,7 +577,7 @@ static void lguest_flush_tlb_user(void)
552 * slow), so it's worth separating this from the user flushing above. */ 577 * slow), so it's worth separating this from the user flushing above. */
553static void lguest_flush_tlb_kernel(void) 578static void lguest_flush_tlb_kernel(void)
554{ 579{
555 lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); 580 lazy_hcall1(LHCALL_FLUSH_TLB, 1);
556} 581}
557 582
558/* 583/*
@@ -689,7 +714,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
689 } 714 }
690 715
691 /* Please wake us this far in the future. */ 716 /* Please wake us this far in the future. */
692 hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); 717 kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta);
693 return 0; 718 return 0;
694} 719}
695 720
@@ -700,7 +725,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,
700 case CLOCK_EVT_MODE_UNUSED: 725 case CLOCK_EVT_MODE_UNUSED:
701 case CLOCK_EVT_MODE_SHUTDOWN: 726 case CLOCK_EVT_MODE_SHUTDOWN:
702 /* A 0 argument shuts the clock down. */ 727 /* A 0 argument shuts the clock down. */
703 hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); 728 kvm_hypercall0(LHCALL_SET_CLOCKEVENT);
704 break; 729 break;
705 case CLOCK_EVT_MODE_ONESHOT: 730 case CLOCK_EVT_MODE_ONESHOT:
706 /* This is what we expect. */ 731 /* This is what we expect. */
@@ -775,8 +800,8 @@ static void lguest_time_init(void)
775static void lguest_load_sp0(struct tss_struct *tss, 800static void lguest_load_sp0(struct tss_struct *tss,
776 struct thread_struct *thread) 801 struct thread_struct *thread)
777{ 802{
778 lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->sp0, 803 lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
779 THREAD_SIZE/PAGE_SIZE); 804 THREAD_SIZE / PAGE_SIZE);
780} 805}
781 806
782/* Let's just say, I wouldn't do debugging under a Guest. */ 807/* Let's just say, I wouldn't do debugging under a Guest. */
@@ -849,7 +874,7 @@ static void set_lguest_basic_apic_ops(void)
849/* STOP! Until an interrupt comes in. */ 874/* STOP! Until an interrupt comes in. */
850static void lguest_safe_halt(void) 875static void lguest_safe_halt(void)
851{ 876{
852 hcall(LHCALL_HALT, 0, 0, 0); 877 kvm_hypercall0(LHCALL_HALT);
853} 878}
854 879
855/* The SHUTDOWN hypercall takes a string to describe what's happening, and 880/* The SHUTDOWN hypercall takes a string to describe what's happening, and
@@ -859,7 +884,8 @@ static void lguest_safe_halt(void)
859 * rather than virtual addresses, so we use __pa() here. */ 884 * rather than virtual addresses, so we use __pa() here. */
860static void lguest_power_off(void) 885static void lguest_power_off(void)
861{ 886{
862 hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0); 887 kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"),
888 LGUEST_SHUTDOWN_POWEROFF);
863} 889}
864 890
865/* 891/*
@@ -869,7 +895,7 @@ static void lguest_power_off(void)
869 */ 895 */
870static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) 896static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
871{ 897{
872 hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0); 898 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF);
873 /* The hcall won't return, but to keep gcc happy, we're "done". */ 899 /* The hcall won't return, but to keep gcc happy, we're "done". */
874 return NOTIFY_DONE; 900 return NOTIFY_DONE;
875} 901}
@@ -910,7 +936,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
910 len = sizeof(scratch) - 1; 936 len = sizeof(scratch) - 1;
911 scratch[len] = '\0'; 937 scratch[len] = '\0';
912 memcpy(scratch, buf, len); 938 memcpy(scratch, buf, len);
913 hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0); 939 kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch));
914 940
915 /* This routine returns the number of bytes actually written. */ 941 /* This routine returns the number of bytes actually written. */
916 return len; 942 return len;
@@ -920,7 +946,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
920 * Launcher to reboot us. */ 946 * Launcher to reboot us. */
921static void lguest_restart(char *reason) 947static void lguest_restart(char *reason)
922{ 948{
923 hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); 949 kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART);
924} 950}
925 951
926/*G:050 952/*G:050
@@ -1040,6 +1066,8 @@ __init void lguest_init(void)
1040 pv_mmu_ops.read_cr3 = lguest_read_cr3; 1066 pv_mmu_ops.read_cr3 = lguest_read_cr3;
1041 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; 1067 pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
1042 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; 1068 pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
1069 pv_mmu_ops.pte_update = lguest_pte_update;
1070 pv_mmu_ops.pte_update_defer = lguest_pte_update;
1043 1071
1044#ifdef CONFIG_X86_LOCAL_APIC 1072#ifdef CONFIG_X86_LOCAL_APIC
1045 /* apic read/write intercepts */ 1073 /* apic read/write intercepts */
@@ -1058,14 +1086,6 @@ __init void lguest_init(void)
1058 * lguest_init() where the rest of the fairly chaotic boot setup 1086 * lguest_init() where the rest of the fairly chaotic boot setup
1059 * occurs. */ 1087 * occurs. */
1060 1088
1061 /* The native boot code sets up initial page tables immediately after
1062 * the kernel itself, and sets init_pg_tables_end so they're not
1063 * clobbered. The Launcher places our initial pagetables somewhere at
1064 * the top of our physical memory, so we don't need extra space: set
1065 * init_pg_tables_end to the end of the kernel. */
1066 init_pg_tables_start = __pa(pg0);
1067 init_pg_tables_end = __pa(pg0);
1068
1069 /* As described in head_32.S, we map the first 128M of memory. */ 1089 /* As described in head_32.S, we map the first 128M of memory. */
1070 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; 1090 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1071 1091
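
Every hcall()-to-kvm_hypercallN conversion in this file targets the same ABI: call number in %eax, up to four arguments in %ebx/%ecx/%edx/%esi, return value back in %eax. A minimal sketch of the three-argument form, assuming the 2.6.30-era <asm/kvm_para.h> convention where KVM_HYPERCALL expands to the ".byte 0x0f,0x01,0xc1" (vmcall) sequence also visible in i386_head.S below:

/* Sketch only - mirrors the shape of this era's kvm_hypercall3(). */
static inline unsigned long demo_hypercall3(unsigned long nr,
                                            unsigned long p1,
                                            unsigned long p2,
                                            unsigned long p3)
{
        unsigned long ret;

        /* 0f 01 c1 is VMCALL; the host may patch it per CPU vendor. */
        asm volatile(".byte 0x0f,0x01,0xc1"
                     : "=a" (ret)
                     : "a" (nr), "b" (p1), "c" (p2), "d" (p3)
                     : "memory");
        return ret;
}

Splitting lazy_hcall() into lazy_hcall1/2/3 lets each caller use the cheapest wrapper for its arity instead of always passing three arguments.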
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 10b9bd35a8ff..f79541989471 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -27,8 +27,8 @@ ENTRY(lguest_entry)
27 /* We make the "initialization" hypercall now to tell the Host about 27 /* We make the "initialization" hypercall now to tell the Host about
28 * us, and also find out where it put our page tables. */ 28 * us, and also find out where it put our page tables. */
29 movl $LHCALL_LGUEST_INIT, %eax 29 movl $LHCALL_LGUEST_INIT, %eax
30 movl $lguest_data - __PAGE_OFFSET, %edx 30 movl $lguest_data - __PAGE_OFFSET, %ebx
31 int $LGUEST_TRAP_ENTRY 31 .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
32 32
33 /* Set up the initial stack so we can run C code. */ 33 /* Set up the initial stack so we can run C code. */
34 movl $(init_thread_union+THREAD_SIZE),%esp 34 movl $(init_thread_union+THREAD_SIZE),%esp
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index c22981fa2f3a..ad5441ed1b57 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,30 +1,38 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4
5#include <asm/cpufeature.h> 5#include <asm/cpufeature.h>
6#include <asm/dwarf2.h>
6 7
7/* 8/*
8 * memcpy - Copy a memory block. 9 * memcpy - Copy a memory block.
9 * 10 *
10 * Input: 11 * Input:
11 * rdi destination 12 * rdi destination
12 * rsi source 13 * rsi source
13 * rdx count 14 * rdx count
14 * 15 *
15 * Output: 16 * Output:
16 * rax original destination 17 * rax original destination
17 */ 18 */
18 19
20/*
21 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
22 *
23 * Calls to this get patched into the kernel image via the
24 * alternative instructions framework:
25 */
19 ALIGN 26 ALIGN
20memcpy_c: 27memcpy_c:
21 CFI_STARTPROC 28 CFI_STARTPROC
22 movq %rdi,%rax 29 movq %rdi, %rax
23 movl %edx,%ecx 30
24 shrl $3,%ecx 31 movl %edx, %ecx
25 andl $7,%edx 32 shrl $3, %ecx
33 andl $7, %edx
26 rep movsq 34 rep movsq
27 movl %edx,%ecx 35 movl %edx, %ecx
28 rep movsb 36 rep movsb
29 ret 37 ret
30 CFI_ENDPROC 38 CFI_ENDPROC
@@ -33,99 +41,110 @@ ENDPROC(memcpy_c)
33ENTRY(__memcpy) 41ENTRY(__memcpy)
34ENTRY(memcpy) 42ENTRY(memcpy)
35 CFI_STARTPROC 43 CFI_STARTPROC
36 pushq %rbx
37 CFI_ADJUST_CFA_OFFSET 8
38 CFI_REL_OFFSET rbx, 0
39 movq %rdi,%rax
40 44
41 movl %edx,%ecx 45 /*
42 shrl $6,%ecx 46 * Put the number of full 64-byte blocks into %ecx.
47 * Tail portion is handled at the end:
48 */
49 movq %rdi, %rax
50 movl %edx, %ecx
51 shrl $6, %ecx
43 jz .Lhandle_tail 52 jz .Lhandle_tail
44 53
45 .p2align 4 54 .p2align 4
46.Lloop_64: 55.Lloop_64:
56 /*
57 * We decrement the loop index here - and the zero-flag is
 58	 * checked at the end of the loop (instructions in between do
59 * not change the zero flag):
60 */
47 decl %ecx 61 decl %ecx
48 62
49 movq (%rsi),%r11 63 /*
50 movq 8(%rsi),%r8 64 * Move in blocks of 4x16 bytes:
65 */
66 movq 0*8(%rsi), %r11
67 movq 1*8(%rsi), %r8
68 movq %r11, 0*8(%rdi)
69 movq %r8, 1*8(%rdi)
51 70
52 movq %r11,(%rdi) 71 movq 2*8(%rsi), %r9
53 movq %r8,1*8(%rdi) 72 movq 3*8(%rsi), %r10
73 movq %r9, 2*8(%rdi)
74 movq %r10, 3*8(%rdi)
54 75
55 movq 2*8(%rsi),%r9 76 movq 4*8(%rsi), %r11
56 movq 3*8(%rsi),%r10 77 movq 5*8(%rsi), %r8
78 movq %r11, 4*8(%rdi)
79 movq %r8, 5*8(%rdi)
57 80
58 movq %r9,2*8(%rdi) 81 movq 6*8(%rsi), %r9
59 movq %r10,3*8(%rdi) 82 movq 7*8(%rsi), %r10
83 movq %r9, 6*8(%rdi)
84 movq %r10, 7*8(%rdi)
60 85
61 movq 4*8(%rsi),%r11 86 leaq 64(%rsi), %rsi
62 movq 5*8(%rsi),%r8 87 leaq 64(%rdi), %rdi
63 88
64 movq %r11,4*8(%rdi)
65 movq %r8,5*8(%rdi)
66
67 movq 6*8(%rsi),%r9
68 movq 7*8(%rsi),%r10
69
70 movq %r9,6*8(%rdi)
71 movq %r10,7*8(%rdi)
72
73 leaq 64(%rsi),%rsi
74 leaq 64(%rdi),%rdi
75 jnz .Lloop_64 89 jnz .Lloop_64
76 90
77.Lhandle_tail: 91.Lhandle_tail:
78 movl %edx,%ecx 92 movl %edx, %ecx
79 andl $63,%ecx 93 andl $63, %ecx
80 shrl $3,%ecx 94 shrl $3, %ecx
81 jz .Lhandle_7 95 jz .Lhandle_7
96
82 .p2align 4 97 .p2align 4
83.Lloop_8: 98.Lloop_8:
84 decl %ecx 99 decl %ecx
85 movq (%rsi),%r8 100 movq (%rsi), %r8
86 movq %r8,(%rdi) 101 movq %r8, (%rdi)
87 leaq 8(%rdi),%rdi 102 leaq 8(%rdi), %rdi
88 leaq 8(%rsi),%rsi 103 leaq 8(%rsi), %rsi
89 jnz .Lloop_8 104 jnz .Lloop_8
90 105
91.Lhandle_7: 106.Lhandle_7:
92 movl %edx,%ecx 107 movl %edx, %ecx
93 andl $7,%ecx 108 andl $7, %ecx
94 jz .Lende 109 jz .Lend
110
95 .p2align 4 111 .p2align 4
96.Lloop_1: 112.Lloop_1:
97 movb (%rsi),%r8b 113 movb (%rsi), %r8b
98 movb %r8b,(%rdi) 114 movb %r8b, (%rdi)
99 incq %rdi 115 incq %rdi
100 incq %rsi 116 incq %rsi
101 decl %ecx 117 decl %ecx
102 jnz .Lloop_1 118 jnz .Lloop_1
103 119
104.Lende: 120.Lend:
105 popq %rbx
106 CFI_ADJUST_CFA_OFFSET -8
107 CFI_RESTORE rbx
108 ret 121 ret
109.Lfinal:
110 CFI_ENDPROC 122 CFI_ENDPROC
111ENDPROC(memcpy) 123ENDPROC(memcpy)
112ENDPROC(__memcpy) 124ENDPROC(__memcpy)
113 125
114 /* Some CPUs run faster using the string copy instructions. 126 /*
115 It is also a lot simpler. Use this when possible */ 127 * Some CPUs run faster using the string copy instructions.
128 * It is also a lot simpler. Use this when possible:
129 */
116 130
117 .section .altinstr_replacement,"ax" 131 .section .altinstr_replacement, "ax"
1181: .byte 0xeb /* jmp <disp8> */ 1321: .byte 0xeb /* jmp <disp8> */
119 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ 133 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1202: 1342:
121 .previous 135 .previous
122 .section .altinstructions,"a" 136
137 .section .altinstructions, "a"
123 .align 8 138 .align 8
124 .quad memcpy 139 .quad memcpy
125 .quad 1b 140 .quad 1b
126 .byte X86_FEATURE_REP_GOOD 141 .byte X86_FEATURE_REP_GOOD
127 /* Replace only beginning, memcpy is used to apply alternatives, so it 142
128 * is silly to overwrite itself with nops - reboot is only outcome... */ 143 /*
144 * Replace only beginning, memcpy is used to apply alternatives,
145 * so it is silly to overwrite itself with nops - reboot is the
146 * only outcome...
147 */
129 .byte 2b - 1b 148 .byte 2b - 1b
130 .byte 2b - 1b 149 .byte 2b - 1b
131 .previous 150 .previous
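
The rewritten memcpy keeps the same three-stage structure - 64-byte unrolled blocks, then 8-byte words, then a byte tail - while dropping the needless %rbx save/restore and pairing each load with its store. A C rendering of that shape, purely illustrative (it assumes 8-byte-aligned buffers and ignores the register scheduling the assembly exists for):

#include <stddef.h>
#include <stdint.h>

/* Illustrative shape of the copy loop above, not the kernel code. */
static void *demo_memcpy64(void *dst, const void *src, size_t len)
{
        uint64_t *d = dst;                 /* assumes 8-byte alignment */
        const uint64_t *s = src;
        unsigned char *db;
        const unsigned char *sb;
        size_t n;

        for (n = len >> 6; n; n--) {       /* .Lloop_64: 8 quadwords */
                d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
                d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
                d += 8;
                s += 8;
        }
        for (n = (len & 63) >> 3; n; n--)  /* .Lloop_8 */
                *d++ = *s++;

        db = (unsigned char *)d;           /* .Lloop_1: byte tail */
        sb = (const unsigned char *)s;
        for (n = len & 7; n; n--)
                *db++ = *sb++;

        return dst;
}

The shift/mask constants mirror the assembly exactly: shrl $6 / andl $63 / shrl $3 / andl $7.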
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 08537747cb58..fdd30d08ab52 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
14mmiotrace-y := kmmio.o pf_in.o mmio-mod.o 14mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
15obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 15obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
16 16
17obj-$(CONFIG_NUMA) += numa_$(BITS).o 17obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
18obj-$(CONFIG_K8_NUMA) += k8topology_64.o 18obj-$(CONFIG_K8_NUMA) += k8topology_64.o
19obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 19obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
20 20
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index d11745334a67..8126e8d1a2a4 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -19,49 +19,6 @@ void kunmap(struct page *page)
19 kunmap_high(page); 19 kunmap_high(page);
20} 20}
21 21
22static void debug_kmap_atomic_prot(enum km_type type)
23{
24#ifdef CONFIG_DEBUG_HIGHMEM
25 static unsigned warn_count = 10;
26
27 if (unlikely(warn_count == 0))
28 return;
29
30 if (unlikely(in_interrupt())) {
31 if (in_irq()) {
32 if (type != KM_IRQ0 && type != KM_IRQ1 &&
33 type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
34 type != KM_BOUNCE_READ) {
35 WARN_ON(1);
36 warn_count--;
37 }
38 } else if (!irqs_disabled()) { /* softirq */
39 if (type != KM_IRQ0 && type != KM_IRQ1 &&
40 type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
41 type != KM_SKB_SUNRPC_DATA &&
42 type != KM_SKB_DATA_SOFTIRQ &&
43 type != KM_BOUNCE_READ) {
44 WARN_ON(1);
45 warn_count--;
46 }
47 }
48 }
49
50 if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
51 type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
52 if (!irqs_disabled()) {
53 WARN_ON(1);
54 warn_count--;
55 }
56 } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
57 if (irq_count() == 0 && !irqs_disabled()) {
58 WARN_ON(1);
59 warn_count--;
60 }
61 }
62#endif
63}
64
65/* 22/*
66 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because 23 * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
67 * no global lock is needed and because the kmap code must perform a global TLB 24 * no global lock is needed and because the kmap code must perform a global TLB
@@ -81,7 +38,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
81 if (!PageHighMem(page)) 38 if (!PageHighMem(page))
82 return page_address(page); 39 return page_address(page);
83 40
84 debug_kmap_atomic_prot(type); 41 debug_kmap_atomic(type);
85 42
86 idx = type + KM_TYPE_NR*smp_processor_id(); 43 idx = type + KM_TYPE_NR*smp_processor_id();
87 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
@@ -121,22 +78,13 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
121 pagefault_enable(); 78 pagefault_enable();
122} 79}
123 80
124/* This is the same as kmap_atomic() but can map memory that doesn't 81/*
82 * This is the same as kmap_atomic() but can map memory that doesn't
125 * have a struct page associated with it. 83 * have a struct page associated with it.
126 */ 84 */
127void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) 85void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
128{ 86{
129 enum fixed_addresses idx; 87 return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
130 unsigned long vaddr;
131
132 pagefault_disable();
133
134 idx = type + KM_TYPE_NR*smp_processor_id();
135 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
136 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
137 arch_flush_lazy_mmu_mode();
138
139 return (void*) vaddr;
140} 88}
141EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ 89EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
142 90
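
kmap_atomic_pfn() is now a one-line wrapper around kmap_atomic_prot_pfn() (moved to iomap_32.c, below) with the default kmap_prot. Callers are unchanged; a sketch against this era's km_type-based API, with the demo_* name invented:

#include <linux/highmem.h>
#include <linux/string.h>

/* Sketch: zero a page identified only by its pfn (no struct page). */
static void demo_zero_pfn(unsigned long pfn)
{
        void *vaddr = kmap_atomic_pfn(pfn, KM_USER0);

        memset(vaddr, 0, PAGE_SIZE);
        kunmap_atomic(vaddr, KM_USER0);
}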
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 15219e0d1243..fd3da1dda1c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -94,9 +94,9 @@ struct map_range {
94#define NR_RANGE_MR 5 94#define NR_RANGE_MR 5
95#endif 95#endif
96 96
97static int save_mr(struct map_range *mr, int nr_range, 97static int __meminit save_mr(struct map_range *mr, int nr_range,
98 unsigned long start_pfn, unsigned long end_pfn, 98 unsigned long start_pfn, unsigned long end_pfn,
99 unsigned long page_size_mask) 99 unsigned long page_size_mask)
100{ 100{
101 if (start_pfn < end_pfn) { 101 if (start_pfn < end_pfn) {
102 if (nr_range >= NR_RANGE_MR) 102 if (nr_range >= NR_RANGE_MR)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d42ff42..8056545e2d39 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -19,10 +19,11 @@
19#include <asm/iomap.h> 19#include <asm/iomap.h>
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/highmem.h>
22 23
23int is_io_mapping_possible(resource_size_t base, unsigned long size) 24int is_io_mapping_possible(resource_size_t base, unsigned long size)
24{ 25{
25#ifndef CONFIG_X86_PAE 26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
26 /* There is no way to map greater than 1 << 32 address without PAE */ 27 /* There is no way to map greater than 1 << 32 address without PAE */
27 if (base + size > 0x100000000ULL) 28 if (base + size > 0x100000000ULL)
28 return 0; 29 return 0;
@@ -31,16 +32,28 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
31} 32}
32EXPORT_SYMBOL_GPL(is_io_mapping_possible); 33EXPORT_SYMBOL_GPL(is_io_mapping_possible);
33 34
34/* Map 'pfn' using fixed map 'type' and protections 'prot' 35void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
35 */
36void *
37iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
38{ 36{
39 enum fixed_addresses idx; 37 enum fixed_addresses idx;
40 unsigned long vaddr; 38 unsigned long vaddr;
41 39
42 pagefault_disable(); 40 pagefault_disable();
43 41
42 debug_kmap_atomic(type);
43 idx = type + KM_TYPE_NR * smp_processor_id();
44 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
45 set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
46 arch_flush_lazy_mmu_mode();
47
48 return (void *)vaddr;
49}
50
51/*
52 * Map 'pfn' using fixed map 'type' and protections 'prot'
53 */
54void *
55iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
56{
44 /* 57 /*
45 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 58 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
46 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the 59 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +63,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
50 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) 63 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
51 prot = PAGE_KERNEL_UC_MINUS; 64 prot = PAGE_KERNEL_UC_MINUS;
52 65
53 idx = type + KM_TYPE_NR*smp_processor_id(); 66 return kmap_atomic_prot_pfn(pfn, type, prot);
54 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
55 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
56 arch_flush_lazy_mmu_mode();
57
58 return (void*) vaddr;
59} 67}
60EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); 68EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
61 69
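
With the mapping mechanics factored into kmap_atomic_prot_pfn(), iomap_atomic_prot_pfn() is reduced to the non-PAT WC-to-UC_MINUS fixup plus that call. A hedged usage sketch - this is the path the i915 GEM code of this era used for write-combining access; the demo_* name and offset handling are invented:

#include <linux/types.h>
#include <asm/iomap.h>

/* Sketch: store one 32-bit value into an I/O page through a
 * temporary write-combining fixmap slot. */
static void demo_poke_wc(unsigned long pfn, unsigned int offset, u32 val)
{
        char *vaddr = iomap_atomic_prot_pfn(pfn, KM_USER0, PAGE_KERNEL_WC);

        *(volatile u32 *)(vaddr + offset) = val;
        iounmap_atomic(vaddr, KM_USER0);
}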
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index aca924a30ee6..0dfa09d69e80 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,13 +22,17 @@
22#include <asm/pgalloc.h> 22#include <asm/pgalloc.h>
23#include <asm/pat.h> 23#include <asm/pat.h>
24 24
25#ifdef CONFIG_X86_64 25static inline int phys_addr_valid(resource_size_t addr)
26
27static inline int phys_addr_valid(unsigned long addr)
28{ 26{
29 return addr < (1UL << boot_cpu_data.x86_phys_bits); 27#ifdef CONFIG_PHYS_ADDR_T_64BIT
28 return !(addr >> boot_cpu_data.x86_phys_bits);
29#else
30 return 1;
31#endif
30} 32}
31 33
34#ifdef CONFIG_X86_64
35
32unsigned long __phys_addr(unsigned long x) 36unsigned long __phys_addr(unsigned long x)
33{ 37{
34 if (x >= __START_KERNEL_map) { 38 if (x >= __START_KERNEL_map) {
@@ -65,11 +69,6 @@ EXPORT_SYMBOL(__virt_addr_valid);
65 69
66#else 70#else
67 71
68static inline int phys_addr_valid(unsigned long addr)
69{
70 return 1;
71}
72
73#ifdef CONFIG_DEBUG_VIRTUAL 72#ifdef CONFIG_DEBUG_VIRTUAL
74unsigned long __phys_addr(unsigned long x) 73unsigned long __phys_addr(unsigned long x)
75{ 74{
@@ -517,7 +516,7 @@ void __init early_ioremap_init(void)
517 printk(KERN_INFO "early_ioremap_init()\n"); 516 printk(KERN_INFO "early_ioremap_init()\n");
518 517
519 for (i = 0; i < FIX_BTMAPS_SLOTS; i++) 518 for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
520 slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); 519 slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
521 520
522 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); 521 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
523 memset(bm_pte, 0, sizeof(bm_pte)); 522 memset(bm_pte, 0, sizeof(bm_pte));
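
The rewritten phys_addr_valid() check matters on 32-bit PAE, where resource_size_t becomes 64-bit (CONFIG_PHYS_ADDR_T_64BIT) while unsigned long stays 32-bit: the old addr < (1UL << boot_cpu_data.x86_phys_bits) form overflows the shift for the common 36-bit case, whereas testing the high bits directly cannot. A userspace rendering of the difference (demo_* names invented, bits < 64 assumed):

#include <stdint.h>

/* Safe: no intermediate overflow for any bits < 64. */
static int demo_valid_shift(uint64_t addr, unsigned int bits)
{
        return !(addr >> bits);
}

/* Broken on ILP32: 1UL << 36 overflows a 32-bit long. */
static int demo_valid_compare(uint64_t addr, unsigned int bits)
{
        return addr < (1UL << bits);
}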
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 311
312 if (!ctx->active) { 312 if (!ctx->active) {
313 pr_warning("kmmio: spurious debug trap on CPU %d.\n", 313 pr_debug("kmmio: spurious debug trap on CPU %d.\n",
314 smp_processor_id()); 314 smp_processor_id());
315 goto out; 315 goto out;
316 } 316 }
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 2c4baa88f2cb..c9342ed8b402 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -378,27 +378,34 @@ static void clear_trace_list(void)
378} 378}
379 379
380#ifdef CONFIG_HOTPLUG_CPU 380#ifdef CONFIG_HOTPLUG_CPU
381static cpumask_t downed_cpus; 381static cpumask_var_t downed_cpus;
382 382
383static void enter_uniprocessor(void) 383static void enter_uniprocessor(void)
384{ 384{
385 int cpu; 385 int cpu;
386 int err; 386 int err;
387 387
388 if (downed_cpus == NULL &&
389 !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
390 pr_notice(NAME "Failed to allocate mask\n");
391 goto out;
392 }
393
388 get_online_cpus(); 394 get_online_cpus();
389 downed_cpus = cpu_online_map; 395 cpumask_copy(downed_cpus, cpu_online_mask);
390 cpu_clear(first_cpu(cpu_online_map), downed_cpus); 396 cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
391 if (num_online_cpus() > 1) 397 if (num_online_cpus() > 1)
392 pr_notice(NAME "Disabling non-boot CPUs...\n"); 398 pr_notice(NAME "Disabling non-boot CPUs...\n");
393 put_online_cpus(); 399 put_online_cpus();
394 400
395 for_each_cpu_mask(cpu, downed_cpus) { 401 for_each_cpu(cpu, downed_cpus) {
396 err = cpu_down(cpu); 402 err = cpu_down(cpu);
397 if (!err) 403 if (!err)
398 pr_info(NAME "CPU%d is down.\n", cpu); 404 pr_info(NAME "CPU%d is down.\n", cpu);
399 else 405 else
400 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); 406 pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
401 } 407 }
408out:
402 if (num_online_cpus() > 1) 409 if (num_online_cpus() > 1)
403 pr_warning(NAME "multiple CPUs still online, " 410 pr_warning(NAME "multiple CPUs still online, "
404 "may miss events.\n"); 411 "may miss events.\n");
@@ -411,10 +418,10 @@ static void __ref leave_uniprocessor(void)
411 int cpu; 418 int cpu;
412 int err; 419 int err;
413 420
414 if (cpus_weight(downed_cpus) == 0) 421 if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
415 return; 422 return;
416 pr_notice(NAME "Re-enabling CPUs...\n"); 423 pr_notice(NAME "Re-enabling CPUs...\n");
417 for_each_cpu_mask(cpu, downed_cpus) { 424 for_each_cpu(cpu, downed_cpus) {
418 err = cpu_up(cpu); 425 err = cpu_up(cpu);
419 if (!err) 426 if (!err)
420 pr_info(NAME "enabled CPU%d.\n", cpu); 427 pr_info(NAME "enabled CPU%d.\n", cpu);
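
The downed_cpus conversion follows the tree-wide move away from bare cpumask_t (whose size scales with NR_CPUS) to cpumask_var_t, which is only heap-allocated when CONFIG_CPUMASK_OFFSTACK requires it. The idiom, sketched in isolation:

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/kernel.h>

/* Sketch of the cpumask_var_t idiom used above. */
static int demo_walk_nonboot(void)
{
        cpumask_var_t mask;
        int cpu;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;   /* only fails with CPUMASK_OFFSTACK */

        cpumask_copy(mask, cpu_online_mask);
        cpumask_clear_cpu(cpumask_first(cpu_online_mask), mask);

        for_each_cpu(cpu, mask)
                pr_info("non-boot CPU%d\n", cpu);

        free_cpumask_var(mask);
        return 0;
}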
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
new file mode 100644
index 000000000000..550df481accd
--- /dev/null
+++ b/arch/x86/mm/numa.c
@@ -0,0 +1,67 @@
1/* Common code for 32 and 64-bit NUMA */
2#include <linux/topology.h>
3#include <linux/module.h>
4#include <linux/bootmem.h>
5
6#ifdef CONFIG_DEBUG_PER_CPU_MAPS
7# define DBG(x...) printk(KERN_DEBUG x)
8#else
9# define DBG(x...)
10#endif
11
12/*
13 * Which logical CPUs are on which nodes
14 */
15cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
16EXPORT_SYMBOL(node_to_cpumask_map);
17
18/*
19 * Allocate node_to_cpumask_map based on number of available nodes
20 * Requires node_possible_map to be valid.
21 *
22 * Note: node_to_cpumask() is not valid until after this is done.
23 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
24 */
25void __init setup_node_to_cpumask_map(void)
26{
27 unsigned int node, num = 0;
28
29 /* setup nr_node_ids if not done yet */
30 if (nr_node_ids == MAX_NUMNODES) {
31 for_each_node_mask(node, node_possible_map)
32 num = node;
33 nr_node_ids = num + 1;
34 }
35
36 /* allocate the map */
37 for (node = 0; node < nr_node_ids; node++)
38 alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
39
40 /* cpumask_of_node() will now work */
41 pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
42}
43
44#ifdef CONFIG_DEBUG_PER_CPU_MAPS
45/*
46 * Returns a pointer to the bitmask of CPUs on Node 'node'.
47 */
48const struct cpumask *cpumask_of_node(int node)
49{
50 if (node >= nr_node_ids) {
51 printk(KERN_WARNING
52 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
53 node, nr_node_ids);
54 dump_stack();
55 return cpu_none_mask;
56 }
57 if (node_to_cpumask_map[node] == NULL) {
58 printk(KERN_WARNING
59 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
60 node);
61 dump_stack();
62 return cpu_online_mask;
63 }
64 return node_to_cpumask_map[node];
65}
66EXPORT_SYMBOL(cpumask_of_node);
67#endif
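
The new common file turns node_to_cpumask_map into an array of per-node cpumask_var_t and gives CONFIG_DEBUG_PER_CPU_MAPS kernels a bounds-checked cpumask_of_node(). Callers only ever see a const pointer; a sketch, with the demo_* name invented:

#include <linux/topology.h>

/* Sketch: count the CPUs recorded for a node once
 * setup_node_to_cpumask_map() has run. */
static unsigned int demo_cpus_on_node(int node)
{
        return cpumask_weight(cpumask_of_node(node));
}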
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 64c9cf043cdd..d73aaa892371 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,12 +20,6 @@
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/k8.h>
22 22
23#ifdef CONFIG_DEBUG_PER_CPU_MAPS
24# define DBG(x...) printk(KERN_DEBUG x)
25#else
26# define DBG(x...)
27#endif
28
29struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
30EXPORT_SYMBOL(node_data); 24EXPORT_SYMBOL(node_data);
31 25
@@ -49,12 +43,6 @@ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
49EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 43EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
50 44
51/* 45/*
52 * Which logical CPUs are on which nodes
53 */
54cpumask_t *node_to_cpumask_map;
55EXPORT_SYMBOL(node_to_cpumask_map);
56
57/*
58 * Given a shift value, try to populate memnodemap[] 46 * Given a shift value, try to populate memnodemap[]
59 * Returns : 47 * Returns :
60 * 1 if OK 48 * 1 if OK
@@ -661,36 +649,6 @@ void __init init_cpu_to_node(void)
661#endif 649#endif
662 650
663 651
664/*
665 * Allocate node_to_cpumask_map based on number of available nodes
666 * Requires node_possible_map to be valid.
667 *
668 * Note: node_to_cpumask() is not valid until after this is done.
669 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
670 */
671void __init setup_node_to_cpumask_map(void)
672{
673 unsigned int node, num = 0;
674 cpumask_t *map;
675
676 /* setup nr_node_ids if not done yet */
677 if (nr_node_ids == MAX_NUMNODES) {
678 for_each_node_mask(node, node_possible_map)
679 num = node;
680 nr_node_ids = num + 1;
681 }
682
683 /* allocate the map */
684 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
685 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
686
687 pr_debug("Node to cpumask map at %p for %d nodes\n",
688 map, nr_node_ids);
689
690 /* node_to_cpumask() will now work */
691 node_to_cpumask_map = map;
692}
693
694void __cpuinit numa_set_node(int cpu, int node) 652void __cpuinit numa_set_node(int cpu, int node)
695{ 653{
696 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 654 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -723,12 +681,12 @@ void __cpuinit numa_clear_node(int cpu)
723 681
724void __cpuinit numa_add_cpu(int cpu) 682void __cpuinit numa_add_cpu(int cpu)
725{ 683{
726 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 684 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
727} 685}
728 686
729void __cpuinit numa_remove_cpu(int cpu) 687void __cpuinit numa_remove_cpu(int cpu)
730{ 688{
731 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 689 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
732} 690}
733 691
734#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 692#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -739,20 +697,20 @@ void __cpuinit numa_remove_cpu(int cpu)
739static void __cpuinit numa_set_cpumask(int cpu, int enable) 697static void __cpuinit numa_set_cpumask(int cpu, int enable)
740{ 698{
741 int node = early_cpu_to_node(cpu); 699 int node = early_cpu_to_node(cpu);
742 cpumask_t *mask; 700 struct cpumask *mask;
743 char buf[64]; 701 char buf[64];
744 702
745 if (node_to_cpumask_map == NULL) { 703 mask = node_to_cpumask_map[node];
746 printk(KERN_ERR "node_to_cpumask_map NULL\n"); 704 if (mask == NULL) {
705 printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
747 dump_stack(); 706 dump_stack();
748 return; 707 return;
749 } 708 }
750 709
751 mask = &node_to_cpumask_map[node];
752 if (enable) 710 if (enable)
753 cpu_set(cpu, *mask); 711 cpumask_set_cpu(cpu, mask);
754 else 712 else
755 cpu_clear(cpu, *mask); 713 cpumask_clear_cpu(cpu, mask);
756 714
757 cpulist_scnprintf(buf, sizeof(buf), mask); 715 cpulist_scnprintf(buf, sizeof(buf), mask);
758 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 716 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
@@ -799,59 +757,6 @@ int early_cpu_to_node(int cpu)
799 return per_cpu(x86_cpu_to_node_map, cpu); 757 return per_cpu(x86_cpu_to_node_map, cpu);
800} 758}
801 759
802
803/* empty cpumask */
804static const cpumask_t cpu_mask_none;
805
806/*
807 * Returns a pointer to the bitmask of CPUs on Node 'node'.
808 */
809const cpumask_t *cpumask_of_node(int node)
810{
811 if (node_to_cpumask_map == NULL) {
812 printk(KERN_WARNING
813 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
814 node);
815 dump_stack();
816 return (const cpumask_t *)&cpu_online_map;
817 }
818 if (node >= nr_node_ids) {
819 printk(KERN_WARNING
820 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
821 node, nr_node_ids);
822 dump_stack();
823 return &cpu_mask_none;
824 }
825 return &node_to_cpumask_map[node];
826}
827EXPORT_SYMBOL(cpumask_of_node);
828
829/*
830 * Returns a bitmask of CPUs on Node 'node'.
831 *
832 * Side note: this function creates the returned cpumask on the stack
833 * so with a high NR_CPUS count, excessive stack space is used. The
834 * node_to_cpumask_ptr function should be used whenever possible.
835 */
836cpumask_t node_to_cpumask(int node)
837{
838 if (node_to_cpumask_map == NULL) {
839 printk(KERN_WARNING
840 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
841 dump_stack();
842 return cpu_online_map;
843 }
844 if (node >= nr_node_ids) {
845 printk(KERN_WARNING
846 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
847 node, nr_node_ids);
848 dump_stack();
849 return cpu_mask_none;
850 }
851 return node_to_cpumask_map[node];
852}
853EXPORT_SYMBOL(node_to_cpumask);
854
855/* 760/*
856 * --------- end of debug versions of the numa functions --------- 761 * --------- end of debug versions of the numa functions ---------
857 */ 762 */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9c4294986af7..d71e1b636ce6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/processor.h> 16#include <asm/processor.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/sections.h> 18#include <asm/sections.h>
19#include <asm/setup.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
21#include <asm/proto.h> 22#include <asm/proto.h>
@@ -33,6 +34,7 @@ struct cpa_data {
33 unsigned long pfn; 34 unsigned long pfn;
34 unsigned force_split : 1; 35 unsigned force_split : 1;
35 int curpage; 36 int curpage;
37 struct page **pages;
36}; 38};
37 39
38/* 40/*
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
45 47
46#define CPA_FLUSHTLB 1 48#define CPA_FLUSHTLB 1
47#define CPA_ARRAY 2 49#define CPA_ARRAY 2
50#define CPA_PAGES_ARRAY 4
48 51
49#ifdef CONFIG_PROC_FS 52#ifdef CONFIG_PROC_FS
50static unsigned long direct_pages_count[PG_LEVEL_NUM]; 53static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void)
95 98
96static inline unsigned long highmap_end_pfn(void) 99static inline unsigned long highmap_end_pfn(void)
97{ 100{
98 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 101 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
99} 102}
100 103
101#endif 104#endif
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
201 } 204 }
202} 205}
203 206
204static void cpa_flush_array(unsigned long *start, int numpages, int cache) 207static void cpa_flush_array(unsigned long *start, int numpages, int cache,
208 int in_flags, struct page **pages)
205{ 209{
206 unsigned int i, level; 210 unsigned int i, level;
207 unsigned long *addr;
208 211
209 BUG_ON(irqs_disabled()); 212 BUG_ON(irqs_disabled());
210 213
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
225 * will cause all other CPUs to flush the same 228 * will cause all other CPUs to flush the same
226 * cachelines: 229 * cachelines:
227 */ 230 */
228 for (i = 0, addr = start; i < numpages; i++, addr++) { 231 for (i = 0; i < numpages; i++) {
229 pte_t *pte = lookup_address(*addr, &level); 232 unsigned long addr;
233 pte_t *pte;
234
235 if (in_flags & CPA_PAGES_ARRAY)
236 addr = (unsigned long)page_address(pages[i]);
237 else
238 addr = start[i];
239
240 pte = lookup_address(addr, &level);
230 241
231 /* 242 /*
232 * Only flush present addresses: 243 * Only flush present addresses:
233 */ 244 */
234 if (pte && (pte_val(*pte) & _PAGE_PRESENT)) 245 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
235 clflush_cache_range((void *) *addr, PAGE_SIZE); 246 clflush_cache_range((void *)addr, PAGE_SIZE);
236 } 247 }
237} 248}
238 249
@@ -584,7 +595,9 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
584 unsigned int level; 595 unsigned int level;
585 pte_t *kpte, old_pte; 596 pte_t *kpte, old_pte;
586 597
587 if (cpa->flags & CPA_ARRAY) 598 if (cpa->flags & CPA_PAGES_ARRAY)
599 address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
600 else if (cpa->flags & CPA_ARRAY)
588 address = cpa->vaddr[cpa->curpage]; 601 address = cpa->vaddr[cpa->curpage];
589 else 602 else
590 address = *cpa->vaddr; 603 address = *cpa->vaddr;
@@ -687,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
687 * No need to redo, when the primary call touched the direct 700 * No need to redo, when the primary call touched the direct
688 * mapping already: 701 * mapping already:
689 */ 702 */
690 if (cpa->flags & CPA_ARRAY) 703 if (cpa->flags & CPA_PAGES_ARRAY)
704 vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
705 else if (cpa->flags & CPA_ARRAY)
691 vaddr = cpa->vaddr[cpa->curpage]; 706 vaddr = cpa->vaddr[cpa->curpage];
692 else 707 else
693 vaddr = *cpa->vaddr; 708 vaddr = *cpa->vaddr;
@@ -698,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
698 alias_cpa = *cpa; 713 alias_cpa = *cpa;
699 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 714 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
700 alias_cpa.vaddr = &temp_cpa_vaddr; 715 alias_cpa.vaddr = &temp_cpa_vaddr;
701 alias_cpa.flags &= ~CPA_ARRAY; 716 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
702 717
703 718
704 ret = __change_page_attr_set_clr(&alias_cpa, 0); 719 ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -711,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
711 * No need to redo, when the primary call touched the high 726 * No need to redo, when the primary call touched the high
712 * mapping already: 727 * mapping already:
713 */ 728 */
714 if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) 729 if (within(vaddr, (unsigned long) _text, _brk_end))
715 return 0; 730 return 0;
716 731
717 /* 732 /*
@@ -724,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
724 alias_cpa = *cpa; 739 alias_cpa = *cpa;
725 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; 740 temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
726 alias_cpa.vaddr = &temp_cpa_vaddr; 741 alias_cpa.vaddr = &temp_cpa_vaddr;
727 alias_cpa.flags &= ~CPA_ARRAY; 742 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
728 743
729 /* 744 /*
730 * The high mapping range is imprecise, so ignore the return value. 745 * The high mapping range is imprecise, so ignore the return value.
@@ -745,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
745 */ 760 */
746 cpa->numpages = numpages; 761 cpa->numpages = numpages;
747 /* for array changes, we can't use large page */ 762 /* for array changes, we can't use large page */
748 if (cpa->flags & CPA_ARRAY) 763 if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
749 cpa->numpages = 1; 764 cpa->numpages = 1;
750 765
751 if (!debug_pagealloc) 766 if (!debug_pagealloc)
@@ -769,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
769 */ 784 */
770 BUG_ON(cpa->numpages > numpages); 785 BUG_ON(cpa->numpages > numpages);
771 numpages -= cpa->numpages; 786 numpages -= cpa->numpages;
772 if (cpa->flags & CPA_ARRAY) 787 if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
773 cpa->curpage++; 788 cpa->curpage++;
774 else 789 else
775 *cpa->vaddr += cpa->numpages * PAGE_SIZE; 790 *cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -786,7 +801,8 @@ static inline int cache_attr(pgprot_t attr)
786 801
787static int change_page_attr_set_clr(unsigned long *addr, int numpages, 802static int change_page_attr_set_clr(unsigned long *addr, int numpages,
788 pgprot_t mask_set, pgprot_t mask_clr, 803 pgprot_t mask_set, pgprot_t mask_clr,
789 int force_split, int array) 804 int force_split, int in_flag,
805 struct page **pages)
790{ 806{
791 struct cpa_data cpa; 807 struct cpa_data cpa;
792 int ret, cache, checkalias; 808 int ret, cache, checkalias;
@@ -801,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
801 return 0; 817 return 0;
802 818
803 /* Ensure we are PAGE_SIZE aligned */ 819 /* Ensure we are PAGE_SIZE aligned */
804 if (!array) { 820 if (in_flag & CPA_ARRAY) {
805 if (*addr & ~PAGE_MASK) {
806 *addr &= PAGE_MASK;
807 /*
808 * People should not be passing in unaligned addresses:
809 */
810 WARN_ON_ONCE(1);
811 }
812 } else {
813 int i; 821 int i;
814 for (i = 0; i < numpages; i++) { 822 for (i = 0; i < numpages; i++) {
815 if (addr[i] & ~PAGE_MASK) { 823 if (addr[i] & ~PAGE_MASK) {
@@ -817,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
817 WARN_ON_ONCE(1); 825 WARN_ON_ONCE(1);
818 } 826 }
819 } 827 }
828 } else if (!(in_flag & CPA_PAGES_ARRAY)) {
829 /*
830 * in_flag of CPA_PAGES_ARRAY implies it is aligned.
 831	 * No need to check in that case
832 */
833 if (*addr & ~PAGE_MASK) {
834 *addr &= PAGE_MASK;
835 /*
836 * People should not be passing in unaligned addresses:
837 */
838 WARN_ON_ONCE(1);
839 }
820 } 840 }
821 841
822 /* Must avoid aliasing mappings in the highmem code */ 842 /* Must avoid aliasing mappings in the highmem code */
@@ -832,6 +852,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
832 arch_flush_lazy_mmu_mode(); 852 arch_flush_lazy_mmu_mode();
833 853
834 cpa.vaddr = addr; 854 cpa.vaddr = addr;
855 cpa.pages = pages;
835 cpa.numpages = numpages; 856 cpa.numpages = numpages;
836 cpa.mask_set = mask_set; 857 cpa.mask_set = mask_set;
837 cpa.mask_clr = mask_clr; 858 cpa.mask_clr = mask_clr;
@@ -839,8 +860,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
839 cpa.curpage = 0; 860 cpa.curpage = 0;
840 cpa.force_split = force_split; 861 cpa.force_split = force_split;
841 862
842 if (array) 863 if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
843 cpa.flags |= CPA_ARRAY; 864 cpa.flags |= in_flag;
844 865
845 /* No alias checking for _NX bit modifications */ 866 /* No alias checking for _NX bit modifications */
846 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; 867 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -866,9 +887,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 866	 * wbinvd): 887	 * wbinvd):
867 */ 888 */
868 if (!ret && cpu_has_clflush) { 889 if (!ret && cpu_has_clflush) {
869 if (cpa.flags & CPA_ARRAY) 890 if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
870 cpa_flush_array(addr, numpages, cache); 891 cpa_flush_array(addr, numpages, cache,
871 else 892 cpa.flags, pages);
893 } else
872 cpa_flush_range(*addr, numpages, cache); 894 cpa_flush_range(*addr, numpages, cache);
873 } else 895 } else
874 cpa_flush_all(cache); 896 cpa_flush_all(cache);
@@ -888,14 +910,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages,
888 pgprot_t mask, int array) 910 pgprot_t mask, int array)
889{ 911{
890 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, 912 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
891 array); 913 (array ? CPA_ARRAY : 0), NULL);
892} 914}
893 915
894static inline int change_page_attr_clear(unsigned long *addr, int numpages, 916static inline int change_page_attr_clear(unsigned long *addr, int numpages,
895 pgprot_t mask, int array) 917 pgprot_t mask, int array)
896{ 918{
897 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, 919 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
898 array); 920 (array ? CPA_ARRAY : 0), NULL);
921}
922
923static inline int cpa_set_pages_array(struct page **pages, int numpages,
924 pgprot_t mask)
925{
926 return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
927 CPA_PAGES_ARRAY, pages);
928}
929
930static inline int cpa_clear_pages_array(struct page **pages, int numpages,
931 pgprot_t mask)
932{
933 return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
934 CPA_PAGES_ARRAY, pages);
899} 935}
900 936
901int _set_memory_uc(unsigned long addr, int numpages) 937int _set_memory_uc(unsigned long addr, int numpages)
@@ -1043,7 +1079,7 @@ int set_memory_np(unsigned long addr, int numpages)
1043int set_memory_4k(unsigned long addr, int numpages) 1079int set_memory_4k(unsigned long addr, int numpages)
1044{ 1080{
1045 return change_page_attr_set_clr(&addr, numpages, __pgprot(0), 1081 return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
1046 __pgprot(0), 1, 0); 1082 __pgprot(0), 1, 0, NULL);
1047} 1083}
1048 1084
1049int set_pages_uc(struct page *page, int numpages) 1085int set_pages_uc(struct page *page, int numpages)
@@ -1054,6 +1090,35 @@ int set_pages_uc(struct page *page, int numpages)
1054} 1090}
1055EXPORT_SYMBOL(set_pages_uc); 1091EXPORT_SYMBOL(set_pages_uc);
1056 1092
1093int set_pages_array_uc(struct page **pages, int addrinarray)
1094{
1095 unsigned long start;
1096 unsigned long end;
1097 int i;
1098 int free_idx;
1099
1100 for (i = 0; i < addrinarray; i++) {
1101 start = (unsigned long)page_address(pages[i]);
1102 end = start + PAGE_SIZE;
1103 if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
1104 goto err_out;
1105 }
1106
1107 if (cpa_set_pages_array(pages, addrinarray,
1108 __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
1109 return 0; /* Success */
1110 }
1111err_out:
1112 free_idx = i;
1113 for (i = 0; i < free_idx; i++) {
1114 start = (unsigned long)page_address(pages[i]);
1115 end = start + PAGE_SIZE;
1116 free_memtype(start, end);
1117 }
1118 return -EINVAL;
1119}
1120EXPORT_SYMBOL(set_pages_array_uc);
1121
1057int set_pages_wb(struct page *page, int numpages) 1122int set_pages_wb(struct page *page, int numpages)
1058{ 1123{
1059 unsigned long addr = (unsigned long)page_address(page); 1124 unsigned long addr = (unsigned long)page_address(page);
@@ -1062,6 +1127,26 @@ int set_pages_wb(struct page *page, int numpages)
1062} 1127}
1063EXPORT_SYMBOL(set_pages_wb); 1128EXPORT_SYMBOL(set_pages_wb);
1064 1129
1130int set_pages_array_wb(struct page **pages, int addrinarray)
1131{
1132 int retval;
1133 unsigned long start;
1134 unsigned long end;
1135 int i;
1136
1137 retval = cpa_clear_pages_array(pages, addrinarray,
1138 __pgprot(_PAGE_CACHE_MASK));
1139
1140 for (i = 0; i < addrinarray; i++) {
1141 start = (unsigned long)page_address(pages[i]);
1142 end = start + PAGE_SIZE;
1143 free_memtype(start, end);
1144 }
1145
1146 return retval;
1147}
1148EXPORT_SYMBOL(set_pages_array_wb);
1149
1065int set_pages_x(struct page *page, int numpages) 1150int set_pages_x(struct page *page, int numpages)
1066{ 1151{
1067 unsigned long addr = (unsigned long)page_address(page); 1152 unsigned long addr = (unsigned long)page_address(page);
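The set_pages_array_uc()/set_pages_array_wb() additions above use a reserve-then-roll-back pattern: each page's memtype is reserved up front, and if any reservation fails, only the reservations that already succeeded are released before returning -EINVAL. A minimal standalone sketch of that control flow (not kernel code; reserve_region()/release_region() are illustrative stand-ins for reserve_memtype()/free_memtype()):

#include <stdio.h>

#define NPAGES 4

static int reserve_region(int i)    /* stand-in: pretend page 2 fails */
{
    return i == 2 ? -1 : 0;
}

static void release_region(int i)   /* stand-in for free_memtype() */
{
    printf("released %d\n", i);
}

static int set_regions_uc(int npages)
{
    int i, free_idx;

    for (i = 0; i < npages; i++) {
        if (reserve_region(i))
            goto err_out;
    }
    return 0;                       /* all reservations succeeded */

err_out:
    free_idx = i;                   /* undo only regions 0 .. i-1 */
    for (i = 0; i < free_idx; i++)
        release_region(i);
    return -1;
}

int main(void)
{
    printf("result: %d\n", set_regions_uc(NPAGES));
    return 0;
}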
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2ed37158012d..640339ee4fb2 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,10 +677,11 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
677 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 677 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
678 678
679 /* 679 /*
680 * reserve_pfn_range() doesn't support RAM pages. 680 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
681 * behavior with RAM pages by returning success.
681 */ 682 */
682 if (is_ram != 0) 683 if (is_ram != 0)
683 return -EINVAL; 684 return 0;
684 685
685 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 686 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
686 if (ret) 687 if (ret)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index f2e477c91c1b..46c8834aedc0 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -50,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
50 } 50 }
51 pte = pte_offset_kernel(pmd, vaddr); 51 pte = pte_offset_kernel(pmd, vaddr);
52 if (pte_val(pteval)) 52 if (pte_val(pteval))
53 set_pte_present(&init_mm, vaddr, pte, pteval); 53 set_pte_at(&init_mm, vaddr, pte, pteval);
54 else 54 else
55 pte_clear(&init_mm, vaddr, pte); 55 pte_clear(&init_mm, vaddr, pte);
56 56
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 574c8bc95ef0..c7d272b8574c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -116,6 +116,36 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
116 reserve_early(phys, phys + length, "ACPI SLIT"); 116 reserve_early(phys, phys + length, "ACPI SLIT");
117} 117}
118 118
119/* Callback for Proximity Domain -> x2APIC mapping */
120void __init
121acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
122{
123 int pxm, node;
124 int apic_id;
125
126 if (srat_disabled())
127 return;
128 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
129 bad_srat();
130 return;
131 }
132 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
133 return;
134 pxm = pa->proximity_domain;
135 node = setup_node(pxm);
136 if (node < 0) {
137 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
138 bad_srat();
139 return;
140 }
141
142 apic_id = pa->apic_id;
143 apicid_to_node[apic_id] = node;
144 acpi_numa = 1;
145 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
146 pxm, apic_id, node);
147}
148
119/* Callback for Proximity Domain -> LAPIC mapping */ 149/* Callback for Proximity Domain -> LAPIC mapping */
120void __init 150void __init
121acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) 151acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
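The acpi_numa_x2apic_affinity_init() callback added above mirrors the LAPIC variant that follows it: reject truncated or disabled SRAT entries, translate the proximity domain to a node, then record the apicid -> node mapping. A standalone sketch of that validate-then-record shape (struct x2apic_entry, setup_node_() and apicid_to_node_[] are illustrative stand-ins):

#include <stdio.h>

struct x2apic_entry {   /* stands in for acpi_srat_x2apic_cpu_affinity */
    unsigned length;
    unsigned flags;     /* bit 0: entry enabled */
    unsigned pxm;
    unsigned apic_id;
};

#define MAX_NODES 64
static int apicid_to_node_[1024];

static int setup_node_(unsigned pxm)    /* stands in for setup_node() */
{
    return pxm < MAX_NODES ? (int)pxm : -1;
}

static int x2apic_affinity_init(const struct x2apic_entry *e)
{
    int node;

    if (e->length < sizeof(*e))
        return -1;          /* malformed SRAT entry */
    if (!(e->flags & 1))
        return 0;           /* disabled entry: ignore quietly */

    node = setup_node_(e->pxm);
    if (node < 0)
        return -1;          /* too many proximity domains */

    apicid_to_node_[e->apic_id] = node;
    printf("PXM %u -> APIC %u -> Node %u\n", e->pxm, e->apic_id, node);
    return 0;
}

int main(void)
{
    struct x2apic_entry e = { sizeof(e), 1, 2, 0x30 };
    return x2apic_affinity_init(&e) ? 1 : 0;
}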
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a654d59e4483..821e97017e95 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -187,11 +187,6 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
187 cpumask, cpumask_of(smp_processor_id())); 187 cpumask, cpumask_of(smp_processor_id()));
188 188
189 /* 189 /*
190 * Make the above memory operations globally visible before
191 * sending the IPI.
192 */
193 smp_mb();
194 /*
195 * We have to send the IPI only to 190 * We have to send the IPI only to
196 * CPUs affected. 191 * CPUs affected.
197 */ 192 */
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 4c4a51c90bc2..819b131fd752 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -380,7 +380,7 @@ static unsigned int get_stagger(void)
380{ 380{
381#ifdef CONFIG_SMP 381#ifdef CONFIG_SMP
382 int cpu = smp_processor_id(); 382 int cpu = smp_processor_id();
383 return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); 383 return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
384#endif 384#endif
385 return 0; 385 return 0;
386} 386}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 82d22fc601ae..8c362b96b644 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -90,7 +90,7 @@ static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
90 return 0; 90 return 0;
91} 91}
92 92
93static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { 93static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = {
94/* 94/*
95 * Systems where PCI IO resource ISA alignment can be skipped 95 * Systems where PCI IO resource ISA alignment can be skipped
96 * when the ISA enable bit in the bridge control is not set 96 * when the ISA enable bit in the bridge control is not set
@@ -183,7 +183,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
183} 183}
184#endif 184#endif
185 185
186static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { 186static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
187#ifdef __i386__ 187#ifdef __i386__
188/* 188/*
189 * Laptops which need pci=assign-busses to see Cardbus cards 189 * Laptops which need pci=assign-busses to see Cardbus cards
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index f6adf2c6d751..aaf26ae58cd5 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -69,11 +69,12 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func)
69 int j; 69 int j;
70 u32 val; 70 u32 val;
71 71
72 printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func); 72 printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:",
73 bus, slot, func);
73 74
74 for (i = 0; i < 256; i += 4) { 75 for (i = 0; i < 256; i += 4) {
75 if (!(i & 0x0f)) 76 if (!(i & 0x0f))
76 printk("\n%04x:",i); 77 printk("\n %02x:",i);
77 78
78 val = read_pci_config(bus, slot, func, i); 79 val = read_pci_config(bus, slot, func, i);
79 for (j = 0; j < 4; j++) { 80 for (j = 0; j < 4; j++) {
@@ -96,20 +97,22 @@ void early_dump_pci_devices(void)
96 for (func = 0; func < 8; func++) { 97 for (func = 0; func < 8; func++) {
97 u32 class; 98 u32 class;
98 u8 type; 99 u8 type;
100
99 class = read_pci_config(bus, slot, func, 101 class = read_pci_config(bus, slot, func,
100 PCI_CLASS_REVISION); 102 PCI_CLASS_REVISION);
101 if (class == 0xffffffff) 103 if (class == 0xffffffff)
102 break; 104 continue;
103 105
104 early_dump_pci_device(bus, slot, func); 106 early_dump_pci_device(bus, slot, func);
105 107
106 /* No multi-function device? */ 108 if (func == 0) {
107 type = read_pci_config_byte(bus, slot, func, 109 type = read_pci_config_byte(bus, slot,
110 func,
108 PCI_HEADER_TYPE); 111 PCI_HEADER_TYPE);
109 if (!(type & 0x80)) 112 if (!(type & 0x80))
110 break; 113 break;
114 }
111 } 115 }
112 } 116 }
113 } 117 }
114} 118}
115
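The early_dump_pci_devices() change above adjusts loop control during brute-force enumeration: a function whose class reads back as 0xffffffff is now skipped with continue (later functions of the same slot may still exist), and the multi-function bit in PCI_HEADER_TYPE is only consulted on function 0, where it is defined. A standalone sketch of that loop (read_class()/read_header_type() are stand-ins for read_pci_config()/read_pci_config_byte()):

#include <stdio.h>
#include <stdint.h>

static uint32_t read_class(int func)
{
    /* pretend function 1 is absent on a multi-function device */
    return func == 1 ? 0xffffffff : 0x060000;
}

static uint8_t read_header_type(void)
{
    return 0x80;                    /* multi-function header type */
}

int main(void)
{
    int func;

    for (func = 0; func < 8; func++) {
        if (read_class(func) == 0xffffffff)
            continue;               /* gap: keep scanning this slot */

        printf("found function %d\n", func);

        if (func == 0 && !(read_header_type() & 0x80))
            break;                  /* single-function device */
    }
    return 0;
}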
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 7d388d5cf548..6dd89555fbfa 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -356,7 +356,7 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); 356DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
357 357
358 358
359static struct dmi_system_id __devinitdata msi_k8t_dmi_table[] = { 359static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
360 { 360 {
361 .ident = "MSI-K8T-Neo2Fir", 361 .ident = "MSI-K8T-Neo2Fir",
362 .matches = { 362 .matches = {
@@ -413,7 +413,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
413 */ 413 */
414static u16 toshiba_line_size; 414static u16 toshiba_line_size;
415 415
416static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { 416static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = {
417 { 417 {
418 .ident = "Toshiba PS5 based laptop", 418 .ident = "Toshiba PS5 based laptop",
419 .matches = { 419 .matches = {
@@ -495,26 +495,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
495 pci_siemens_interrupt_controller); 495 pci_siemens_interrupt_controller);
496 496
497/* 497/*
498 * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
499 * 4096 bytes configuration space for each function of their processor
500 * configuration space.
501 */
502static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
503{
504 dev->cfg_size = pci_cfg_space_size_ext(dev);
505}
506DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
507DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
508DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
509DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
510DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
511DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
512DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
513DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
514DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
515DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
516
517/*
518 * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from 498 * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
519 * confusing the PCI engine: 499 * confusing the PCI engine:
520 */ 500 */
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5ead808dd70c..f1817f71e009 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -258,24 +258,7 @@ void pcibios_set_master(struct pci_dev *dev)
258 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); 258 pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
259} 259}
260 260
261static void pci_unmap_page_range(struct vm_area_struct *vma)
262{
263 u64 addr = (u64)vma->vm_pgoff << PAGE_SHIFT;
264 free_memtype(addr, addr + vma->vm_end - vma->vm_start);
265}
266
267static void pci_track_mmap_page_range(struct vm_area_struct *vma)
268{
269 u64 addr = (u64)vma->vm_pgoff << PAGE_SHIFT;
270 unsigned long flags = pgprot_val(vma->vm_page_prot)
271 & _PAGE_CACHE_MASK;
272
273 reserve_memtype(addr, addr + vma->vm_end - vma->vm_start, flags, NULL);
274}
275
276static struct vm_operations_struct pci_mmap_ops = { 261static struct vm_operations_struct pci_mmap_ops = {
277 .open = pci_track_mmap_page_range,
278 .close = pci_unmap_page_range,
279 .access = generic_access_phys, 262 .access = generic_access_phys,
280}; 263};
281 264
@@ -283,11 +266,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
283 enum pci_mmap_state mmap_state, int write_combine) 266 enum pci_mmap_state mmap_state, int write_combine)
284{ 267{
285 unsigned long prot; 268 unsigned long prot;
286 u64 addr = vma->vm_pgoff << PAGE_SHIFT;
287 unsigned long len = vma->vm_end - vma->vm_start;
288 unsigned long flags;
289 unsigned long new_flags;
290 int retval;
291 269
292 /* I/O space cannot be accessed via normal processor loads and 270 /* I/O space cannot be accessed via normal processor loads and
293 * stores on this platform. 271 * stores on this platform.
@@ -308,27 +286,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
308 286
309 vma->vm_page_prot = __pgprot(prot); 287 vma->vm_page_prot = __pgprot(prot);
310 288
311 flags = pgprot_val(vma->vm_page_prot) & _PAGE_CACHE_MASK;
312 retval = reserve_memtype(addr, addr + len, flags, &new_flags);
313 if (retval)
314 return retval;
315
316 if (flags != new_flags) {
317 if (!is_new_memtype_allowed(flags, new_flags)) {
318 free_memtype(addr, addr+len);
319 return -EINVAL;
320 }
321 flags = new_flags;
322 }
323
324 if (((vma->vm_pgoff < max_low_pfn_mapped) ||
325 (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
326 vma->vm_pgoff < max_pfn_mapped)) &&
327 ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
328 free_memtype(addr, addr + len);
329 return -EINVAL;
330 }
331
332 if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, 289 if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
333 vma->vm_end - vma->vm_start, 290 vma->vm_end - vma->vm_start,
334 vma->vm_page_prot)) 291 vma->vm_page_prot))
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index f1065b129e9c..4061bb0f267d 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -50,8 +50,6 @@ static int __init pci_legacy_init(void)
50 if (pci_root_bus) 50 if (pci_root_bus)
51 pci_bus_add_devices(pci_root_bus); 51 pci_bus_add_devices(pci_root_bus);
52 52
53 pcibios_fixup_peer_bridges();
54
55 return 0; 53 return 0;
56} 54}
57 55
@@ -67,6 +65,7 @@ int __init pci_subsys_init(void)
67 pci_visws_init(); 65 pci_visws_init();
68#endif 66#endif
69 pci_legacy_init(); 67 pci_legacy_init();
68 pcibios_fixup_peer_bridges();
70 pcibios_irq_init(); 69 pcibios_irq_init();
71 pcibios_init(); 70 pcibios_init();
72 71
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 89bf9242c80a..905bb526b133 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/acpi.h> 15#include <linux/acpi.h>
16#include <linux/bitmap.h> 16#include <linux/bitmap.h>
17#include <linux/sort.h>
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/pci_x86.h> 19#include <asm/pci_x86.h>
19 20
@@ -24,24 +25,49 @@
24/* Indicate if the mmcfg resources have been placed into the resource table. */ 25/* Indicate if the mmcfg resources have been placed into the resource table. */
25static int __initdata pci_mmcfg_resources_inserted; 26static int __initdata pci_mmcfg_resources_inserted;
26 27
28static __init int extend_mmcfg(int num)
29{
30 struct acpi_mcfg_allocation *new;
31 int new_num = pci_mmcfg_config_num + num;
32
33 new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL);
34 if (!new)
35 return -1;
36
37 if (pci_mmcfg_config) {
38 memcpy(new, pci_mmcfg_config,
 39 sizeof(pci_mmcfg_config[0]) * pci_mmcfg_config_num);
40 kfree(pci_mmcfg_config);
41 }
42 pci_mmcfg_config = new;
43
44 return 0;
45}
46
47static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end)
48{
49 int i = pci_mmcfg_config_num;
50
51 pci_mmcfg_config_num++;
52 pci_mmcfg_config[i].address = addr;
53 pci_mmcfg_config[i].pci_segment = segment;
54 pci_mmcfg_config[i].start_bus_number = start;
55 pci_mmcfg_config[i].end_bus_number = end;
56}
57
27static const char __init *pci_mmcfg_e7520(void) 58static const char __init *pci_mmcfg_e7520(void)
28{ 59{
29 u32 win; 60 u32 win;
30 raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); 61 raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win);
31 62
32 win = win & 0xf000; 63 win = win & 0xf000;
33 if(win == 0x0000 || win == 0xf000) 64 if (win == 0x0000 || win == 0xf000)
34 pci_mmcfg_config_num = 0; 65 return NULL;
35 else { 66
36 pci_mmcfg_config_num = 1; 67 if (extend_mmcfg(1) == -1)
37 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); 68 return NULL;
38 if (!pci_mmcfg_config) 69
39 return NULL; 70 fill_one_mmcfg(win << 16, 0, 0, 255);
40 pci_mmcfg_config[0].address = win << 16;
41 pci_mmcfg_config[0].pci_segment = 0;
42 pci_mmcfg_config[0].start_bus_number = 0;
43 pci_mmcfg_config[0].end_bus_number = 255;
44 }
45 71
46 return "Intel Corporation E7520 Memory Controller Hub"; 72 return "Intel Corporation E7520 Memory Controller Hub";
47} 73}
@@ -50,13 +76,11 @@ static const char __init *pci_mmcfg_intel_945(void)
50{ 76{
51 u32 pciexbar, mask = 0, len = 0; 77 u32 pciexbar, mask = 0, len = 0;
52 78
53 pci_mmcfg_config_num = 1;
54
55 raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); 79 raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar);
56 80
57 /* Enable bit */ 81 /* Enable bit */
58 if (!(pciexbar & 1)) 82 if (!(pciexbar & 1))
59 pci_mmcfg_config_num = 0; 83 return NULL;
60 84
61 /* Size bits */ 85 /* Size bits */
62 switch ((pciexbar >> 1) & 3) { 86 switch ((pciexbar >> 1) & 3) {
@@ -73,28 +97,23 @@ static const char __init *pci_mmcfg_intel_945(void)
73 len = 0x04000000U; 97 len = 0x04000000U;
74 break; 98 break;
75 default: 99 default:
76 pci_mmcfg_config_num = 0; 100 return NULL;
77 } 101 }
78 102
 79 /* Errata #2, things break when not aligned on a 256MB boundary */ 103 /* Errata #2, things break when not aligned on a 256MB boundary */
80 /* Can only happen in 64M/128M mode */ 104 /* Can only happen in 64M/128M mode */
81 105
82 if ((pciexbar & mask) & 0x0fffffffU) 106 if ((pciexbar & mask) & 0x0fffffffU)
83 pci_mmcfg_config_num = 0; 107 return NULL;
84 108
85 /* Don't hit the APIC registers and their friends */ 109 /* Don't hit the APIC registers and their friends */
86 if ((pciexbar & mask) >= 0xf0000000U) 110 if ((pciexbar & mask) >= 0xf0000000U)
87 pci_mmcfg_config_num = 0; 111 return NULL;
88 112
89 if (pci_mmcfg_config_num) { 113 if (extend_mmcfg(1) == -1)
90 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); 114 return NULL;
91 if (!pci_mmcfg_config) 115
92 return NULL; 116 fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1);
93 pci_mmcfg_config[0].address = pciexbar & mask;
94 pci_mmcfg_config[0].pci_segment = 0;
95 pci_mmcfg_config[0].start_bus_number = 0;
96 pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1;
97 }
98 117
99 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; 118 return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub";
100} 119}
@@ -138,22 +157,77 @@ static const char __init *pci_mmcfg_amd_fam10h(void)
138 busnbits = 8; 157 busnbits = 8;
139 } 158 }
140 159
141 pci_mmcfg_config_num = (1 << segnbits); 160 if (extend_mmcfg(1 << segnbits) == -1)
142 pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) *
143 pci_mmcfg_config_num, GFP_KERNEL);
144 if (!pci_mmcfg_config)
145 return NULL; 161 return NULL;
146 162
147 for (i = 0; i < (1 << segnbits); i++) { 163 for (i = 0; i < (1 << segnbits); i++)
148 pci_mmcfg_config[i].address = base + (1<<28) * i; 164 fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1);
149 pci_mmcfg_config[i].pci_segment = i;
150 pci_mmcfg_config[i].start_bus_number = 0;
151 pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1;
152 }
153 165
154 return "AMD Family 10h NB"; 166 return "AMD Family 10h NB";
155} 167}
156 168
169static bool __initdata mcp55_checked;
170static const char __init *pci_mmcfg_nvidia_mcp55(void)
171{
172 int bus;
173 int mcp55_mmconf_found = 0;
174
175 static const u32 extcfg_regnum = 0x90;
176 static const u32 extcfg_regsize = 4;
177 static const u32 extcfg_enable_mask = 1<<31;
178 static const u32 extcfg_start_mask = 0xff<<16;
179 static const int extcfg_start_shift = 16;
180 static const u32 extcfg_size_mask = 0x3<<28;
181 static const int extcfg_size_shift = 28;
182 static const int extcfg_sizebus[] = {0x100, 0x80, 0x40, 0x20};
183 static const u32 extcfg_base_mask[] = {0x7ff8, 0x7ffc, 0x7ffe, 0x7fff};
184 static const int extcfg_base_lshift = 25;
185
186 /*
 187 * check whether ACPI is in use or the AMD Fam10h probe already took over
188 */
189 if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked)
190 return NULL;
191
192 mcp55_checked = true;
193 for (bus = 0; bus < 256; bus++) {
194 u64 base;
195 u32 l, extcfg;
196 u16 vendor, device;
197 int start, size_index, end;
198
199 raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), 0, 4, &l);
200 vendor = l & 0xffff;
201 device = (l >> 16) & 0xffff;
202
203 if (PCI_VENDOR_ID_NVIDIA != vendor || 0x0369 != device)
204 continue;
205
206 raw_pci_ops->read(0, bus, PCI_DEVFN(0, 0), extcfg_regnum,
207 extcfg_regsize, &extcfg);
208
209 if (!(extcfg & extcfg_enable_mask))
210 continue;
211
212 if (extend_mmcfg(1) == -1)
213 continue;
214
215 size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift;
216 base = extcfg & extcfg_base_mask[size_index];
 217 /* base could be > 4G */
218 base <<= extcfg_base_lshift;
219 start = (extcfg & extcfg_start_mask) >> extcfg_start_shift;
220 end = start + extcfg_sizebus[size_index] - 1;
221 fill_one_mmcfg(base, 0, start, end);
222 mcp55_mmconf_found++;
223 }
224
225 if (!mcp55_mmconf_found)
226 return NULL;
227
228 return "nVidia MCP55";
229}
230
157struct pci_mmcfg_hostbridge_probe { 231struct pci_mmcfg_hostbridge_probe {
158 u32 bus; 232 u32 bus;
159 u32 devfn; 233 u32 devfn;
@@ -171,8 +245,52 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = {
171 0x1200, pci_mmcfg_amd_fam10h }, 245 0x1200, pci_mmcfg_amd_fam10h },
172 { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, 246 { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD,
173 0x1200, pci_mmcfg_amd_fam10h }, 247 0x1200, pci_mmcfg_amd_fam10h },
248 { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_NVIDIA,
249 0x0369, pci_mmcfg_nvidia_mcp55 },
174}; 250};
175 251
252static int __init cmp_mmcfg(const void *x1, const void *x2)
253{
254 const typeof(pci_mmcfg_config[0]) *m1 = x1;
255 const typeof(pci_mmcfg_config[0]) *m2 = x2;
256 int start1, start2;
257
258 start1 = m1->start_bus_number;
259 start2 = m2->start_bus_number;
260
261 return start1 - start2;
262}
263
264static void __init pci_mmcfg_check_end_bus_number(void)
265{
266 int i;
267 typeof(pci_mmcfg_config[0]) *cfg, *cfgx;
268
269 /* sort them at first */
270 sort(pci_mmcfg_config, pci_mmcfg_config_num,
271 sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL);
272
 273 /* last one */
274 if (pci_mmcfg_config_num > 0) {
275 i = pci_mmcfg_config_num - 1;
276 cfg = &pci_mmcfg_config[i];
277 if (cfg->end_bus_number < cfg->start_bus_number)
278 cfg->end_bus_number = 255;
279 }
280
281 /* don't overlap please */
282 for (i = 0; i < pci_mmcfg_config_num - 1; i++) {
283 cfg = &pci_mmcfg_config[i];
284 cfgx = &pci_mmcfg_config[i+1];
285
286 if (cfg->end_bus_number < cfg->start_bus_number)
287 cfg->end_bus_number = 255;
288
289 if (cfg->end_bus_number >= cfgx->start_bus_number)
290 cfg->end_bus_number = cfgx->start_bus_number - 1;
291 }
292}
293
176static int __init pci_mmcfg_check_hostbridge(void) 294static int __init pci_mmcfg_check_hostbridge(void)
177{ 295{
178 u32 l; 296 u32 l;
@@ -186,31 +304,33 @@ static int __init pci_mmcfg_check_hostbridge(void)
186 304
187 pci_mmcfg_config_num = 0; 305 pci_mmcfg_config_num = 0;
188 pci_mmcfg_config = NULL; 306 pci_mmcfg_config = NULL;
189 name = NULL;
190 307
191 for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { 308 for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) {
192 bus = pci_mmcfg_probes[i].bus; 309 bus = pci_mmcfg_probes[i].bus;
193 devfn = pci_mmcfg_probes[i].devfn; 310 devfn = pci_mmcfg_probes[i].devfn;
194 raw_pci_ops->read(0, bus, devfn, 0, 4, &l); 311 raw_pci_ops->read(0, bus, devfn, 0, 4, &l);
195 vendor = l & 0xffff; 312 vendor = l & 0xffff;
196 device = (l >> 16) & 0xffff; 313 device = (l >> 16) & 0xffff;
197 314
315 name = NULL;
198 if (pci_mmcfg_probes[i].vendor == vendor && 316 if (pci_mmcfg_probes[i].vendor == vendor &&
199 pci_mmcfg_probes[i].device == device) 317 pci_mmcfg_probes[i].device == device)
200 name = pci_mmcfg_probes[i].probe(); 318 name = pci_mmcfg_probes[i].probe();
201 }
202 319
203 if (name) { 320 if (name)
204 printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", 321 printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n",
205 name, pci_mmcfg_config_num ? "with" : "without"); 322 name);
206 } 323 }
207 324
208 return name != NULL; 325 /* some end_bus_number is crazy, fix it */
326 pci_mmcfg_check_end_bus_number();
327
328 return pci_mmcfg_config_num != 0;
209} 329}
210 330
211static void __init pci_mmcfg_insert_resources(void) 331static void __init pci_mmcfg_insert_resources(void)
212{ 332{
213#define PCI_MMCFG_RESOURCE_NAME_LEN 19 333#define PCI_MMCFG_RESOURCE_NAME_LEN 24
214 int i; 334 int i;
215 struct resource *res; 335 struct resource *res;
216 char *names; 336 char *names;
@@ -228,9 +348,10 @@ static void __init pci_mmcfg_insert_resources(void)
228 struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; 348 struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i];
229 num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; 349 num_buses = cfg->end_bus_number - cfg->start_bus_number + 1;
230 res->name = names; 350 res->name = names;
231 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", 351 snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN,
232 cfg->pci_segment); 352 "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment,
233 res->start = cfg->address; 353 cfg->start_bus_number, cfg->end_bus_number);
354 res->start = cfg->address + (cfg->start_bus_number << 20);
234 res->end = res->start + (num_buses << 20) - 1; 355 res->end = res->start + (num_buses << 20) - 1;
235 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 356 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
236 insert_resource(&iomem_resource, res); 357 insert_resource(&iomem_resource, res);
@@ -354,8 +475,6 @@ static void __init pci_mmcfg_reject_broken(int early)
354 (pci_mmcfg_config[0].address == 0)) 475 (pci_mmcfg_config[0].address == 0))
355 return; 476 return;
356 477
357 cfg = &pci_mmcfg_config[0];
358
359 for (i = 0; i < pci_mmcfg_config_num; i++) { 478 for (i = 0; i < pci_mmcfg_config_num; i++) {
360 int valid = 0; 479 int valid = 0;
361 u64 addr, size; 480 u64 addr, size;
@@ -423,10 +542,10 @@ static void __init __pci_mmcfg_init(int early)
423 known_bridge = 1; 542 known_bridge = 1;
424 } 543 }
425 544
426 if (!known_bridge) { 545 if (!known_bridge)
427 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); 546 acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg);
428 pci_mmcfg_reject_broken(early); 547
429 } 548 pci_mmcfg_reject_broken(early);
430 549
431 if ((pci_mmcfg_config_num == 0) || 550 if ((pci_mmcfg_config_num == 0) ||
432 (pci_mmcfg_config == NULL) || 551 (pci_mmcfg_config == NULL) ||
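pci_mmcfg_check_end_bus_number() above sanitizes the detected MMCONFIG regions: sort by start bus number, widen a region whose end bus is below its start to 255, and trim any region that would overlap its successor. A standalone sketch with qsort() in place of the kernel's sort() and struct mmcfg in place of struct acpi_mcfg_allocation:

#include <stdio.h>
#include <stdlib.h>

struct mmcfg { int start_bus, end_bus; };

static int cmp_mmcfg(const void *a, const void *b)
{
    return ((const struct mmcfg *)a)->start_bus -
           ((const struct mmcfg *)b)->start_bus;
}

static void fix_end_bus(struct mmcfg *cfg, int n)
{
    int i;

    qsort(cfg, n, sizeof(cfg[0]), cmp_mmcfg);

    /* open-ended last region */
    if (n > 0 && cfg[n - 1].end_bus < cfg[n - 1].start_bus)
        cfg[n - 1].end_bus = 255;

    for (i = 0; i < n - 1; i++) {
        if (cfg[i].end_bus < cfg[i].start_bus)
            cfg[i].end_bus = 255;
        /* trim so region i stops before region i+1 begins */
        if (cfg[i].end_bus >= cfg[i + 1].start_bus)
            cfg[i].end_bus = cfg[i + 1].start_bus - 1;
    }
}

int main(void)
{
    struct mmcfg cfg[] = { { 0x40, 0x20 }, { 0x00, 0xff } };
    int i;

    fix_end_bus(cfg, 2);
    for (i = 0; i < 2; i++)
        printf("[%02x-%02x]\n", cfg[i].start_bus, cfg[i].end_bus);
    return 0;                       /* prints [00-3f] then [40-ff] */
}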
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 30007ffc8e11..94349f8b2f96 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -112,13 +112,18 @@ static struct pci_raw_ops pci_mmcfg = {
112static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) 112static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
113{ 113{
114 void __iomem *addr; 114 void __iomem *addr;
115 u32 size; 115 u64 start, size;
116 116
117 size = (cfg->end_bus_number + 1) << 20; 117 start = cfg->start_bus_number;
118 addr = ioremap_nocache(cfg->address, size); 118 start <<= 20;
119 start += cfg->address;
120 size = cfg->end_bus_number + 1 - cfg->start_bus_number;
121 size <<= 20;
122 addr = ioremap_nocache(start, size);
119 if (addr) { 123 if (addr) {
120 printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", 124 printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n",
121 cfg->address, cfg->address + size - 1); 125 start, start + size - 1);
126 addr -= cfg->start_bus_number << 20;
122 } 127 }
123 return addr; 128 return addr;
124} 129}
@@ -157,7 +162,7 @@ void __init pci_mmcfg_arch_free(void)
157 162
158 for (i = 0; i < pci_mmcfg_config_num; ++i) { 163 for (i = 0; i < pci_mmcfg_config_num; ++i) {
159 if (pci_mmcfg_virt[i].virt) { 164 if (pci_mmcfg_virt[i].virt) {
160 iounmap(pci_mmcfg_virt[i].virt); 165 iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20));
161 pci_mmcfg_virt[i].virt = NULL; 166 pci_mmcfg_virt[i].virt = NULL;
162 pci_mmcfg_virt[i].cfg = NULL; 167 pci_mmcfg_virt[i].cfg = NULL;
163 } 168 }
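The mcfg_ioremap() change above maps only the window covering start_bus_number through end_bus_number, then biases the returned cookie down by start_bus_number << 20 so the usual base + (bus << 20) accessor arithmetic is unchanged; pci_mmcfg_arch_free() correspondingly adds the bias back before iounmap(). A userspace sketch of the arithmetic (malloc()/free() stand in for ioremap_nocache()/iounmap(); the out-of-window pointer bias mirrors what the kernel does):

#include <stdio.h>
#include <stdlib.h>

#define BUS_SHIFT 20    /* 1 MiB of config space per bus */

int main(void)
{
    size_t start_bus = 0x40, end_bus = 0x7f, bus = 0x42;
    size_t size = (end_bus + 1 - start_bus) << BUS_SHIFT;
    char *window = malloc(size);    /* stands in for ioremap_nocache */
    char *cookie;

    if (!window)
        return 1;

    /* bias the cookie so callers can index by absolute bus number */
    cookie = window - (start_bus << BUS_SHIFT);

    /* generic accessor math, unchanged from the full-range case */
    printf("offset into window: %#zx\n",
           (size_t)(cookie + (bus << BUS_SHIFT) - window));

    /* unmapping must undo the bias first */
    free(cookie + (start_bus << BUS_SHIFT));
    return 0;
}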
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index 9ff4d5b55ad1..58b32db33125 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,2 +1,7 @@
1# __restore_processor_state() restores %gs after S3 resume and so should not
2# itself be stack-protected
3nostackp := $(call cc-option, -fno-stack-protector)
4CFLAGS_cpu_$(BITS).o := $(nostackp)
5
1obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o 6obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o
2obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o 7obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 274d06082f48..ce702c5b3a2c 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -12,6 +12,7 @@
12#include <asm/mtrr.h> 12#include <asm/mtrr.h>
13#include <asm/mce.h> 13#include <asm/mce.h>
14#include <asm/xcr.h> 14#include <asm/xcr.h>
15#include <asm/suspend.h>
15 16
16static struct saved_context saved_context; 17static struct saved_context saved_context;
17 18
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index e3b6cf70d62c..5343540f2607 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -15,6 +15,7 @@
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/mtrr.h> 16#include <asm/mtrr.h>
17#include <asm/xcr.h> 17#include <asm/xcr.h>
18#include <asm/suspend.h>
18 19
19static void fix_processor_context(void); 20static void fix_processor_context(void);
20 21
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 6dd000dd7933..65fdc86e923f 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -14,6 +14,7 @@
14#include <asm/page.h> 14#include <asm/page.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/mtrr.h> 16#include <asm/mtrr.h>
17#include <asm/suspend.h>
17 18
18/* References to section boundaries */ 19/* References to section boundaries */
19extern const void __nosave_begin, __nosave_end; 20extern const void __nosave_begin, __nosave_end;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4ec95c..db3802fb7b84 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1723{ 1723{
1724 pmd_t *kernel_pmd; 1724 pmd_t *kernel_pmd;
1725 1725
1726 init_pg_tables_start = __pa(pgd); 1726 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1727 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1727 xen_start_info->nr_pt_frames * PAGE_SIZE +
1728 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); 1728 512*1024);
1729 1729
1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
@@ -1870,7 +1870,6 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
1870 1870
1871#ifdef CONFIG_X86_PAE 1871#ifdef CONFIG_X86_PAE
1872 .set_pte_atomic = xen_set_pte_atomic, 1872 .set_pte_atomic = xen_set_pte_atomic,
1873 .set_pte_present = xen_set_pte_at,
1874 .pte_clear = xen_pte_clear, 1873 .pte_clear = xen_pte_clear,
1875 .pmd_clear = xen_pmd_clear, 1874 .pmd_clear = xen_pmd_clear,
1876#endif /* CONFIG_X86_PAE */ 1875#endif /* CONFIG_X86_PAE */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8d470562ffc9..585a6e330837 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
158 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); 158 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
159 if (rc >= 0) { 159 if (rc >= 0) {
160 num_processors++; 160 num_processors++;
161 cpu_set(i, cpu_possible_map); 161 set_cpu_possible(i, true);
162 } 162 }
163 } 163 }
164} 164}
@@ -197,7 +197,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
197 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { 197 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
198 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) 198 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
199 continue; 199 continue;
200 cpu_clear(cpu, cpu_possible_map); 200 set_cpu_possible(cpu, false);
201 } 201 }
202 202
203 for_each_possible_cpu (cpu) { 203 for_each_possible_cpu (cpu) {
@@ -210,7 +210,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
210 if (IS_ERR(idle)) 210 if (IS_ERR(idle))
211 panic("failed fork for CPU %d", cpu); 211 panic("failed fork for CPU %d", cpu);
212 212
213 cpu_set(cpu, cpu_present_map); 213 set_cpu_present(cpu, true);
214 } 214 }
215} 215}
216 216
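The xen/smp.c hunks above replace open-coded cpu_set()/cpu_clear() on cpu_possible_map and cpu_present_map with the set_cpu_possible()/set_cpu_present() accessors, keeping the masks encapsulated behind the cpumask API. A toy sketch of the accessor shape (an unsigned long bitmask stands in for struct cpumask):

#include <stdio.h>
#include <stdbool.h>

static unsigned long possible_map;  /* stands in for cpu_possible_map */

/* accessor in the style of set_cpu_possible(cpu, true/false) */
static void set_cpu_possible_(int cpu, bool possible)
{
    if (possible)
        possible_map |= 1UL << cpu;
    else
        possible_map &= ~(1UL << cpu);
}

int main(void)
{
    set_cpu_possible_(0, true);
    set_cpu_possible_(3, true);
    set_cpu_possible_(3, false);
    printf("possible map: %#lx\n", possible_map);
    return 0;
}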